From 350afa8a1101f62ce31bc4ed6f69cf4b90ec4fa2 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 8 Aug 2024 06:29:34 +0000 Subject: x86/split_lock: Move Split and Bus lock code to a dedicated file Bus Lock Detect functionality on AMD platforms works identical to Intel. Move split_lock and bus_lock specific code from intel.c to a dedicated file so that it can be compiled and supported on non-Intel platforms. Also, introduce CONFIG_X86_BUS_LOCK_DETECT, make it dependent on CONFIG_CPU_SUP_INTEL and add compilation dependency of the new bus_lock.c file on CONFIG_X86_BUS_LOCK_DETECT. Signed-off-by: Ravi Bangoria Signed-off-by: Thomas Gleixner Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/all/20240808062937.1149-2-ravi.bangoria@amd.com --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f8d150343d42..d606b2a1de99 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -976,7 +976,7 @@ struct task_struct { #ifdef CONFIG_ARCH_HAS_CPU_PASID unsigned pasid_activated:1; #endif -#ifdef CONFIG_CPU_SUP_INTEL +#ifdef CONFIG_X86_BUS_LOCK_DETECT unsigned reported_split_lock:1; #endif #ifdef CONFIG_TASK_DELAY_ACCT -- cgit v1.2.3 From 8cf9a01edc216b16b5839eb793ac544d2c97ce97 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 11 Sep 2024 15:42:57 -0400 Subject: fs: Introduce FOP_ASYNC_LOCK Some lock managers (NLM, kNFSD) fastidiously avoid blocking their kernel threads while servicing blocking locks. If a filesystem supports asynchronous lock requests those lock managers can use notifications to quickly inform clients they have acquired a file lock. Historically, only posix_lock_file() was capable of supporting asynchronous locks so the check for support was simply file_operations->lock(), but with recent changes in DLM, both GFS2 and OCFS2 also support asynchronous locks and have started signalling their support with EXPORT_OP_ASYNC_LOCK. We recently noticed that those changes dropped the checks for whether a filesystem simply defaults to posix_lock_file(), so async lock notifications have not been attempted for NLM and NFSv4.1+ for most filesystems. While trying to fix this it has become clear that testing both the export flag combined with testing ->lock() creates quite a layering mess. It seems appropriate to signal support with a fop_flag. Add FOP_ASYNC_LOCK so that filesystems with ->lock() can signal their capability to handle lock requests asynchronously. Add a helper for lock managers to properly test that support. 
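As an illustration only (not part of this patch), a filesystem whose
->lock() implementation can complete asynchronously would advertise the
capability with the new flag, and a lock manager would test for it with
the new helper; "foofs" is a made-up name:

	static const struct file_operations foofs_file_operations = {
		...
		.lock		= foofs_lock,	/* may return FILE_LOCK_DEFERRED */
		.fop_flags	= FOP_ASYNC_LOCK,
	};

	/* in a lock manager such as lockd or knfsd */
	if (locks_can_async_lock(file->f_op))
		/* queue a non-blocking request and wait for ->lm_grant() */
	else
		/* fall back to a synchronous, blocking wait */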
Signed-off-by: Benjamin Coddington Link: https://lore.kernel.org/r/3330d5a324abe2ce9c1dafe89cacdc6db41945d1.1726083391.git.bcodding@redhat.com Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/filelock.h | 5 +++++ include/linux/fs.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/filelock.h b/include/linux/filelock.h index daee999d05f3..58c1120a8253 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -180,6 +180,11 @@ static inline void locks_wake_up(struct file_lock *fl) wake_up(&fl->c.flc_wait); } +static inline bool locks_can_async_lock(const struct file_operations *fops) +{ + return !fops->lock || fops->fop_flags & FOP_ASYNC_LOCK; +} + /* fs/locks.c */ void locks_free_lock_context(struct inode *inode); void locks_free_lock(struct file_lock *fl); diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ca11e241a24..78221ae589d9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2074,6 +2074,8 @@ struct file_operations { #define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3)) /* Contains huge pages */ #define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4)) +/* Supports asynchronous lock callbacks */ +#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 5)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, -- cgit v1.2.3 From 318580ad7f2828f6269e0b8819e943ddedda3375 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 29 Aug 2024 14:41:09 +0800 Subject: hugetlbfs: support tracepoint Add basic tracepoints for {alloc, evict, free}_inode, setattr and fallocate. These can help users to debug hugetlbfs more conveniently. Signed-off-by: Hongbo Li Link: https://lore.kernel.org/r/20240829064110.67884-2-lihongbo22@huawei.com Reviewed-by: Steven Rostedt (Google) Signed-off-by: Christian Brauner --- MAINTAINERS | 1 + include/trace/events/hugetlbfs.h | 156 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 include/trace/events/hugetlbfs.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 0c94ec0ca478..5e628017cbaa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10367,6 +10367,7 @@ F: Documentation/mm/hugetlbfs_reserv.rst F: Documentation/mm/vmemmap_dedup.rst F: fs/hugetlbfs/ F: include/linux/hugetlb.h +F: include/trace/events/hugetlbfs.h F: mm/hugetlb.c F: mm/hugetlb_vmemmap.c F: mm/hugetlb_vmemmap.h diff --git a/include/trace/events/hugetlbfs.h b/include/trace/events/hugetlbfs.h new file mode 100644 index 000000000000..8331c904a9ba --- /dev/null +++ b/include/trace/events/hugetlbfs.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hugetlbfs + +#if !defined(_TRACE_HUGETLBFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HUGETLBFS_H + +#include + +TRACE_EVENT(hugetlbfs_alloc_inode, + + TP_PROTO(struct inode *inode, struct inode *dir, int mode), + + TP_ARGS(inode, dir, mode), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(ino_t, dir) + __field(__u16, mode) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->dir = dir->i_ino; + __entry->mode = mode; + ), + + TP_printk("dev %d,%d ino %lu dir %lu mode 0%o", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + (unsigned long) __entry->dir, __entry->mode) +); + +DECLARE_EVENT_CLASS(hugetlbfs__inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), 
+ + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(__u16, mode) + __field(loff_t, size) + __field(unsigned int, nlink) + __field(unsigned int, seals) + __field(blkcnt_t, blocks) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->mode = inode->i_mode; + __entry->size = inode->i_size; + __entry->nlink = inode->i_nlink; + __entry->seals = HUGETLBFS_I(inode)->seals; + __entry->blocks = inode->i_blocks; + ), + + TP_printk("dev %d,%d ino %lu mode 0%o size %lld nlink %u seals %u blocks %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, + __entry->mode, __entry->size, __entry->nlink, __entry->seals, + (unsigned long long)__entry->blocks) +); + +DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_evict_inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_free_inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +TRACE_EVENT(hugetlbfs_setattr, + + TP_PROTO(struct inode *inode, struct dentry *dentry, + struct iattr *attr), + + TP_ARGS(inode, dentry, attr), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, d_len) + __string(d_name, dentry->d_name.name) + __field(unsigned int, ia_valid) + __field(unsigned int, ia_mode) + __field(loff_t, old_size) + __field(loff_t, ia_size) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->d_len = dentry->d_name.len; + __assign_str(d_name); + __entry->ia_valid = attr->ia_valid; + __entry->ia_mode = attr->ia_mode; + __entry->old_size = inode->i_size; + __entry->ia_size = attr->ia_size; + ), + + TP_printk("dev %d,%d ino %lu name %.*s valid %#x mode 0%o old_size %lld size %lld", + MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long)__entry->ino, + __entry->d_len, __get_str(d_name), __entry->ia_valid, __entry->ia_mode, + __entry->old_size, __entry->ia_size) +); + +TRACE_EVENT(hugetlbfs_fallocate, + + TP_PROTO(struct inode *inode, int mode, + loff_t offset, loff_t len, int ret), + + TP_ARGS(inode, mode, offset, len, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, mode) + __field(loff_t, offset) + __field(loff_t, len) + __field(loff_t, size) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->mode = mode; + __entry->offset = offset; + __entry->len = len; + __entry->size = inode->i_size; + __entry->ret = ret; + ), + + TP_printk("dev %d,%d ino %lu mode 0%o offset %lld len %lld size %lld ret %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long)__entry->ino, __entry->mode, + (unsigned long long)__entry->offset, + (unsigned long long)__entry->len, + (unsigned long long)__entry->size, + __entry->ret) +); + +#endif /* _TRACE_HUGETLBFS_H */ + + /* This part must be outside protection */ +#include -- cgit v1.2.3 From 2b0996c7646293c71c1297c00e07e54cee9bec5c Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Fri, 6 Sep 2024 12:53:59 -0700 Subject: wifi: ath9k: remove ath9k_platform_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completely unused here in favor of Device Tree based setup. The DT code in here should currently match what is available with platform files. Any such lapse can always be added. 
Signed-off-by: Rosen Penev Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240906195359.6982-4-rosenp@gmail.com --- .../net/wireless/ath/ath9k/ath9k_pci_owl_loader.c | 1 - drivers/net/wireless/ath/ath9k/hw.c | 2 +- drivers/net/wireless/ath/ath9k/init.c | 52 +--------------------- include/linux/ath9k_platform.h | 51 --------------------- 4 files changed, 3 insertions(+), 103 deletions(-) delete mode 100644 include/linux/ath9k_platform.h (limited to 'include') diff --git a/drivers/net/wireless/ath/ath9k/ath9k_pci_owl_loader.c b/drivers/net/wireless/ath/ath9k/ath9k_pci_owl_loader.c index 004ca5f536be..fe1013a3a588 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k_pci_owl_loader.c +++ b/drivers/net/wireless/ath/ath9k/ath9k_pci_owl_loader.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 04a4b9ea61c3..36db734c74ae 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -490,7 +490,7 @@ static void ath9k_hw_init_macaddr(struct ath_hw *ah) u16 eeval; static const u32 EEP_MAC[] = { EEP_MAC_LSW, EEP_MAC_MID, EEP_MAC_MSW }; - /* MAC address may already be loaded via ath9k_platform_data */ + /* MAC address may already be loaded via NVMEM */ if (is_valid_ether_addr(common->macaddr)) return; diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 7fad7e75af6a..f9e77c4624d9 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -583,8 +582,8 @@ static int ath9k_nvmem_request_eeprom(struct ath_softc *sc) /* nvmem cell might not be defined, or the nvmem * subsystem isn't included. In this case, follow - * the established "just return 0;" convention of - * ath9k_init_platform to say: + * the established "just return 0;" convention + * to say: * "All good. Nothing to see here. Please go on." 
*/ if (err == -ENOENT || err == -EOPNOTSUPP) @@ -620,49 +619,6 @@ static int ath9k_nvmem_request_eeprom(struct ath_softc *sc) return 0; } -static int ath9k_init_platform(struct ath_softc *sc) -{ - struct ath9k_platform_data *pdata = sc->dev->platform_data; - struct ath_hw *ah = sc->sc_ah; - struct ath_common *common = ath9k_hw_common(ah); - int ret; - - if (!pdata) - return 0; - - if (!pdata->use_eeprom) { - ah->ah_flags &= ~AH_USE_EEPROM; - ah->gpio_mask = pdata->gpio_mask; - ah->gpio_val = pdata->gpio_val; - ah->led_pin = pdata->led_pin; - ah->is_clk_25mhz = pdata->is_clk_25mhz; - ah->get_mac_revision = pdata->get_mac_revision; - ah->external_reset = pdata->external_reset; - ah->disable_2ghz = pdata->disable_2ghz; - ah->disable_5ghz = pdata->disable_5ghz; - - if (!pdata->endian_check) - ah->ah_flags |= AH_NO_EEP_SWAP; - } - - if (pdata->eeprom_name) { - ret = ath9k_eeprom_request(sc, pdata->eeprom_name); - if (ret) - return ret; - } - - if (pdata->led_active_high) - ah->config.led_active_high = true; - - if (pdata->tx_gain_buffalo) - ah->config.tx_gain_buffalo = true; - - if (pdata->macaddr) - ether_addr_copy(common->macaddr, pdata->macaddr); - - return 0; -} - static int ath9k_of_init(struct ath_softc *sc) { struct device_node *np = sc->dev->of_node; @@ -748,10 +704,6 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, */ ath9k_init_pcoem_platform(sc); - ret = ath9k_init_platform(sc); - if (ret) - return ret; - ret = ath9k_of_init(sc); if (ret) return ret; diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h deleted file mode 100644 index 76860a461ed2..000000000000 --- a/include/linux/ath9k_platform.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2008 Atheros Communications Inc. - * Copyright (c) 2009 Gabor Juhos - * Copyright (c) 2009 Imre Kaloz - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ - -#ifndef _LINUX_ATH9K_PLATFORM_H -#define _LINUX_ATH9K_PLATFORM_H - -#define ATH9K_PLAT_EEP_MAX_WORDS 2048 - -struct ath9k_platform_data { - const char *eeprom_name; - - u16 eeprom_data[ATH9K_PLAT_EEP_MAX_WORDS]; - u8 *macaddr; - - int led_pin; - u32 gpio_mask; - u32 gpio_val; - - u32 bt_active_pin; - u32 bt_priority_pin; - u32 wlan_active_pin; - - bool endian_check; - bool is_clk_25mhz; - bool tx_gain_buffalo; - bool disable_2ghz; - bool disable_5ghz; - bool led_active_high; - - int (*get_mac_revision)(void); - int (*external_reset)(void); - - bool use_eeprom; -}; - -#endif /* _LINUX_ATH9K_PLATFORM_H */ -- cgit v1.2.3 From 1cc2e1faafb3b5a2be25112559bdb495736b5af7 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 20 Sep 2024 10:57:57 +0200 Subject: pwm: Add more locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This ensures that a pwm_chip that has no corresponding driver isn't used and that a driver doesn't go away while a callback is still running. In the presence of device links this isn't necessary yet (so this is no fix) but for pwm character device support this is needed. To not serialize all pwm_apply_state() calls, this introduces a per chip lock. An additional complication is that for atomic chips a mutex cannot be used (as pwm_apply_atomic() must not sleep) and a spinlock cannot be held while calling an operation for a sleeping chip. So depending on the chip being atomic or not a spinlock or a mutex is used. An additional change implemented here is that on driver remove the .free() callback is called for each requested pwm_device. This is the right time because later (e.g. when the consumer calls pwm_put()) the free function is (maybe) not available any more. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/026aa891c8270a11723a1ba7e4256f456f7e1e86.1726819463.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/core.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++----- include/linux/pwm.h | 13 +++++++ 2 files changed, 105 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 6e752e148b98..5a095eb46b54 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -31,6 +31,24 @@ static DEFINE_MUTEX(pwm_lock); static DEFINE_IDR(pwm_chips); +static void pwmchip_lock(struct pwm_chip *chip) +{ + if (chip->atomic) + spin_lock(&chip->atomic_lock); + else + mutex_lock(&chip->nonatomic_lock); +} + +static void pwmchip_unlock(struct pwm_chip *chip) +{ + if (chip->atomic) + spin_unlock(&chip->atomic_lock); + else + mutex_unlock(&chip->nonatomic_lock); +} + +DEFINE_GUARD(pwmchip, struct pwm_chip *, pwmchip_lock(_T), pwmchip_unlock(_T)) + static void pwm_apply_debug(struct pwm_device *pwm, const struct pwm_state *state) { @@ -220,6 +238,7 @@ static int __pwm_apply(struct pwm_device *pwm, const struct pwm_state *state) int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state) { int err; + struct pwm_chip *chip = pwm->chip; /* * Some lowlevel driver's implementations of .apply() make use of @@ -230,7 +249,12 @@ int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state) */ might_sleep(); - if (IS_ENABLED(CONFIG_PWM_DEBUG) && pwm->chip->atomic) { + guard(pwmchip)(chip); + + if (!chip->operational) + return -ENODEV; + + if (IS_ENABLED(CONFIG_PWM_DEBUG) && chip->atomic) { /* * Catch any drivers that have been marked as atomic but * that will sleep anyway. 
@@ -254,9 +278,16 @@ EXPORT_SYMBOL_GPL(pwm_apply_might_sleep); */ int pwm_apply_atomic(struct pwm_device *pwm, const struct pwm_state *state) { - WARN_ONCE(!pwm->chip->atomic, + struct pwm_chip *chip = pwm->chip; + + WARN_ONCE(!chip->atomic, "sleeping PWM driver used in atomic context\n"); + guard(pwmchip)(chip); + + if (!chip->operational) + return -ENODEV; + return __pwm_apply(pwm, state); } EXPORT_SYMBOL_GPL(pwm_apply_atomic); @@ -334,8 +365,18 @@ static int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, if (!ops->capture) return -ENOSYS; + /* + * Holding the pwm_lock is probably not needed. If you use pwm_capture() + * and you're interested to speed it up, please convince yourself it's + * really not needed, test and then suggest a patch on the mailing list. + */ guard(mutex)(&pwm_lock); + guard(pwmchip)(chip); + + if (!chip->operational) + return -ENODEV; + return ops->capture(chip, pwm, result, timeout); } @@ -368,6 +409,14 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label) if (test_bit(PWMF_REQUESTED, &pwm->flags)) return -EBUSY; + /* + * This function is called while holding pwm_lock. As .operational only + * changes while holding this lock, checking it here without holding the + * chip lock is fine. + */ + if (!chip->operational) + return -ENODEV; + if (!try_module_get(chip->owner)) return -ENODEV; @@ -396,7 +445,9 @@ err_get_device: */ struct pwm_state state = { 0, }; - err = ops->get_state(chip, pwm, &state); + scoped_guard(pwmchip, chip) + err = ops->get_state(chip, pwm, &state); + trace_pwm_get(pwm, &state, err); if (!err) @@ -1020,6 +1071,7 @@ struct pwm_chip *pwmchip_alloc(struct device *parent, unsigned int npwm, size_t chip->npwm = npwm; chip->uses_pwmchip_alloc = true; + chip->operational = false; pwmchip_dev = &chip->dev; device_initialize(pwmchip_dev); @@ -1125,6 +1177,11 @@ int __pwmchip_add(struct pwm_chip *chip, struct module *owner) chip->owner = owner; + if (chip->atomic) + spin_lock_init(&chip->atomic_lock); + else + mutex_init(&chip->nonatomic_lock); + guard(mutex)(&pwm_lock); ret = idr_alloc(&pwm_chips, chip, 0, 0, GFP_KERNEL); @@ -1138,6 +1195,9 @@ int __pwmchip_add(struct pwm_chip *chip, struct module *owner) if (IS_ENABLED(CONFIG_OF)) of_pwmchip_add(chip); + scoped_guard(pwmchip, chip) + chip->operational = true; + ret = device_add(&chip->dev); if (ret) goto err_device_add; @@ -1145,6 +1205,9 @@ int __pwmchip_add(struct pwm_chip *chip, struct module *owner) return 0; err_device_add: + scoped_guard(pwmchip, chip) + chip->operational = false; + if (IS_ENABLED(CONFIG_OF)) of_pwmchip_remove(chip); @@ -1164,11 +1227,27 @@ void pwmchip_remove(struct pwm_chip *chip) { pwmchip_sysfs_unexport(chip); - if (IS_ENABLED(CONFIG_OF)) - of_pwmchip_remove(chip); + scoped_guard(mutex, &pwm_lock) { + unsigned int i; + + scoped_guard(pwmchip, chip) + chip->operational = false; + + for (i = 0; i < chip->npwm; ++i) { + struct pwm_device *pwm = &chip->pwms[i]; + + if (test_and_clear_bit(PWMF_REQUESTED, &pwm->flags)) { + dev_warn(&chip->dev, "Freeing requested PWM #%u\n", i); + if (pwm->chip->ops->free) + pwm->chip->ops->free(pwm->chip, pwm); + } + } + + if (IS_ENABLED(CONFIG_OF)) + of_pwmchip_remove(chip); - scoped_guard(mutex, &pwm_lock) idr_remove(&pwm_chips, chip->id); + } device_del(&chip->dev); } @@ -1538,12 +1617,17 @@ void pwm_put(struct pwm_device *pwm) guard(mutex)(&pwm_lock); - if (!test_and_clear_bit(PWMF_REQUESTED, &pwm->flags)) { + /* + * Trigger a warning if a consumer called pwm_put() twice. 
+ * If the chip isn't operational, PWMF_REQUESTED was already cleared in + * pwmchip_remove(). So don't warn in this case. + */ + if (chip->operational && !test_and_clear_bit(PWMF_REQUESTED, &pwm->flags)) { pr_warn("PWM device already freed\n"); return; } - if (chip->ops->free) + if (chip->operational && chip->ops->free) pwm->chip->ops->free(pwm->chip, pwm); pwm->label = NULL; diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 8acd60b53f58..3ea73e075abe 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -275,6 +275,9 @@ struct pwm_ops { * @of_xlate: request a PWM device given a device tree PWM specifier * @atomic: can the driver's ->apply() be called in atomic context * @uses_pwmchip_alloc: signals if pwmchip_allow was used to allocate this chip + * @operational: signals if the chip can be used (or is already deregistered) + * @nonatomic_lock: mutex for nonatomic chips + * @atomic_lock: mutex for atomic chips * @pwms: array of PWM devices allocated by the framework */ struct pwm_chip { @@ -290,6 +293,16 @@ struct pwm_chip { /* only used internally by the PWM framework */ bool uses_pwmchip_alloc; + bool operational; + union { + /* + * depending on the chip being atomic or not either the mutex or + * the spinlock is used. It protects .operational and + * synchronizes the callbacks in .ops + */ + struct mutex nonatomic_lock; + spinlock_t atomic_lock; + }; struct pwm_device pwms[] __counted_by(npwm); }; -- cgit v1.2.3 From 17e40c25158f2505cbcdeda96624afcbab4af368 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 20 Sep 2024 10:57:58 +0200 Subject: pwm: New abstraction for PWM waveforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up to now the configuration of a PWM setting is described exclusively by a struct pwm_state which contains information about period, duty_cycle, polarity and if the PWM is enabled. (There is another member usage_power which doesn't completely fit into pwm_state, I ignore it here for simplicity.) Instead of a polarity the new abstraction has a member duty_offset_ns that defines when the rising edge happens after the period start. This is more general, as with a pwm_state the rising edge can only happen at the period's start or such that the falling edge is at the end of the period (i.e. duty_offset_ns == 0 or duty_offset_ns == period_length_ns - duty_length_ns). A disabled PWM is modeled by .period_length_ns = 0. In my eyes this is a nice usage of that otherwise unusable setting, as it doesn't define anything about the future which matches the fact that consumers should consider the state of the output as undefined and it's just there to say "No further requirements about the output, you can save some power.". Further I renamed period and duty_cycle to period_length_ns and duty_length_ns. In the past there was confusion from time to time about duty_cycle being measured in nanoseconds because people expected a percentage of period instead. With "length_ns" as suffix the semantic should be more obvious to people unfamiliar with the pwm subsystem. period is renamed to period_length_ns for consistency. The API for consumers doesn't change yet, but lowlevel drivers can implement callbacks that work with pwm_waveforms instead of pwm_states. A new thing about these callbacks is that the calculation of hardware settings needed to implement a certain waveform is separated from actually writing these settings. 
The motivation for that is that this allows a consumer to query the hardware capabilities without actually modifying the hardware state. The rounding rules that are expected to be implemented in the round_waveform_tohw() are: First pick the biggest possible period not bigger than wf->period_length_ns. For that period pick the biggest possible duty setting not bigger than wf->duty_length_ns. Third pick the biggest possible offset not bigger than wf->duty_offset_ns. If the requested period is too small for the hardware, it's expected that a setting with the minimal period and duty_length_ns = duty_offset_ns = 0 is returned and this fact is signaled by a return value of 1. Signed-off-by: Uwe Kleine-König Tested-by: Trevor Gamblin Link: https://lore.kernel.org/r/df0faa33bf9e7c9e2e5eab8d31bbf61e861bd401.1726819463.git.u.kleine-koenig@baylibre.com [ukleinek: Update pwm_check_rounding() to return bool instead of int.] Signed-off-by: Uwe Kleine-König --- drivers/pwm/core.c | 234 +++++++++++++++++++++++++++++++++++++++++++++++----- include/linux/pwm.h | 36 ++++++++ 2 files changed, 249 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 5a095eb46b54..bbe7bfdb1549 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -49,6 +49,102 @@ static void pwmchip_unlock(struct pwm_chip *chip) DEFINE_GUARD(pwmchip, struct pwm_chip *, pwmchip_lock(_T), pwmchip_unlock(_T)) +static void pwm_wf2state(const struct pwm_waveform *wf, struct pwm_state *state) +{ + if (wf->period_length_ns) { + if (wf->duty_length_ns + wf->duty_offset_ns < wf->period_length_ns) + *state = (struct pwm_state){ + .enabled = true, + .polarity = PWM_POLARITY_NORMAL, + .period = wf->period_length_ns, + .duty_cycle = wf->duty_length_ns, + }; + else + *state = (struct pwm_state){ + .enabled = true, + .polarity = PWM_POLARITY_INVERSED, + .period = wf->period_length_ns, + .duty_cycle = wf->period_length_ns - wf->duty_length_ns, + }; + } else { + *state = (struct pwm_state){ + .enabled = false, + }; + } +} + +static void pwm_state2wf(const struct pwm_state *state, struct pwm_waveform *wf) +{ + if (state->enabled) { + if (state->polarity == PWM_POLARITY_NORMAL) + *wf = (struct pwm_waveform){ + .period_length_ns = state->period, + .duty_length_ns = state->duty_cycle, + .duty_offset_ns = 0, + }; + else + *wf = (struct pwm_waveform){ + .period_length_ns = state->period, + .duty_length_ns = state->period - state->duty_cycle, + .duty_offset_ns = state->duty_cycle, + }; + } else { + *wf = (struct pwm_waveform){ + .period_length_ns = 0, + }; + } +} + +static bool pwm_check_rounding(const struct pwm_waveform *wf, + const struct pwm_waveform *wf_rounded) +{ + if (!wf->period_length_ns) + return true; + + if (wf->period_length_ns < wf_rounded->period_length_ns) + return false; + + if (wf->duty_length_ns < wf_rounded->duty_length_ns) + return false; + + if (wf->duty_offset_ns < wf_rounded->duty_offset_ns) + return false; + + return true; +} + +static int __pwm_round_waveform_tohw(struct pwm_chip *chip, struct pwm_device *pwm, + const struct pwm_waveform *wf, void *wfhw) +{ + const struct pwm_ops *ops = chip->ops; + + return ops->round_waveform_tohw(chip, pwm, wf, wfhw); +} + +static int __pwm_round_waveform_fromhw(struct pwm_chip *chip, struct pwm_device *pwm, + const void *wfhw, struct pwm_waveform *wf) +{ + const struct pwm_ops *ops = chip->ops; + + return ops->round_waveform_fromhw(chip, pwm, wfhw, wf); +} + +static int __pwm_read_waveform(struct pwm_chip *chip, struct pwm_device *pwm, 
void *wfhw) +{ + const struct pwm_ops *ops = chip->ops; + + return ops->read_waveform(chip, pwm, wfhw); +} + +static int __pwm_write_waveform(struct pwm_chip *chip, struct pwm_device *pwm, const void *wfhw) +{ + const struct pwm_ops *ops = chip->ops; + + return ops->write_waveform(chip, pwm, wfhw); +} + +#define WFHWSIZE 20 + static void pwm_apply_debug(struct pwm_device *pwm, const struct pwm_state *state) { @@ -182,6 +278,7 @@ static bool pwm_state_valid(const struct pwm_state *state) static int __pwm_apply(struct pwm_device *pwm, const struct pwm_state *state) { struct pwm_chip *chip; + const struct pwm_ops *ops; int err; if (!pwm || !state) @@ -205,6 +302,7 @@ static int __pwm_apply(struct pwm_device *pwm, const struct pwm_state *state) } chip = pwm->chip; + ops = chip->ops; if (state->period == pwm->state.period && state->duty_cycle == pwm->state.duty_cycle && @@ -213,18 +311,69 @@ static int __pwm_apply(struct pwm_device *pwm, const struct pwm_state *state) state->usage_power == pwm->state.usage_power) return 0; - err = chip->ops->apply(chip, pwm, state); - trace_pwm_apply(pwm, state, err); - if (err) - return err; + if (ops->write_waveform) { + struct pwm_waveform wf; + char wfhw[WFHWSIZE]; - pwm->state = *state; + BUG_ON(WFHWSIZE < ops->sizeof_wfhw); - /* - * only do this after pwm->state was applied as some - * implementations of .get_state depend on this - */ - pwm_apply_debug(pwm, state); + pwm_state2wf(state, &wf); + + /* + * The rounding is wrong here for states with inverted polarity. + * While .apply() rounds down duty_cycle (which represents the + * time from the start of the period to the inner edge), + * .round_waveform_tohw() rounds down the time the PWM is high. + * Can be fixed if the need arises, until reported otherwise + * let's assume that consumers don't care. + */ + + err = __pwm_round_waveform_tohw(chip, pwm, &wf, &wfhw); + if (err) { + if (err > 0) + /* + * This signals an invalid request, typically + * the requested period (or duty_offset) is + * smaller than possible with the hardware. 
+ */ + return -EINVAL; + + return err; + } + + if (IS_ENABLED(CONFIG_PWM_DEBUG)) { + struct pwm_waveform wf_rounded; + + err = __pwm_round_waveform_fromhw(chip, pwm, &wfhw, &wf_rounded); + if (err) + return err; + + if (!pwm_check_rounding(&wf, &wf_rounded)) + dev_err(&chip->dev, "Wrong rounding: requested %llu/%llu [+%llu], result %llu/%llu [+%llu]\n", + wf.duty_length_ns, wf.period_length_ns, wf.duty_offset_ns, + wf_rounded.duty_length_ns, wf_rounded.period_length_ns, wf_rounded.duty_offset_ns); + } + + err = __pwm_write_waveform(chip, pwm, &wfhw); + if (err) + return err; + + pwm->state = *state; + + } else { + err = ops->apply(chip, pwm, state); + trace_pwm_apply(pwm, state, err); + if (err) + return err; + + pwm->state = *state; + + /* + * only do this after pwm->state was applied as some + * implementations of .get_state() depend on this + */ + pwm_apply_debug(pwm, state); + } return 0; } @@ -292,6 +441,41 @@ int pwm_apply_atomic(struct pwm_device *pwm, const struct pwm_state *state) } EXPORT_SYMBOL_GPL(pwm_apply_atomic); +static int pwm_get_state_hw(struct pwm_device *pwm, struct pwm_state *state) +{ + struct pwm_chip *chip = pwm->chip; + const struct pwm_ops *ops = chip->ops; + int ret = -EOPNOTSUPP; + + if (ops->read_waveform) { + char wfhw[WFHWSIZE]; + struct pwm_waveform wf; + + BUG_ON(WFHWSIZE < ops->sizeof_wfhw); + + scoped_guard(pwmchip, chip) { + + ret = __pwm_read_waveform(chip, pwm, &wfhw); + if (ret) + return ret; + + ret = __pwm_round_waveform_fromhw(chip, pwm, &wfhw, &wf); + if (ret) + return ret; + } + + pwm_wf2state(&wf, state); + + } else if (ops->get_state) { + scoped_guard(pwmchip, chip) + ret = ops->get_state(chip, pwm, state); + + trace_pwm_get(pwm, state, ret); + } + + return ret; +} + /** * pwm_adjust_config() - adjust the current PWM config to the PWM arguments * @pwm: PWM device @@ -435,7 +619,7 @@ err_get_device: } } - if (ops->get_state) { + if (ops->read_waveform || ops->get_state) { /* * Zero-initialize state because most drivers are unaware of * .usage_power. 
The other members of state are supposed to be @@ -445,11 +629,7 @@ err_get_device: */ struct pwm_state state = { 0, }; - scoped_guard(pwmchip, chip) - err = ops->get_state(chip, pwm, &state); - - trace_pwm_get(pwm, &state, err); - + err = pwm_get_state_hw(pwm, &state); if (!err) pwm->state = state; @@ -1136,12 +1316,24 @@ static bool pwm_ops_check(const struct pwm_chip *chip) { const struct pwm_ops *ops = chip->ops; - if (!ops->apply) - return false; + if (ops->write_waveform) { + if (!ops->round_waveform_tohw || + !ops->round_waveform_fromhw || + !ops->write_waveform) + return false; - if (IS_ENABLED(CONFIG_PWM_DEBUG) && !ops->get_state) - dev_warn(pwmchip_parent(chip), - "Please implement the .get_state() callback\n"); + if (WFHWSIZE < ops->sizeof_wfhw) { + dev_warn(pwmchip_parent(chip), "WFHWSIZE < %zu\n", ops->sizeof_wfhw); + return false; + } + } else { + if (!ops->apply) + return false; + + if (IS_ENABLED(CONFIG_PWM_DEBUG) && !ops->get_state) + dev_warn(pwmchip_parent(chip), + "Please implement the .get_state() callback\n"); + } return true; } diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 3ea73e075abe..d8cfe1c9b19d 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -49,6 +49,31 @@ enum { PWMF_EXPORTED = 1, }; +/** + * struct pwm_waveform - description of a PWM waveform + * @period_length_ns: PWM period + * @duty_length_ns: PWM duty cycle + * @duty_offset_ns: offset of the rising edge from the period's start + * + * This is a representation of a PWM waveform alternative to struct pwm_state + * below. It's more expressive than struct pwm_state as it contains a + * duty_offset_ns and so can represent offsets other than zero (with .polarity = + * PWM_POLARITY_NORMAL) and period - duty_cycle (.polarity = + * PWM_POLARITY_INVERSED). + * + * Note there is no explicit bool for enabled. A "disabled" PWM is represented + * by .period_length_ns = 0. Note further that the behaviour of a "disabled" PWM + * is undefined. Depending on the hardware's capabilities it might drive the + * active or inactive level, go high-z or even continue to toggle. + * + * The unit for all three members is nanoseconds. + */ +struct pwm_waveform { + u64 period_length_ns; + u64 duty_length_ns; + u64 duty_offset_ns; +}; + /* * struct pwm_state - state of a PWM channel * @period: PWM period (in nanoseconds) @@ -259,6 +284,17 @@ struct pwm_ops { void (*free)(struct pwm_chip *chip, struct pwm_device *pwm); int (*capture)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); + + size_t sizeof_wfhw; + int (*round_waveform_tohw)(struct pwm_chip *chip, struct pwm_device *pwm, + const struct pwm_waveform *wf, void *wfhw); + int (*round_waveform_fromhw)(struct pwm_chip *chip, struct pwm_device *pwm, + const void *wfhw, struct pwm_waveform *wf); + int (*read_waveform)(struct pwm_chip *chip, struct pwm_device *pwm, + void *wfhw); + int (*write_waveform)(struct pwm_chip *chip, struct pwm_device *pwm, + const void *wfhw); + int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state); int (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, -- cgit v1.2.3 From 6c5126c6406d1c31e91f5b925c621c1c785366be Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 20 Sep 2024 10:57:59 +0200 Subject: pwm: Provide new consumer API functions for waveforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide API functions for consumers to work with waveforms. 
Note that one relevant difference between pwm_get_state() and pwm_get_waveform*() is that the latter yields the actually configured hardware state, while the former yields the last state passed to pwm_apply*() and so doesn't account for hardware specific rounding. Signed-off-by: Uwe Kleine-König Tested-by: Trevor Gamblin Link: https://lore.kernel.org/r/6c97d27682853f603e18e9196043886dd671845d.1726819463.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/core.c | 261 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pwm.h | 6 +- 2 files changed, 266 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index bbe7bfdb1549..038f17dd2757 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -49,6 +49,30 @@ static void pwmchip_unlock(struct pwm_chip *chip) DEFINE_GUARD(pwmchip, struct pwm_chip *, pwmchip_lock(_T), pwmchip_unlock(_T)) +static bool pwm_wf_valid(const struct pwm_waveform *wf) +{ + /* + * For now restrict waveforms to period_length_ns <= S64_MAX to provide + * some space for future extensions. One possibility is to simplify + * representing waveforms with inverted polarity using negative values + * somehow. + */ + if (wf->period_length_ns > S64_MAX) + return false; + + if (wf->duty_length_ns > wf->period_length_ns) + return false; + + /* + * .duty_offset_ns is supposed to be smaller than .period_length_ns, apart + * from the corner case .duty_offset_ns == 0 && .period_length_ns == 0. + */ + if (wf->duty_offset_ns && wf->duty_offset_ns >= wf->period_length_ns) + return false; + + return true; +} + static void pwm_wf2state(const struct pwm_waveform *wf, struct pwm_state *state) { if (wf->period_length_ns) { @@ -95,6 +119,29 @@ static void pwm_state2wf(const struct pwm_state *state, struct pwm_waveform *wf) } } +static int pwmwfcmp(const struct pwm_waveform *a, const struct pwm_waveform *b) +{ + if (a->period_length_ns > b->period_length_ns) + return 1; + + if (a->period_length_ns < b->period_length_ns) + return -1; + + if (a->duty_length_ns > b->duty_length_ns) + return 1; + + if (a->duty_length_ns < b->duty_length_ns) + return -1; + + if (a->duty_offset_ns > b->duty_offset_ns) + return 1; + + if (a->duty_offset_ns < b->duty_offset_ns) + return -1; + + return 0; +} + static bool pwm_check_rounding(const struct pwm_waveform *wf, const struct pwm_waveform *wf_rounded) { @@ -145,6 +192,220 @@ static int __pwm_write_waveform(struct pwm_chip *chip, struct pwm_device *pwm, c #define WFHWSIZE 20 +/** + * pwm_round_waveform_might_sleep - Query hardware capabilities + * Cannot be used in atomic context. + * @pwm: PWM device + * @wf: waveform to round and output parameter + * + * Typically a given waveform cannot be implemented exactly by hardware, e.g. + * because hardware only supports coarse period resolution or no duty_offset. + * This function returns the actually implemented waveform if you pass wf to + * pwm_set_waveform_might_sleep now. + * + * Note however that the world doesn't stop turning when you call it, so when + * doing + * + * pwm_round_waveform_might_sleep(mypwm, &wf); + * pwm_set_waveform_might_sleep(mypwm, &wf, true); + * + * the latter might fail, e.g. because an input clock changed its rate between + * these two calls and the waveform determined by + * pwm_round_waveform_might_sleep() cannot be implemented any more. 
+ * + * Returns 0 on success, 1 if there is no valid hardware configuration matching + * the input waveform under the PWM rounding rules or a negative errno. + */ +int pwm_round_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf) +{ + struct pwm_chip *chip = pwm->chip; + const struct pwm_ops *ops = chip->ops; + struct pwm_waveform wf_req = *wf; + char wfhw[WFHWSIZE]; + int ret_tohw, ret_fromhw; + + BUG_ON(WFHWSIZE < ops->sizeof_wfhw); + + if (!pwm_wf_valid(wf)) + return -EINVAL; + + guard(pwmchip)(chip); + + if (!chip->operational) + return -ENODEV; + + ret_tohw = __pwm_round_waveform_tohw(chip, pwm, wf, wfhw); + if (ret_tohw < 0) + return ret_tohw; + + if (IS_ENABLED(CONFIG_PWM_DEBUG) && ret_tohw > 1) + dev_err(&chip->dev, "Unexpected return value from __pwm_round_waveform_tohw: requested %llu/%llu [+%llu], return value %d\n", + wf_req.duty_length_ns, wf_req.period_length_ns, wf_req.duty_offset_ns, ret_tohw); + + ret_fromhw = __pwm_round_waveform_fromhw(chip, pwm, wfhw, wf); + if (ret_fromhw < 0) + return ret_fromhw; + + if (IS_ENABLED(CONFIG_PWM_DEBUG) && ret_fromhw > 0) + dev_err(&chip->dev, "Unexpected return value from __pwm_round_waveform_fromhw: requested %llu/%llu [+%llu], return value %d\n", + wf_req.duty_length_ns, wf_req.period_length_ns, wf_req.duty_offset_ns, ret_tohw); + + if (IS_ENABLED(CONFIG_PWM_DEBUG) && + ret_tohw == 0 && !pwm_check_rounding(&wf_req, wf)) + dev_err(&chip->dev, "Wrong rounding: requested %llu/%llu [+%llu], result %llu/%llu [+%llu]\n", + wf_req.duty_length_ns, wf_req.period_length_ns, wf_req.duty_offset_ns, + wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns); + + return ret_tohw; +} +EXPORT_SYMBOL_GPL(pwm_round_waveform_might_sleep); + +/** + * pwm_get_waveform_might_sleep - Query hardware about current configuration + * Cannot be used in atomic context. + * @pwm: PWM device + * @wf: output parameter + * + * Stores the current configuration of the PWM in @wf. Note this is the + * equivalent of pwm_get_state_hw() (and not pwm_get_state()) for pwm_waveform. 
+ */ +int pwm_get_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf) +{ + struct pwm_chip *chip = pwm->chip; + const struct pwm_ops *ops = chip->ops; + char wfhw[WFHWSIZE]; + int err; + + BUG_ON(WFHWSIZE < ops->sizeof_wfhw); + + guard(pwmchip)(chip); + + if (!chip->operational) + return -ENODEV; + + err = __pwm_read_waveform(chip, pwm, &wfhw); + if (err) + return err; + + return __pwm_round_waveform_fromhw(chip, pwm, &wfhw, wf); +} +EXPORT_SYMBOL_GPL(pwm_get_waveform_might_sleep); + +/* Called with the pwmchip lock held */ +static int __pwm_set_waveform(struct pwm_device *pwm, + const struct pwm_waveform *wf, + bool exact) +{ + struct pwm_chip *chip = pwm->chip; + const struct pwm_ops *ops = chip->ops; + char wfhw[WFHWSIZE]; + struct pwm_waveform wf_rounded; + int err; + + BUG_ON(WFHWSIZE < ops->sizeof_wfhw); + + if (!pwm_wf_valid(wf)) + return -EINVAL; + + err = __pwm_round_waveform_tohw(chip, pwm, wf, &wfhw); + if (err) + return err; + + if ((IS_ENABLED(CONFIG_PWM_DEBUG) || exact) && wf->period_length_ns) { + err = __pwm_round_waveform_fromhw(chip, pwm, &wfhw, &wf_rounded); + if (err) + return err; + + if (IS_ENABLED(CONFIG_PWM_DEBUG) && !pwm_check_rounding(wf, &wf_rounded)) + dev_err(&chip->dev, "Wrong rounding: requested %llu/%llu [+%llu], result %llu/%llu [+%llu]\n", + wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns, + wf_rounded.duty_length_ns, wf_rounded.period_length_ns, wf_rounded.duty_offset_ns); + + if (exact && pwmwfcmp(wf, &wf_rounded)) { + dev_dbg(&chip->dev, "Requested no rounding, but %llu/%llu [+%llu] -> %llu/%llu [+%llu]\n", + wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns, + wf_rounded.duty_length_ns, wf_rounded.period_length_ns, wf_rounded.duty_offset_ns); + + return 1; + } + } + + err = __pwm_write_waveform(chip, pwm, &wfhw); + if (err) + return err; + + /* update .state */ + pwm_wf2state(wf, &pwm->state); + + if (IS_ENABLED(CONFIG_PWM_DEBUG) && ops->read_waveform && wf->period_length_ns) { + struct pwm_waveform wf_set; + + err = __pwm_read_waveform(chip, pwm, &wfhw); + if (err) + /* maybe ignore? */ + return err; + + err = __pwm_round_waveform_fromhw(chip, pwm, &wfhw, &wf_set); + if (err) + /* maybe ignore? */ + return err; + + if (pwmwfcmp(&wf_set, &wf_rounded) != 0) + dev_err(&chip->dev, + "Unexpected setting: requested %llu/%llu [+%llu], expected %llu/%llu [+%llu], set %llu/%llu [+%llu]\n", + wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns, + wf_rounded.duty_length_ns, wf_rounded.period_length_ns, wf_rounded.duty_offset_ns, + wf_set.duty_length_ns, wf_set.period_length_ns, wf_set.duty_offset_ns); + } + return 0; +} + +/** + * pwm_set_waveform_might_sleep - Apply a new waveform + * Cannot be used in atomic context. + * @pwm: PWM device + * @wf: The waveform to apply + * @exact: If true no rounding is allowed + * + * Typically a requested waveform cannot be implemented exactly, e.g. because + * you requested .period_length_ns = 100 ns, but the hardware can only set + * periods that are a multiple of 8.5 ns. With that hardware passing exact = + * true results in pwm_set_waveform_might_sleep() failing and returning 1. If + * exact = false you get a period of 93.5 ns (i.e. the biggest period not bigger + * than the requested value). + * Note that even with exact = true, some rounding by less than 1 is + * possible/needed. In the above example requesting .period_length_ns = 94 and + * exact = true, you get the hardware configured with period = 93.5 ns. 
+ */ +int pwm_set_waveform_might_sleep(struct pwm_device *pwm, + const struct pwm_waveform *wf, bool exact) +{ + struct pwm_chip *chip = pwm->chip; + int err; + + might_sleep(); + + guard(pwmchip)(chip); + + if (!chip->operational) + return -ENODEV; + + if (IS_ENABLED(CONFIG_PWM_DEBUG) && chip->atomic) { + /* + * Catch any drivers that have been marked as atomic but + * that will sleep anyway. + */ + non_block_start(); + err = __pwm_set_waveform(pwm, wf, exact); + non_block_end(); + } else { + err = __pwm_set_waveform(pwm, wf, exact); + } + + return err; +} +EXPORT_SYMBOL_GPL(pwm_set_waveform_might_sleep); + static void pwm_apply_debug(struct pwm_device *pwm, const struct pwm_state *state) { diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d8cfe1c9b19d..c3d9ddeafa65 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -358,7 +358,11 @@ static inline void pwmchip_set_drvdata(struct pwm_chip *chip, void *data) } #if IS_ENABLED(CONFIG_PWM) -/* PWM user APIs */ + +/* PWM consumer APIs */ +int pwm_round_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf); +int pwm_get_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf); +int pwm_set_waveform_might_sleep(struct pwm_device *pwm, const struct pwm_waveform *wf, bool exact); int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state); int pwm_apply_atomic(struct pwm_device *pwm, const struct pwm_state *state); int pwm_adjust_config(struct pwm_device *pwm); -- cgit v1.2.3 From 1afd01db1a76cdd1d96696e3790d66c79621784c Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 20 Sep 2024 10:58:00 +0200 Subject: pwm: Add tracing for waveform callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds trace events for the recently introduced waveform callbacks. With the introduction of some helper macros consistency among the different events is ensured. 
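These events fire inside the __pwm_round_waveform_tohw(),
__pwm_round_waveform_fromhw(), __pwm_read_waveform() and
__pwm_write_waveform() wrappers, i.e. whenever the consumer API from the
previous patch is used. A minimal consumer sketch (illustrative only;
"mypwm" stands for a struct pwm_device obtained e.g. via pwm_get()):

	struct pwm_waveform wf = {
		.period_length_ns = 1000000,	/* 1 ms */
		.duty_length_ns   = 250000,	/* 25 % duty */
		.duty_offset_ns   = 0,
	};
	int err;

	/* round first to see what the hardware would actually do */
	err = pwm_round_waveform_might_sleep(mypwm, &wf);
	if (err < 0)
		return err;

	/* then apply it, allowing further rounding */
	err = pwm_set_waveform_might_sleep(mypwm, &wf, false);

Every lowlevel callback invoked on behalf of such a call is now visible
in the trace buffer together with its arguments and return value.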
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/1d71879b0de3bf01459c7a9d0f040d43eb5ace56.1726819463.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/core.c | 24 ++++++-- include/trace/events/pwm.h | 134 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 146 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 038f17dd2757..c3bdebf4cf6e 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -164,30 +164,46 @@ static int __pwm_round_waveform_tohw(struct pwm_chip *chip, struct pwm_device *p const struct pwm_waveform *wf, void *wfhw) { const struct pwm_ops *ops = chip->ops; + int ret; + + ret = ops->round_waveform_tohw(chip, pwm, wf, wfhw); + trace_pwm_round_waveform_tohw(pwm, wf, wfhw, ret); - return ops->round_waveform_tohw(chip, pwm, wf, wfhw); + return ret; } static int __pwm_round_waveform_fromhw(struct pwm_chip *chip, struct pwm_device *pwm, const void *wfhw, struct pwm_waveform *wf) { const struct pwm_ops *ops = chip->ops; + int ret; + + ret = ops->round_waveform_fromhw(chip, pwm, wfhw, wf); + trace_pwm_round_waveform_fromhw(pwm, wfhw, wf, ret); - return ops->round_waveform_fromhw(chip, pwm, wfhw, wf); + return ret; } static int __pwm_read_waveform(struct pwm_chip *chip, struct pwm_device *pwm, void *wfhw) { const struct pwm_ops *ops = chip->ops; + int ret; + + ret = ops->read_waveform(chip, pwm, wfhw); + trace_pwm_read_waveform(pwm, wfhw, ret); - return ops->read_waveform(chip, pwm, wfhw); + return ret; } static int __pwm_write_waveform(struct pwm_chip *chip, struct pwm_device *pwm, const void *wfhw) { const struct pwm_ops *ops = chip->ops; + int ret; + + ret = ops->write_waveform(chip, pwm, wfhw); + trace_pwm_write_waveform(pwm, wfhw, ret); - return ops->write_waveform(chip, pwm, wfhw); + return ret; } #define WFHWSIZE 20 diff --git a/include/trace/events/pwm.h b/include/trace/events/pwm.h index 8022701c446d..8ba898fd335c 100644 --- a/include/trace/events/pwm.h +++ b/include/trace/events/pwm.h @@ -8,15 +8,135 @@ #include #include +#define TP_PROTO_pwm(args...) \ + TP_PROTO(struct pwm_device *pwm, args) + +#define TP_ARGS_pwm(args...) \ + TP_ARGS(pwm, args) + +#define TP_STRUCT__entry_pwm(args...) \ + TP_STRUCT__entry( \ + __field(unsigned int, chipid) \ + __field(unsigned int, hwpwm) \ + args) + +#define TP_fast_assign_pwm(args...) \ + TP_fast_assign( \ + __entry->chipid = pwm->chip->id; \ + __entry->hwpwm = pwm->hwpwm; \ + args) + +#define TP_printk_pwm(fmt, args...) 
\ + TP_printk("pwmchip%u.%u: " fmt, __entry->chipid, __entry->hwpwm, args) + +#define __field_pwmwf(wf) \ + __field(u64, wf ## _period_length_ns) \ + __field(u64, wf ## _duty_length_ns) \ + __field(u64, wf ## _duty_offset_ns) \ + +#define fast_assign_pwmwf(wf) \ + __entry->wf ## _period_length_ns = wf->period_length_ns; \ + __entry->wf ## _duty_length_ns = wf->duty_length_ns; \ + __entry->wf ## _duty_offset_ns = wf->duty_offset_ns + +#define printk_pwmwf_format(wf) \ + "%lld/%lld [+%lld]" + +#define printk_pwmwf_formatargs(wf) \ + __entry->wf ## _duty_length_ns, __entry->wf ## _period_length_ns, __entry->wf ## _duty_offset_ns + +TRACE_EVENT(pwm_round_waveform_tohw, + + TP_PROTO_pwm(const struct pwm_waveform *wf, void *wfhw, int err), + + TP_ARGS_pwm(wf, wfhw, err), + + TP_STRUCT__entry_pwm( + __field_pwmwf(wf) + __field(void *, wfhw) + __field(int, err) + ), + + TP_fast_assign_pwm( + fast_assign_pwmwf(wf); + __entry->wfhw = wfhw; + __entry->err = err; + ), + + TP_printk_pwm(printk_pwmwf_format(wf) " > %p err=%d", + printk_pwmwf_formatargs(wf), __entry->wfhw, __entry->err) +); + +TRACE_EVENT(pwm_round_waveform_fromhw, + + TP_PROTO_pwm(const void *wfhw, struct pwm_waveform *wf, int err), + + TP_ARGS_pwm(wfhw, wf, err), + + TP_STRUCT__entry_pwm( + __field(const void *, wfhw) + __field_pwmwf(wf) + __field(int, err) + ), + + TP_fast_assign_pwm( + __entry->wfhw = wfhw; + fast_assign_pwmwf(wf); + __entry->err = err; + ), + + TP_printk_pwm("%p > " printk_pwmwf_format(wf) " err=%d", + __entry->wfhw, printk_pwmwf_formatargs(wf), __entry->err) +); + +TRACE_EVENT(pwm_read_waveform, + + TP_PROTO_pwm(void *wfhw, int err), + + TP_ARGS_pwm(wfhw, err), + + TP_STRUCT__entry_pwm( + __field(void *, wfhw) + __field(int, err) + ), + + TP_fast_assign_pwm( + __entry->wfhw = wfhw; + __entry->err = err; + ), + + TP_printk_pwm("%p err=%d", + __entry->wfhw, __entry->err) +); + +TRACE_EVENT(pwm_write_waveform, + + TP_PROTO_pwm(const void *wfhw, int err), + + TP_ARGS_pwm(wfhw, err), + + TP_STRUCT__entry_pwm( + __field(const void *, wfhw) + __field(int, err) + ), + + TP_fast_assign_pwm( + __entry->wfhw = wfhw; + __entry->err = err; + ), + + TP_printk_pwm("%p err=%d", + __entry->wfhw, __entry->err) +); + + DECLARE_EVENT_CLASS(pwm, TP_PROTO(struct pwm_device *pwm, const struct pwm_state *state, int err), TP_ARGS(pwm, state, err), - TP_STRUCT__entry( - __field(unsigned int, chipid) - __field(unsigned int, hwpwm) + TP_STRUCT__entry_pwm( __field(u64, period) __field(u64, duty_cycle) __field(enum pwm_polarity, polarity) @@ -24,9 +144,7 @@ DECLARE_EVENT_CLASS(pwm, __field(int, err) ), - TP_fast_assign( - __entry->chipid = pwm->chip->id; - __entry->hwpwm = pwm->hwpwm; + TP_fast_assign_pwm( __entry->period = state->period; __entry->duty_cycle = state->duty_cycle; __entry->polarity = state->polarity; @@ -34,8 +152,8 @@ DECLARE_EVENT_CLASS(pwm, __entry->err = err; ), - TP_printk("pwmchip%u.%u: period=%llu duty_cycle=%llu polarity=%d enabled=%d err=%d", - __entry->chipid, __entry->hwpwm, __entry->period, __entry->duty_cycle, + TP_printk_pwm("period=%llu duty_cycle=%llu polarity=%d enabled=%d err=%d", + __entry->period, __entry->duty_cycle, __entry->polarity, __entry->enabled, __entry->err) ); -- cgit v1.2.3 From caf78b0465053c23aa6211b9815dd5433766627d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 24 Sep 2024 12:08:08 +0200 Subject: regcache: Improve documentation of available cache types There is some user confusion about which cache types to choose when which is not helped by the lack of any central documentation providing 
an overview of what's available. Provide a short overview in the API header to try to help reduce this. Signed-off-by: Mark Brown Link: https://patch.msgid.link/20240924-regcache-document-types-v1-1-e157054e1215@kernel.org Signed-off-by: Mark Brown --- include/linux/regmap.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index f9ccad32fc5c..da07584284ab 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -54,7 +54,14 @@ struct sdw_slave; #define REGMAP_UPSHIFT(s) (-(s)) #define REGMAP_DOWNSHIFT(s) (s) -/* An enum of all the supported cache types */ +/* + * The supported cache types, the default is no cache. Any new caches + * should usually use the maple tree cache unless they specifically + * require that there are never any allocations at runtime and can't + * provide defaults in which case they should use the flat cache. The + * rbtree cache *may* have some performance advantage for very low end + * systems that make heavy use of cache syncs but is mainly legacy. + */ enum regcache_type { REGCACHE_NONE, REGCACHE_RBTREE, -- cgit v1.2.3 From 5441b6975adc26aeaca316b9075e04a98238b1b3 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 25 Sep 2024 17:38:04 +0800 Subject: regulator: Add of_regulator_get_optional() for pure DT regulator lookup The to-be-introduced I2C component prober needs to enable regulator supplies (and toggle GPIO pins) for the various components it intends to probe. To support this, a new "pure DT lookup" method for getting regulator supplies is needed, since the device normally requesting the supply won't get created until after the component is probed to be available. Add a new of_regulator_get_optional() function for this. This mirrors the existing regulator_get_optional() function, but is OF-specific. The underlying code that supports the existing regulator_get*() functions has been reworked in previous patches to support this specific case. Also convert an existing usage of "dev && dev->of_node" to "dev_of_node(dev)". 
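As an illustration of the intended use (not taken from this patch), a
power-domain provider could look up a per-domain supply from the
domain's own device node instead of from the provider device; the
"domain" supply name and the variable names are hypothetical:

	struct regulator *supply;

	supply = of_regulator_get_optional(provider_dev, domain_np, "domain");
	if (IS_ERR(supply)) {
		if (PTR_ERR(supply) == -ENODEV)
			supply = NULL;	/* no "domain-supply" property, that's fine */
		else
			return PTR_ERR(supply);	/* e.g. -EPROBE_DEFER */
	}

This mirrors regulator_get_optional(), except that the "<name>-supply"
property is looked up in the given node rather than in dev->of_node.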
Link: https://lore.kernel.org/all/20231220203537.83479-2-jernej.skrabec@gmail.com/ [1] Signed-off-by: Chen-Yu Tsai Link: https://patch.msgid.link/20240925093807.1026949-2-wenst@chromium.org Reviewed-by: Andy Shevchenko Signed-off-by: Mark Brown --- drivers/regulator/core.c | 4 +-- drivers/regulator/internal.h | 2 ++ drivers/regulator/of_regulator.c | 51 +++++++++++++++++++++++++++++++++----- include/linux/regulator/consumer.h | 20 +++++++++++++++ 4 files changed, 69 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 1179766811f5..d0b3879f2746 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1959,8 +1959,8 @@ static struct regulator_dev *regulator_dev_lookup(struct device *dev, regulator_supply_alias(&dev, &supply); /* first do a dt based lookup */ - if (dev && dev->of_node) { - r = of_regulator_dev_lookup(dev, supply); + if (dev_of_node(dev)) { + r = of_regulator_dev_lookup(dev, dev_of_node(dev), supply); if (!IS_ERR(r)) return r; if (PTR_ERR(r) == -EPROBE_DEFER) diff --git a/drivers/regulator/internal.h b/drivers/regulator/internal.h index 5b43f802468d..f62cacbbc729 100644 --- a/drivers/regulator/internal.h +++ b/drivers/regulator/internal.h @@ -67,6 +67,7 @@ static inline struct regulator_dev *dev_to_rdev(struct device *dev) #ifdef CONFIG_OF struct regulator_dev *of_regulator_dev_lookup(struct device *dev, + struct device_node *np, const char *supply); struct regulator_init_data *regulator_of_get_init_data(struct device *dev, const struct regulator_desc *desc, @@ -82,6 +83,7 @@ bool of_check_coupling_data(struct regulator_dev *rdev); #else static inline struct regulator_dev *of_regulator_dev_lookup(struct device *dev, + struct device_node *np, const char *supply) { return ERR_PTR(-ENODEV); diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index 3f490d81abc2..358c3ed791db 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -588,7 +588,8 @@ err_node_put: /** * of_get_regulator - get a regulator device node based on supply name - * @dev: Device pointer for the consumer (of regulator) device + * @dev: Device pointer for dev_printk() messages + * @node: Device node pointer for supply property lookup * @supply: regulator supply name * * Extract the regulator device node corresponding to the supply name. @@ -596,15 +597,16 @@ err_node_put: * Return: Pointer to the &struct device_node corresponding to the regulator * if found, or %NULL if not found. */ -static struct device_node *of_get_regulator(struct device *dev, const char *supply) +static struct device_node *of_get_regulator(struct device *dev, struct device_node *node, + const char *supply) { struct device_node *regnode = NULL; char prop_name[64]; /* 64 is max size of property name */ - dev_dbg(dev, "Looking up %s-supply from device tree\n", supply); + dev_dbg(dev, "Looking up %s-supply from device node %pOF\n", supply, node); snprintf(prop_name, 64, "%s-supply", supply); - regnode = of_parse_phandle(dev->of_node, prop_name, 0); + regnode = of_parse_phandle(node, prop_name, 0); if (regnode) return regnode; @@ -628,6 +630,7 @@ static struct regulator_dev *of_find_regulator_by_node(struct device_node *np) /** * of_regulator_dev_lookup - lookup a regulator device with device tree only * @dev: Device pointer for regulator supply lookup. + * @np: Device node pointer for regulator supply lookup. * @supply: Supply name or regulator ID. 
* * Return: Pointer to the &struct regulator_dev on success, or ERR_PTR() @@ -642,13 +645,13 @@ static struct regulator_dev *of_find_regulator_by_node(struct device_node *np) * * -%ENODEV if lookup fails permanently. * * -%EPROBE_DEFER if lookup could succeed in the future. */ -struct regulator_dev *of_regulator_dev_lookup(struct device *dev, +struct regulator_dev *of_regulator_dev_lookup(struct device *dev, struct device_node *np, const char *supply) { struct regulator_dev *r; struct device_node *node; - node = of_get_regulator(dev, supply); + node = of_get_regulator(dev, np, supply); if (node) { r = of_find_regulator_by_node(node); of_node_put(node); @@ -665,6 +668,42 @@ struct regulator_dev *of_regulator_dev_lookup(struct device *dev, return ERR_PTR(-ENODEV); } +static struct regulator *_of_regulator_get(struct device *dev, struct device_node *node, + const char *id, enum regulator_get_type get_type) +{ + struct regulator_dev *r; + int ret; + + ret = _regulator_get_common_check(dev, id, get_type); + if (ret) + return ERR_PTR(ret); + + r = of_regulator_dev_lookup(dev, node, id); + return _regulator_get_common(r, dev, id, get_type); +} + +/** + * of_regulator_get_optional - get optional regulator via device tree lookup + * @dev: device used for dev_printk() messages + * @node: device node for regulator "consumer" + * @id: Supply name + * + * Return: pointer to struct regulator corresponding to the regulator producer, + * or PTR_ERR() encoded error number. + * + * This is intended for use by consumers that want to get a regulator + * supply directly from a device node, and can and want to deal with + * absence of such supplies. This will _not_ consider supply aliases. + * See regulator_dev_lookup(). + */ +struct regulator *of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id) +{ + return _of_regulator_get(dev, node, id, OPTIONAL_GET); +} +EXPORT_SYMBOL_GPL(of_regulator_get_optional); + /* * Returns number of regulators coupled with rdev. */ diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index b9ce521910a0..2b22f07e491c 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -168,6 +168,19 @@ int devm_regulator_get_enable_read_voltage(struct device *dev, const char *id); void regulator_put(struct regulator *regulator); void devm_regulator_put(struct regulator *regulator); +#if IS_ENABLED(CONFIG_OF) +struct regulator *__must_check of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id); +#else +static inline struct regulator *__must_check of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id) +{ + return ERR_PTR(-ENODEV); +} +#endif + int regulator_register_supply_alias(struct device *dev, const char *id, struct device *alias_dev, const char *alias_id); @@ -350,6 +363,13 @@ devm_regulator_get_optional(struct device *dev, const char *id) return ERR_PTR(-ENODEV); } +static inline struct regulator *__must_check of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id) +{ + return ERR_PTR(-ENODEV); +} + static inline void regulator_put(struct regulator *regulator) { } -- cgit v1.2.3 From 36ec3f437227470568e5f460997f367f5446a34d Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 25 Sep 2024 17:38:05 +0800 Subject: regulator: Add devres version of of_regulator_get_optional() There are existing uses for a devres version of of_regulator_get_optional() in power domain drivers. 
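As a rough sketch of how such a driver could use the new devres helper (all names below are
hypothetical, not taken from any existing driver):

  #include <linux/err.h>
  #include <linux/of.h>
  #include <linux/regulator/consumer.h>

  /*
   * Sketch: get a per-domain supply from the domain's own node while the
   * lookup is managed against the controller device "dev". "domain" is a
   * placeholder supply name (i.e. a "domain-supply" property).
   */
  static int pd_attach_supply(struct device *dev, struct device_node *pd_node,
                              struct regulator **out)
  {
          struct regulator *supply;

          supply = devm_of_regulator_get_optional(dev, pd_node, "domain");
          if (IS_ERR(supply)) {
                  if (PTR_ERR(supply) == -ENODEV) {
                          *out = NULL;    /* domain simply has no supply */
                          return 0;
                  }
                  return PTR_ERR(supply);
          }

          *out = supply;  /* released automatically on driver detach */
          return 0;
  }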
On MediaTek platforms, power domains may have regulator supplies tied to them. The driver currently tries to use devm_regulator_get() to not have to manage the lifecycle, but ends up doing it in a very hacky way by replacing the device node of the power domain controller device to the device node of the power domain that is currently being registered, getting the supply, and reverting the device node. Provide a better API so that the hack can be replaced. Signed-off-by: Chen-Yu Tsai Link: https://patch.msgid.link/20240925093807.1026949-3-wenst@chromium.org Signed-off-by: Mark Brown --- drivers/regulator/devres.c | 39 ++++++++++++++++++++++++++++++++++++++ drivers/regulator/internal.h | 16 ++++++++++------ drivers/regulator/of_regulator.c | 4 ++-- include/linux/regulator/consumer.h | 17 +++++++++++++++++ 4 files changed, 68 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/regulator/devres.c b/drivers/regulator/devres.c index 1b893cdd1aad..36164aec30e8 100644 --- a/drivers/regulator/devres.c +++ b/drivers/regulator/devres.c @@ -749,3 +749,42 @@ void *devm_regulator_irq_helper(struct device *dev, return ptr; } EXPORT_SYMBOL_GPL(devm_regulator_irq_helper); + +#if IS_ENABLED(CONFIG_OF) +static struct regulator *_devm_of_regulator_get(struct device *dev, struct device_node *node, + const char *id, int get_type) +{ + struct regulator **ptr, *regulator; + + ptr = devres_alloc(devm_regulator_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + regulator = _of_regulator_get(dev, node, id, get_type); + if (!IS_ERR(regulator)) { + *ptr = regulator; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return regulator; +} + +/** + * devm_of_regulator_get_optional - Resource managed of_regulator_get_optional() + * @dev: device used for dev_printk() messages and resource lifetime management + * @node: device node for regulator "consumer" + * @id: supply name or regulator ID. + * + * Managed regulator_get_optional(). Regulators returned from this + * function are automatically regulator_put() on driver detach. See + * of_regulator_get_optional() for more information. 
+ */ +struct regulator *devm_of_regulator_get_optional(struct device *dev, struct device_node *node, + const char *id) +{ + return _devm_of_regulator_get(dev, node, id, OPTIONAL_GET); +} +EXPORT_SYMBOL_GPL(devm_of_regulator_get_optional); +#endif diff --git a/drivers/regulator/internal.h b/drivers/regulator/internal.h index f62cacbbc729..b3d48dc38bc4 100644 --- a/drivers/regulator/internal.h +++ b/drivers/regulator/internal.h @@ -65,6 +65,13 @@ static inline struct regulator_dev *dev_to_rdev(struct device *dev) return container_of(dev, struct regulator_dev, dev); } +enum regulator_get_type { + NORMAL_GET, + EXCLUSIVE_GET, + OPTIONAL_GET, + MAX_GET_TYPE +}; + #ifdef CONFIG_OF struct regulator_dev *of_regulator_dev_lookup(struct device *dev, struct device_node *np, @@ -74,6 +81,9 @@ struct regulator_init_data *regulator_of_get_init_data(struct device *dev, struct regulator_config *config, struct device_node **node); +struct regulator *_of_regulator_get(struct device *dev, struct device_node *node, + const char *id, enum regulator_get_type get_type); + struct regulator_dev *of_parse_coupled_regulator(struct regulator_dev *rdev, int index); @@ -116,12 +126,6 @@ static inline bool of_check_coupling_data(struct regulator_dev *rdev) } #endif -enum regulator_get_type { - NORMAL_GET, - EXCLUSIVE_GET, - OPTIONAL_GET, - MAX_GET_TYPE -}; int _regulator_get_common_check(struct device *dev, const char *id, enum regulator_get_type get_type); diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index 358c3ed791db..3d85762beda6 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -668,8 +668,8 @@ struct regulator_dev *of_regulator_dev_lookup(struct device *dev, struct device_ return ERR_PTR(-ENODEV); } -static struct regulator *_of_regulator_get(struct device *dev, struct device_node *node, - const char *id, enum regulator_get_type get_type) +struct regulator *_of_regulator_get(struct device *dev, struct device_node *node, + const char *id, enum regulator_get_type get_type) { struct regulator_dev *r; int ret; diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 2b22f07e491c..8c3c372ad735 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -172,6 +172,9 @@ void devm_regulator_put(struct regulator *regulator); struct regulator *__must_check of_regulator_get_optional(struct device *dev, struct device_node *node, const char *id); +struct regulator *__must_check devm_of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id); #else static inline struct regulator *__must_check of_regulator_get_optional(struct device *dev, struct device_node *node, @@ -179,6 +182,13 @@ static inline struct regulator *__must_check of_regulator_get_optional(struct de { return ERR_PTR(-ENODEV); } + +static inline struct regulator *__must_check devm_of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id) +{ + return ERR_PTR(-ENODEV); +} #endif int regulator_register_supply_alias(struct device *dev, const char *id, @@ -370,6 +380,13 @@ static inline struct regulator *__must_check of_regulator_get_optional(struct de return ERR_PTR(-ENODEV); } +static inline struct regulator *__must_check devm_of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id) +{ + return ERR_PTR(-ENODEV); +} + static inline void regulator_put(struct regulator *regulator) { } -- cgit v1.2.3 From 
0809a9ccac4a2ffdfd1561bb551aec6099775545 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 2 Sep 2024 20:59:47 +0800 Subject: spi: remove {devm_}spi_alloc_master/slave() All the {devm_}spi_alloc_master/slave() have been replaced, so they can be removed and replaced in doc and comment. No functional changed. Signed-off-by: Yang Yingliang Link: https://patch.msgid.link/20240902125947.1368-8-yangyingliang@huaweicloud.com Signed-off-by: Mark Brown --- Documentation/driver-api/driver-model/devres.rst | 4 ++-- drivers/spi/spi.c | 14 +++++------ include/linux/spi/spi.h | 30 ------------------------ 3 files changed, 9 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 5f2ee8d717b1..ebbf8e4cc85f 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -462,8 +462,8 @@ SLAVE DMA ENGINE devm_acpi_dma_controller_free() SPI - devm_spi_alloc_master() - devm_spi_alloc_slave() + devm_spi_alloc_host() + devm_spi_alloc_target() devm_spi_optimize_message() devm_spi_register_controller() devm_spi_register_host() diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index c1dad30a4528..073ffae97767 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3238,9 +3238,9 @@ static int spi_controller_id_alloc(struct spi_controller *ctlr, int start, int e } /** - * spi_register_controller - register SPI master or slave controller - * @ctlr: initialized master, originally from spi_alloc_master() or - * spi_alloc_slave() + * spi_register_controller - register SPI host or target controller + * @ctlr: initialized controller, originally from spi_alloc_host() or + * spi_alloc_target() * Context: can sleep * * SPI controllers connect to their drivers using some non-SPI bus, @@ -3390,11 +3390,11 @@ static void devm_spi_unregister(struct device *dev, void *res) } /** - * devm_spi_register_controller - register managed SPI master or slave + * devm_spi_register_controller - register managed SPI host or target * controller * @dev: device managing SPI controller - * @ctlr: initialized controller, originally from spi_alloc_master() or - * spi_alloc_slave() + * @ctlr: initialized controller, originally from spi_alloc_host() or + * spi_alloc_target() * Context: can sleep * * Register a SPI device as with spi_register_controller() which will @@ -3478,7 +3478,7 @@ void spi_unregister_controller(struct spi_controller *ctlr) /* * Release the last reference on the controller if its driver - * has not yet been converted to devm_spi_alloc_master/slave(). + * has not yet been converted to devm_spi_alloc_host/target(). 
*/ if (!ctlr->devm_allocated) put_device(&ctlr->dev); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 4b95663163e0..8497f4747e24 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -824,21 +824,6 @@ void spi_take_timestamp_post(struct spi_controller *ctlr, extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); -static inline struct spi_controller *spi_alloc_master(struct device *host, - unsigned int size) -{ - return __spi_alloc_controller(host, size, false); -} - -static inline struct spi_controller *spi_alloc_slave(struct device *host, - unsigned int size) -{ - if (!IS_ENABLED(CONFIG_SPI_SLAVE)) - return NULL; - - return __spi_alloc_controller(host, size, true); -} - static inline struct spi_controller *spi_alloc_host(struct device *dev, unsigned int size) { @@ -858,21 +843,6 @@ struct spi_controller *__devm_spi_alloc_controller(struct device *dev, unsigned int size, bool slave); -static inline struct spi_controller *devm_spi_alloc_master(struct device *dev, - unsigned int size) -{ - return __devm_spi_alloc_controller(dev, size, false); -} - -static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev, - unsigned int size) -{ - if (!IS_ENABLED(CONFIG_SPI_SLAVE)) - return NULL; - - return __devm_spi_alloc_controller(dev, size, true); -} - static inline struct spi_controller *devm_spi_alloc_host(struct device *dev, unsigned int size) { -- cgit v1.2.3 From 6074e905023d09f64f2c896f475820a5623deb2c Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 16 Sep 2024 13:00:25 +0200 Subject: firmware: sysfb: Add a sysfb_handles_screen_info() helper function That can be used by drivers to check if the Generic System Framebuffers (sysfb) support can handle the data contained in the global screen_info. Drivers might need this information to know if have to setup the system framebuffer, or if they have to delegate this action to sysfb instead. Suggested-by: Thomas Zimmermann Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20240916110040.1688511-2-javierm@redhat.com Signed-off-by: Tzung-Bi Shih --- drivers/firmware/sysfb.c | 19 +++++++++++++++++++ include/linux/sysfb.h | 7 +++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 02a07d3d0d40..770e74be14f3 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -77,6 +77,25 @@ void sysfb_disable(struct device *dev) } EXPORT_SYMBOL_GPL(sysfb_disable); +/** + * sysfb_handles_screen_info() - reports if sysfb handles the global screen_info + * + * Callers can use sysfb_handles_screen_info() to determine whether the Generic + * System Framebuffers (sysfb) can handle the global screen_info data structure + * or not. Drivers might need this information to know if they have to setup the + * system framebuffer, or if they have to delegate this action to sysfb instead. + * + * Returns: + * True if sysfb handles the global screen_info data structure. 
+ */ +bool sysfb_handles_screen_info(void) +{ + const struct screen_info *si = &screen_info; + + return !!screen_info_video_type(si); +} +EXPORT_SYMBOL_GPL(sysfb_handles_screen_info); + #if defined(CONFIG_PCI) static bool sysfb_pci_dev_is_enabled(struct pci_dev *pdev) { diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index bef5f06a91de..07cbab516942 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -60,12 +60,19 @@ struct efifb_dmi_info { void sysfb_disable(struct device *dev); +bool sysfb_handles_screen_info(void); + #else /* CONFIG_SYSFB */ static inline void sysfb_disable(struct device *dev) { } +static inline bool sysfb_handles_screen_info(void) +{ + return false; +} + #endif /* CONFIG_SYSFB */ #ifdef CONFIG_EFI -- cgit v1.2.3 From e4ca0e59c39442546866f3dd514a3a5956577daf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Sep 2024 20:59:04 +0300 Subject: types: Complement the aligned types with signed 64-bit one Some user may want to use aligned signed 64-bit type. Provide it for them. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20240903180218.3640501-2-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- include/linux/types.h | 3 ++- include/uapi/linux/types.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/types.h b/include/linux/types.h index 2bc8766ba20c..2d7b9ae8714c 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -115,8 +115,9 @@ typedef u64 u_int64_t; typedef s64 int64_t; #endif -/* this is a special 64bit data type that is 8-byte aligned */ +/* These are the special 64-bit data types that are 8-byte aligned */ #define aligned_u64 __aligned_u64 +#define aligned_s64 __aligned_s64 #define aligned_be64 __aligned_be64 #define aligned_le64 __aligned_le64 diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index 6375a0684052..48b933938877 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -53,6 +53,7 @@ typedef __u32 __bitwise __wsum; * No conversions are necessary between 32-bit user-space and a 64-bit kernel. */ #define __aligned_u64 __u64 __attribute__((aligned(8))) +#define __aligned_s64 __s64 __attribute__((aligned(8))) #define __aligned_be64 __be64 __attribute__((aligned(8))) #define __aligned_le64 __le64 __attribute__((aligned(8))) -- cgit v1.2.3 From faf178607772f28006e403d7bab6c4217d4ee447 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 7 Sep 2024 19:24:46 +0200 Subject: iio: adc: Constify struct iio_map 'struct iio_map' are not modified in these drivers. Constifying this structure moves some data to a read-only section, so increase overall security. In order to do it, the prototype of iio_map_array_register() and devm_iio_map_array_register(), and a few structures that hold a "struct iio_map *" need to be adjusted. 
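For reference, a provider now declares and registers its map table as const, along the lines
of the sketch below; the channel and consumer names are made up:

  #include <linux/iio/driver.h>
  #include <linux/iio/iio.h>
  #include <linux/iio/machine.h>

  /* Sketch: consumer map for a hypothetical PMIC ADC */
  static const struct iio_map foo_adc_maps[] = {
          IIO_MAP("VBAT", "foo-battery", "battery-voltage"),
          IIO_MAP("TEMP", "foo-thermal", "pmic-temp"),
          { /* sentinel */ }
  };

  static int foo_adc_register_maps(struct device *dev,
                                   struct iio_dev *indio_dev)
  {
          /* the devm variant now also takes a const table */
          return devm_iio_map_array_register(dev, indio_dev, foo_adc_maps);
  }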
On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 21086 760 0 21846 5556 drivers/iio/adc/axp20x_adc.o After: ===== text data bss dec hex filename 21470 360 0 21830 5546 drivers/iio/adc/axp20x_adc.o 33842 1697 384 35923 8c53 drivers/iio/addac/ad74413r.o -- Compile tested only Signed-off-by: Christophe JAILLET Link: https://patch.msgid.link/5729dc3cc3892ecf0d8ea28c5f7307b34e27493e.1725729801.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jonathan Cameron --- drivers/iio/adc/axp20x_adc.c | 6 +++--- drivers/iio/adc/axp288_adc.c | 2 +- drivers/iio/adc/da9150-gpadc.c | 2 +- drivers/iio/adc/intel_mrfld_adc.c | 2 +- drivers/iio/adc/lp8788_adc.c | 6 +++--- drivers/iio/adc/mp2629_adc.c | 2 +- drivers/iio/adc/rn5t618-adc.c | 2 +- drivers/iio/adc/sun4i-gpadc-iio.c | 2 +- drivers/iio/inkern.c | 7 ++++--- include/linux/iio/driver.h | 5 +++-- 10 files changed, 19 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/iio/adc/axp20x_adc.c b/drivers/iio/adc/axp20x_adc.c index d43c8d124a0c..940c92f2792a 100644 --- a/drivers/iio/adc/axp20x_adc.c +++ b/drivers/iio/adc/axp20x_adc.c @@ -155,7 +155,7 @@ enum axp813_adc_channel_v { AXP813_BATT_V, }; -static struct iio_map axp20x_maps[] = { +static const struct iio_map axp20x_maps[] = { { .consumer_dev_name = "axp20x-usb-power-supply", .consumer_channel = "vbus_v", @@ -187,7 +187,7 @@ static struct iio_map axp20x_maps[] = { }, { /* sentinel */ } }; -static struct iio_map axp22x_maps[] = { +static const struct iio_map axp22x_maps[] = { { .consumer_dev_name = "axp20x-battery-power-supply", .consumer_channel = "batt_v", @@ -1044,7 +1044,7 @@ struct axp_data { unsigned long adc_en2_mask; int (*adc_rate)(struct axp20x_adc_iio *info, int rate); - struct iio_map *maps; + const struct iio_map *maps; }; static const struct axp_data axp192_data = { diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c index 8c3acc0cd7e9..45542efc3ece 100644 --- a/drivers/iio/adc/axp288_adc.c +++ b/drivers/iio/adc/axp288_adc.c @@ -103,7 +103,7 @@ static const struct iio_chan_spec axp288_adc_channels[] = { }; /* for consumer drivers */ -static struct iio_map axp288_adc_default_maps[] = { +static const struct iio_map axp288_adc_default_maps[] = { IIO_MAP("TS_PIN", "axp288-batt", "axp288-batt-temp"), IIO_MAP("PMIC_TEMP", "axp288-pmic", "axp288-pmic-temp"), IIO_MAP("GPADC", "axp288-gpadc", "axp288-system-temp"), diff --git a/drivers/iio/adc/da9150-gpadc.c b/drivers/iio/adc/da9150-gpadc.c index 8f0d3fb63b67..82628746ba8e 100644 --- a/drivers/iio/adc/da9150-gpadc.c +++ b/drivers/iio/adc/da9150-gpadc.c @@ -291,7 +291,7 @@ static const struct iio_chan_spec da9150_gpadc_channels[] = { }; /* Default maps used by da9150-charger */ -static struct iio_map da9150_gpadc_default_maps[] = { +static const struct iio_map da9150_gpadc_default_maps[] = { { .consumer_dev_name = "da9150-charger", .consumer_channel = "CHAN_IBUS", diff --git a/drivers/iio/adc/intel_mrfld_adc.c b/drivers/iio/adc/intel_mrfld_adc.c index 0590a126f321..30c8c09e3716 100644 --- a/drivers/iio/adc/intel_mrfld_adc.c +++ b/drivers/iio/adc/intel_mrfld_adc.c @@ -164,7 +164,7 @@ static const struct iio_chan_spec mrfld_adc_channels[] = { BCOVE_ADC_CHANNEL(IIO_TEMP, 8, "CH8", 0xC6), }; -static struct iio_map iio_maps[] = { +static const struct iio_map iio_maps[] = { IIO_MAP("CH0", "bcove-battery", "VBATRSLT"), IIO_MAP("CH1", "bcove-battery", "BATTID"), IIO_MAP("CH2", "bcove-battery", "IBATRSLT"), diff --git a/drivers/iio/adc/lp8788_adc.c 
b/drivers/iio/adc/lp8788_adc.c index 6d9b354bc705..0d49be0061a2 100644 --- a/drivers/iio/adc/lp8788_adc.c +++ b/drivers/iio/adc/lp8788_adc.c @@ -26,7 +26,7 @@ struct lp8788_adc { struct lp8788 *lp; - struct iio_map *map; + const struct iio_map *map; struct mutex lock; }; @@ -149,7 +149,7 @@ static const struct iio_chan_spec lp8788_adc_channels[] = { }; /* default maps used by iio consumer (lp8788-charger driver) */ -static struct iio_map lp8788_default_iio_maps[] = { +static const struct iio_map lp8788_default_iio_maps[] = { { .consumer_dev_name = "lp8788-charger", .consumer_channel = "lp8788_vbatt_5p0", @@ -168,7 +168,7 @@ static int lp8788_iio_map_register(struct device *dev, struct lp8788_platform_data *pdata, struct lp8788_adc *adc) { - struct iio_map *map; + const struct iio_map *map; int ret; map = (!pdata || !pdata->adc_pdata) ? diff --git a/drivers/iio/adc/mp2629_adc.c b/drivers/iio/adc/mp2629_adc.c index 5fbf9b6abd9c..921d3e193752 100644 --- a/drivers/iio/adc/mp2629_adc.c +++ b/drivers/iio/adc/mp2629_adc.c @@ -52,7 +52,7 @@ static struct iio_chan_spec mp2629_channels[] = { MP2629_ADC_CHAN(INPUT_CURRENT, IIO_CURRENT) }; -static struct iio_map mp2629_adc_maps[] = { +static const struct iio_map mp2629_adc_maps[] = { MP2629_MAP(BATT_VOLT, "batt-volt"), MP2629_MAP(SYSTEM_VOLT, "system-volt"), MP2629_MAP(INPUT_VOLT, "input-volt"), diff --git a/drivers/iio/adc/rn5t618-adc.c b/drivers/iio/adc/rn5t618-adc.c index ce5f3011fe00..b33536157adc 100644 --- a/drivers/iio/adc/rn5t618-adc.c +++ b/drivers/iio/adc/rn5t618-adc.c @@ -185,7 +185,7 @@ static const struct iio_chan_spec rn5t618_adc_iio_channels[] = { RN5T618_ADC_CHANNEL(AIN0, IIO_VOLTAGE, "AIN0") }; -static struct iio_map rn5t618_maps[] = { +static const struct iio_map rn5t618_maps[] = { IIO_MAP("VADP", "rn5t618-power", "vadp"), IIO_MAP("VUSB", "rn5t618-power", "vusb"), { /* sentinel */ } diff --git a/drivers/iio/adc/sun4i-gpadc-iio.c b/drivers/iio/adc/sun4i-gpadc-iio.c index 100ecced5fc1..5d459f050634 100644 --- a/drivers/iio/adc/sun4i-gpadc-iio.c +++ b/drivers/iio/adc/sun4i-gpadc-iio.c @@ -114,7 +114,7 @@ struct sun4i_gpadc_iio { .datasheet_name = _name, \ } -static struct iio_map sun4i_gpadc_hwmon_maps[] = { +static const struct iio_map sun4i_gpadc_hwmon_maps[] = { { .adc_channel_label = "temp_adc", .consumer_dev_name = "iio_hwmon.0", diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 151099be2863..7f325b3ed08f 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -20,7 +20,7 @@ struct iio_map_internal { struct iio_dev *indio_dev; - struct iio_map *map; + const struct iio_map *map; struct list_head l; }; @@ -42,7 +42,7 @@ static int iio_map_array_unregister_locked(struct iio_dev *indio_dev) return ret; } -int iio_map_array_register(struct iio_dev *indio_dev, struct iio_map *maps) +int iio_map_array_register(struct iio_dev *indio_dev, const struct iio_map *maps) { struct iio_map_internal *mapi; int i = 0; @@ -86,7 +86,8 @@ static void iio_map_array_unregister_cb(void *indio_dev) iio_map_array_unregister(indio_dev); } -int devm_iio_map_array_register(struct device *dev, struct iio_dev *indio_dev, struct iio_map *maps) +int devm_iio_map_array_register(struct device *dev, struct iio_dev *indio_dev, + const struct iio_map *maps) { int ret; diff --git a/include/linux/iio/driver.h b/include/linux/iio/driver.h index 7a157ed218f6..7f8b55551ed0 100644 --- a/include/linux/iio/driver.h +++ b/include/linux/iio/driver.h @@ -18,7 +18,7 @@ struct iio_map; * @map: array of mappings specifying association of channel with client */ 
int iio_map_array_register(struct iio_dev *indio_dev, - struct iio_map *map); + const struct iio_map *map); /** * iio_map_array_unregister() - tell the core to remove consumer mappings for @@ -38,6 +38,7 @@ int iio_map_array_unregister(struct iio_dev *indio_dev); * handle de-registration of the IIO map object when the device's refcount goes to * zero. */ -int devm_iio_map_array_register(struct device *dev, struct iio_dev *indio_dev, struct iio_map *maps); +int devm_iio_map_array_register(struct device *dev, struct iio_dev *indio_dev, + const struct iio_map *maps); #endif -- cgit v1.2.3 From 0f702757c68b9790a844e5c07073d3d1c777b13a Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Aug 2024 11:34:11 +0800 Subject: ARM: samsung: Remove obsoleted declaration for s3c_hwmon_set_platdata The s3c_hwmon_set_platdata() have been removed since commit 0d297df03890 ("ARM: s3c: simplify platform code"), and now it is useless, so remove it. Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20240826033411.4022822-1-cuigaosheng1@huawei.com Signed-off-by: Krzysztof Kozlowski --- include/linux/platform_data/hwmon-s3c.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/hwmon-s3c.h b/include/linux/platform_data/hwmon-s3c.h index 1707ad4147df..7d21e0c41037 100644 --- a/include/linux/platform_data/hwmon-s3c.h +++ b/include/linux/platform_data/hwmon-s3c.h @@ -33,14 +33,4 @@ struct s3c_hwmon_pdata { struct s3c_hwmon_chcfg *in[8]; }; -/** - * s3c_hwmon_set_platdata - Set platform data for S3C HWMON device - * @pd: Platform data to register to device. - * - * Register the given platform data for use with the S3C HWMON device. - * The call will copy the platform data, so the board definitions can - * make the structure itself __initdata. - */ -extern void __init s3c_hwmon_set_platdata(struct s3c_hwmon_pdata *pd); - #endif /* __HWMON_S3C_H__ */ -- cgit v1.2.3 From 3a6ad95d97eb62a7b7c804ef7eeb329a1f697d00 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 26 Aug 2024 11:31:18 +0800 Subject: ASoC: samsung: Remove obsoleted declaration for s3c64xx_ac97_setup_gpio The s3c64xx_ac97_setup_gpio() have been removed since commit 0d297df03890 ("ARM: s3c: simplify platform code"), and now it is useless, so remove it. Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20240826033118.4021727-1-cuigaosheng1@huawei.com Signed-off-by: Krzysztof Kozlowski --- include/linux/platform_data/asoc-s3c.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/asoc-s3c.h b/include/linux/platform_data/asoc-s3c.h index f9c00f839e9f..085dd8e8af76 100644 --- a/include/linux/platform_data/asoc-s3c.h +++ b/include/linux/platform_data/asoc-s3c.h @@ -13,8 +13,6 @@ #include -extern void s3c64xx_ac97_setup_gpio(int); - struct samsung_i2s_type { /* If the Primary DAI has 5.1 Channels */ #define QUIRK_PRI_6CHAN (1 << 0) -- cgit v1.2.3 From 2d3e0135cefccbcd8459112a8afe260e7b51ff6d Mon Sep 17 00:00:00 2001 From: Inbaraj E Date: Tue, 17 Sep 2024 15:13:55 +0530 Subject: dt-bindings: clock: samsung: remove define with number of clocks for FSD Number of clocks supported by Linux drivers might vary - sometimes we add new clocks, not exposed previously. Therefore these numbers of clocks should not be in the bindings, as that prevents changing them. Remove it entirely from the bindings, once Linux drivers stopped using them. 
Signed-off-by: Inbaraj E Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240917094355.37887-3-inbaraj.e@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/fsd-clk.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/fsd-clk.h b/include/dt-bindings/clock/fsd-clk.h index c8a2af1dd1ad..3f7b64d93558 100644 --- a/include/dt-bindings/clock/fsd-clk.h +++ b/include/dt-bindings/clock/fsd-clk.h @@ -28,7 +28,6 @@ #define DOUT_CMU_IMEM_ACLK 13 #define DOUT_CMU_IMEM_DMACLK 14 #define GAT_CMU_FSYS0_SHARED0DIV4 15 -#define CMU_NR_CLK 16 /* PERIC */ #define PERIC_SCLK_UART0 1 @@ -76,7 +75,6 @@ #define PERIC_EQOS_PHYRXCLK_MUX 43 #define PERIC_EQOS_PHYRXCLK 44 #define PERIC_DOUT_RGMII_CLK 45 -#define PERIC_NR_CLK 46 /* FSYS0 */ #define UFS0_MPHY_REFCLK_IXTAL24 1 @@ -101,7 +99,6 @@ #define FSYS0_EQOS_TOP0_IPCLKPORT_RGMII_CLK_I 20 #define FSYS0_EQOS_TOP0_IPCLKPORT_CLK_RX_I 21 #define FSYS0_DOUT_FSYS0_PERIBUS_GRP 22 -#define FSYS0_NR_CLK 23 /* FSYS1 */ #define PCIE_LINK0_IPCLKPORT_DBI_ACLK 1 @@ -112,7 +109,6 @@ #define PCIE_LINK1_IPCLKPORT_AUX_ACLK 6 #define PCIE_LINK1_IPCLKPORT_MSTR_ACLK 7 #define PCIE_LINK1_IPCLKPORT_SLV_ACLK 8 -#define FSYS1_NR_CLK 9 /* IMEM */ #define IMEM_DMA0_IPCLKPORT_ACLK 1 @@ -126,11 +122,9 @@ #define IMEM_TMU_TOP_IPCLKPORT_I_CLK_TS 9 #define IMEM_TMU_GPU_IPCLKPORT_I_CLK_TS 10 #define IMEM_TMU_GT_IPCLKPORT_I_CLK_TS 11 -#define IMEM_NR_CLK 12 /* MFC */ #define MFC_MFC_IPCLKPORT_ACLK 1 -#define MFC_NR_CLK 2 /* CAM_CSI */ #define CAM_CSI0_0_IPCLKPORT_I_ACLK 1 @@ -145,6 +139,5 @@ #define CAM_CSI2_1_IPCLKPORT_I_ACLK 10 #define CAM_CSI2_2_IPCLKPORT_I_ACLK 11 #define CAM_CSI2_3_IPCLKPORT_I_ACLK 12 -#define CAM_CSI_NR_CLK 13 #endif /*_DT_BINDINGS_CLOCK_FSD_H */ -- cgit v1.2.3 From a370b72ec7165ebe1230d0225cbe66f6526e68ef Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sun, 18 Aug 2024 21:48:13 +0900 Subject: tracing: Add a comment about ftrace_regs definition To clarify what will be expected on ftrace_regs, add a comment to the architecture independent definition of the ftrace_regs. Signed-off-by: Masami Hiramatsu (Google) Acked-by: Mark Rutland Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fd5e84d0ec47..42106b3de396 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -117,6 +117,32 @@ extern int ftrace_enabled; #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +/** + * ftrace_regs - ftrace partial/optimal register set + * + * ftrace_regs represents a group of registers which is used at the + * function entry and exit. There are three types of registers. + * + * - Registers for passing the parameters to callee, including the stack + * pointer. (e.g. rcx, rdx, rdi, rsi, r8, r9 and rsp on x86_64) + * - Registers for passing the return values to caller. + * (e.g. rax and rdx on x86_64) + * - Registers for hooking the function call and return including the + * frame pointer (the frame pointer is architecture/config dependent) + * (e.g. rip, rbp and rsp for x86_64) + * + * Also, architecture dependent fields can be used for internal process. + * (e.g. orig_ax on x86_64) + * + * On the function entry, those registers will be restored except for + * the stack pointer, so that user can change the function parameters + * and instruction pointer (e.g. live patching.) 
+ * On the function exit, only registers which is used for return values + * are restored. + * + * NOTE: user *must not* access regs directly, only do it via APIs, because + * the member can be changed according to the architecture. + */ struct ftrace_regs { struct pt_regs regs; }; -- cgit v1.2.3 From a312a0f7834e605e7c41570f0e9525d0fc4a70a4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 14 Sep 2024 17:48:06 -0400 Subject: fgraph: Use fgraph data to store subtime for profiler Instead of having the "subtime" for the function profiler in the infrastructure ftrace_ret_stack structure, have it use the fgraph data reserve and retrieve functions. This will keep the limited shadow stack from wasting 8 bytes for something that is seldom used. Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Jiri Olsa Link: https://lore.kernel.org/20240914214826.780323141@goodmis.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 4 +--- kernel/trace/fgraph.c | 64 ++++++++++++++++++++++++++++++++++++++------------ kernel/trace/ftrace.c | 23 +++++++++--------- 3 files changed, 62 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 42106b3de396..aabd348cad4a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1081,6 +1081,7 @@ struct fgraph_ops { void *fgraph_reserve_data(int idx, int size_bytes); void *fgraph_retrieve_data(int idx, int *size_bytes); +void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth); /* * Stack of return addresses for functions @@ -1091,9 +1092,6 @@ struct ftrace_ret_stack { unsigned long ret; unsigned long func; unsigned long long calltime; -#ifdef CONFIG_FUNCTION_PROFILER - unsigned long long subtime; -#endif #ifdef HAVE_FUNCTION_GRAPH_FP_TEST unsigned long fp; #endif diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d7d4fb403f6f..095ceb752b28 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -390,21 +390,7 @@ void *fgraph_reserve_data(int idx, int size_bytes) */ void *fgraph_retrieve_data(int idx, int *size_bytes) { - int offset = current->curr_ret_stack - 1; - unsigned long val; - - val = get_fgraph_entry(current, offset); - while (__get_type(val) == FGRAPH_TYPE_DATA) { - if (__get_data_index(val) == idx) - goto found; - offset -= __get_data_size(val) + 1; - val = get_fgraph_entry(current, offset); - } - return NULL; -found: - if (size_bytes) - *size_bytes = __get_data_size(val) * sizeof(long); - return get_data_type_data(current, offset); + return fgraph_retrieve_parent_data(idx, size_bytes, 0); } /** @@ -460,6 +446,54 @@ get_ret_stack(struct task_struct *t, int offset, int *frame_offset) return RET_STACK(t, offset); } +/** + * fgraph_retrieve_parent_data - get data from a parent function + * @idx: The index into the fgraph_array (fgraph_ops::idx) + * @size_bytes: A pointer to retrieved data size + * @depth: The depth to find the parent (0 is the current function) + * + * This is similar to fgraph_retrieve_data() but can be used to retrieve + * data from a parent caller function. 
+ * + * Return: a pointer to the specified parent data or NULL if not found + */ +void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth) +{ + struct ftrace_ret_stack *ret_stack = NULL; + int offset = current->curr_ret_stack; + unsigned long val; + + if (offset <= 0) + return NULL; + + for (;;) { + int next_offset; + + ret_stack = get_ret_stack(current, offset, &next_offset); + if (!ret_stack || --depth < 0) + break; + offset = next_offset; + } + + if (!ret_stack) + return NULL; + + offset--; + + val = get_fgraph_entry(current, offset); + while (__get_type(val) == FGRAPH_TYPE_DATA) { + if (__get_data_index(val) == idx) + goto found; + offset -= __get_data_size(val) + 1; + val = get_fgraph_entry(current, offset); + } + return NULL; +found: + if (size_bytes) + *size_bytes = __get_data_size(val) * sizeof(long); + return get_data_type_data(current, offset); +} + /* Both enabled by default (can be cleared by function_graph tracer flags */ static bool fgraph_sleep_time = true; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d2dd71d04b8a..bac1f2ee1983 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -823,7 +823,7 @@ void ftrace_graph_graph_time_control(bool enable) static int profile_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops) { - struct ftrace_ret_stack *ret_stack; + unsigned long long *subtime; function_profile_call(trace->func, 0, NULL, NULL); @@ -831,9 +831,9 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace, if (!current->ret_stack) return 0; - ret_stack = ftrace_graph_get_ret_stack(current, 0); - if (ret_stack) - ret_stack->subtime = 0; + subtime = fgraph_reserve_data(gops->idx, sizeof(*subtime)); + if (subtime) + *subtime = 0; return 1; } @@ -841,11 +841,12 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace, static void profile_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { - struct ftrace_ret_stack *ret_stack; struct ftrace_profile_stat *stat; unsigned long long calltime; + unsigned long long *subtime; struct ftrace_profile *rec; unsigned long flags; + int size; local_irq_save(flags); stat = this_cpu_ptr(&ftrace_profile_stats); @@ -861,13 +862,13 @@ static void profile_graph_return(struct ftrace_graph_ret *trace, if (!fgraph_graph_time) { /* Append this call time to the parent time to subtract */ - ret_stack = ftrace_graph_get_ret_stack(current, 1); - if (ret_stack) - ret_stack->subtime += calltime; + subtime = fgraph_retrieve_parent_data(gops->idx, &size, 1); + if (subtime) + *subtime += calltime; - ret_stack = ftrace_graph_get_ret_stack(current, 0); - if (ret_stack && ret_stack->subtime < calltime) - calltime -= ret_stack->subtime; + subtime = fgraph_retrieve_data(gops->idx, &size); + if (subtime && *subtime && *subtime < calltime) + calltime -= *subtime; else calltime = 0; } -- cgit v1.2.3 From 3c9880f3ab52b52b5b4e1850a70e80dd7329cb4c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 14 Sep 2024 17:48:07 -0400 Subject: ftrace: Use a running sleeptime instead of saving on shadow stack The fgraph "sleep-time" option tells the function graph tracer and the profiler whether to include the time a function "sleeps" (is scheduled off the CPU) in its duration for the function. By default it is true, which means the duration of a function is calculated by the timestamp of when the function was entered to the timestamp of when it exits. 
If the "sleep-time" option is disabled, it needs to remove the time that the task was not running on the CPU during the function. Currently it is done in a sched_switch tracepoint probe where it moves the "calltime" (time of entry of the function) forward by the sleep time calculated. It updates all the calltime in the shadow stack. This is time consuming for those users of the function graph tracer that does not care about the sleep time. Instead, add a "ftrace_sleeptime" to the task_struct that gets the sleep time added each time the task wakes up. Then have the function entry save the current "ftrace_sleeptime" and on function exit, move the calltime forward by the difference of the current "ftrace_sleeptime" from the saved sleeptime. This removes one dependency of "calltime" needed to be on the shadow stack. It also simplifies the code that removes the sleep time of functions. TODO: Only enable the sched_switch tracepoint when this is needed. Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Jiri Olsa Link: https://lore.kernel.org/20240914214826.938908568@goodmis.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/sched.h | 1 + kernel/trace/fgraph.c | 16 ++------------- kernel/trace/ftrace.c | 39 ++++++++++++++++++++++++++---------- kernel/trace/trace.h | 1 + kernel/trace/trace_functions_graph.c | 28 ++++++++++++++++++++++++++ 5 files changed, 60 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index e6ee4258169a..c08f3bdb11a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1441,6 +1441,7 @@ struct task_struct { /* Timestamp for last schedule: */ unsigned long long ftrace_timestamp; + unsigned long long ftrace_sleeptime; /* * Number of functions that haven't been traced diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 095ceb752b28..b2e95bf82211 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -495,7 +495,7 @@ found: } /* Both enabled by default (can be cleared by function_graph tracer flags */ -static bool fgraph_sleep_time = true; +bool fgraph_sleep_time = true; #ifdef CONFIG_DYNAMIC_FTRACE /* @@ -1046,9 +1046,7 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt, struct task_struct *next, unsigned int prev_state) { - struct ftrace_ret_stack *ret_stack; unsigned long long timestamp; - int offset; /* * Does the user want to count the time a function was asleep. @@ -1065,17 +1063,7 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt, if (!next->ftrace_timestamp) return; - /* - * Update all the counters in next to make up for the - * time next was sleeping. 
- */ - timestamp -= next->ftrace_timestamp; - - for (offset = next->curr_ret_stack; offset > 0; ) { - ret_stack = get_ret_stack(next, offset, &offset); - if (ret_stack) - ret_stack->calltime += timestamp; - } + next->ftrace_sleeptime += timestamp - next->ftrace_timestamp; } static DEFINE_PER_CPU(unsigned long *, idle_ret_stack); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bac1f2ee1983..90b3975d5315 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -820,10 +820,15 @@ void ftrace_graph_graph_time_control(bool enable) fgraph_graph_time = enable; } +struct profile_fgraph_data { + unsigned long long subtime; + unsigned long long sleeptime; +}; + static int profile_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops) { - unsigned long long *subtime; + struct profile_fgraph_data *profile_data; function_profile_call(trace->func, 0, NULL, NULL); @@ -831,9 +836,12 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace, if (!current->ret_stack) return 0; - subtime = fgraph_reserve_data(gops->idx, sizeof(*subtime)); - if (subtime) - *subtime = 0; + profile_data = fgraph_reserve_data(gops->idx, sizeof(*profile_data)); + if (!profile_data) + return 0; + + profile_data->subtime = 0; + profile_data->sleeptime = current->ftrace_sleeptime; return 1; } @@ -841,9 +849,10 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace, static void profile_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { + struct profile_fgraph_data *profile_data; + struct profile_fgraph_data *parent_data; struct ftrace_profile_stat *stat; unsigned long long calltime; - unsigned long long *subtime; struct ftrace_profile *rec; unsigned long flags; int size; @@ -859,16 +868,24 @@ static void profile_graph_return(struct ftrace_graph_ret *trace, calltime = trace->rettime - trace->calltime; + if (!fgraph_sleep_time) { + profile_data = fgraph_retrieve_data(gops->idx, &size); + if (profile_data && current->ftrace_sleeptime) + calltime -= current->ftrace_sleeptime - profile_data->sleeptime; + } + if (!fgraph_graph_time) { /* Append this call time to the parent time to subtract */ - subtime = fgraph_retrieve_parent_data(gops->idx, &size, 1); - if (subtime) - *subtime += calltime; + parent_data = fgraph_retrieve_parent_data(gops->idx, &size, 1); + if (parent_data) + parent_data->subtime += calltime; + + if (!profile_data) + profile_data = fgraph_retrieve_data(gops->idx, &size); - subtime = fgraph_retrieve_data(gops->idx, &size); - if (subtime && *subtime && *subtime < calltime) - calltime -= *subtime; + if (profile_data && profile_data->subtime && profile_data->subtime < calltime) + calltime -= profile_data->subtime; else calltime = 0; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c866991b9c78..2f8017f8d34d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1048,6 +1048,7 @@ static inline void ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftra #endif /* CONFIG_DYNAMIC_FTRACE */ extern unsigned int fgraph_max_depth; +extern bool fgraph_sleep_time; static inline bool ftrace_graph_ignore_func(struct fgraph_ops *gops, struct ftrace_graph_ent *trace) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index a569daaac4c4..bbd898f5a73c 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -133,6 +133,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = 
gops->private; struct trace_array_cpu *data; + unsigned long *sleeptime; unsigned long flags; unsigned int trace_ctx; long disabled; @@ -167,6 +168,13 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, if (ftrace_graph_ignore_irqs()) return 0; + /* save the current sleep time if we are to ignore it */ + if (!fgraph_sleep_time) { + sleeptime = fgraph_reserve_data(gops->idx, sizeof(*sleeptime)); + if (sleeptime) + *sleeptime = current->ftrace_sleeptime; + } + /* * Stop here if tracing_threshold is set. We only write function return * events to the ring buffer. @@ -238,6 +246,22 @@ void __trace_graph_return(struct trace_array *tr, trace_buffer_unlock_commit_nostack(buffer, event); } +static void handle_nosleeptime(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) +{ + unsigned long long *sleeptime; + int size; + + if (fgraph_sleep_time) + return; + + sleeptime = fgraph_retrieve_data(gops->idx, &size); + if (!sleeptime) + return; + + trace->calltime += current->ftrace_sleeptime - *sleeptime; +} + void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { @@ -256,6 +280,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace, return; } + handle_nosleeptime(trace, gops); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); @@ -278,6 +304,8 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, return; } + handle_nosleeptime(trace, gops); + if (tracing_thresh && (trace->rettime - trace->calltime < tracing_thresh)) return; -- cgit v1.2.3 From f1f36e22bee967db5e812a65e24389e54c46f3c2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 14 Sep 2024 17:48:08 -0400 Subject: ftrace: Have calltime be saved in the fgraph storage The calltime field in the shadow stack frame is only used by the function graph tracer and profiler. But now that there's other users of the function graph infrastructure, this adds overhead and wastes space on the shadow stack. Move the calltime to the fgraph data storage, where the function graph and profiler entry functions will save it in its own graph storage and retrieve it in its exit functions. 
Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Jiri Olsa Link: https://lore.kernel.org/20240914214827.096968730@goodmis.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 1 - kernel/trace/fgraph.c | 5 --- kernel/trace/ftrace.c | 19 ++++++------ kernel/trace/trace_functions_graph.c | 60 ++++++++++++++++++++++++------------ 4 files changed, 51 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index aabd348cad4a..e684addf6508 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1091,7 +1091,6 @@ void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth); struct ftrace_ret_stack { unsigned long ret; unsigned long func; - unsigned long long calltime; #ifdef HAVE_FUNCTION_GRAPH_FP_TEST unsigned long fp; #endif diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index b2e95bf82211..58a28ec35dab 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -558,7 +558,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int fgraph_idx) { struct ftrace_ret_stack *ret_stack; - unsigned long long calltime; unsigned long val; int offset; @@ -588,8 +587,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, return -EBUSY; } - calltime = trace_clock_local(); - offset = READ_ONCE(current->curr_ret_stack); ret_stack = RET_STACK(current, offset); offset += FGRAPH_FRAME_OFFSET; @@ -623,7 +620,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, ret_stack->ret = ret; ret_stack->func = func; - ret_stack->calltime = calltime; #ifdef HAVE_FUNCTION_GRAPH_FP_TEST ret_stack->fp = frame_pointer; #endif @@ -757,7 +753,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, *offset += FGRAPH_FRAME_OFFSET; *ret = ret_stack->ret; trace->func = ret_stack->func; - trace->calltime = ret_stack->calltime; trace->overrun = atomic_read(¤t->trace_overrun); trace->depth = current->curr_ret_depth; /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 90b3975d5315..cae388122ca8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -821,6 +821,7 @@ void ftrace_graph_graph_time_control(bool enable) } struct profile_fgraph_data { + unsigned long long calltime; unsigned long long subtime; unsigned long long sleeptime; }; @@ -842,6 +843,7 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace, profile_data->subtime = 0; profile_data->sleeptime = current->ftrace_sleeptime; + profile_data->calltime = trace_clock_local(); return 1; } @@ -850,9 +852,9 @@ static void profile_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { struct profile_fgraph_data *profile_data; - struct profile_fgraph_data *parent_data; struct ftrace_profile_stat *stat; unsigned long long calltime; + unsigned long long rettime = trace_clock_local(); struct ftrace_profile *rec; unsigned long flags; int size; @@ -862,29 +864,28 @@ static void profile_graph_return(struct ftrace_graph_ret *trace, if (!stat->hash || !ftrace_profile_enabled) goto out; + profile_data = fgraph_retrieve_data(gops->idx, &size); + /* If the calltime was zero'd ignore it */ - if (!trace->calltime) + if (!profile_data || !profile_data->calltime) goto out; - calltime = trace->rettime - trace->calltime; + calltime = rettime - profile_data->calltime; if (!fgraph_sleep_time) { - profile_data = fgraph_retrieve_data(gops->idx, &size); - if (profile_data && current->ftrace_sleeptime) + if 
(current->ftrace_sleeptime) calltime -= current->ftrace_sleeptime - profile_data->sleeptime; } if (!fgraph_graph_time) { + struct profile_fgraph_data *parent_data; /* Append this call time to the parent time to subtract */ parent_data = fgraph_retrieve_parent_data(gops->idx, &size, 1); if (parent_data) parent_data->subtime += calltime; - if (!profile_data) - profile_data = fgraph_retrieve_data(gops->idx, &size); - - if (profile_data && profile_data->subtime && profile_data->subtime < calltime) + if (profile_data->subtime && profile_data->subtime < calltime) calltime -= profile_data->subtime; else calltime = 0; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index bbd898f5a73c..5c1b150fbba3 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -127,13 +127,18 @@ static inline int ftrace_graph_ignore_irqs(void) return in_hardirq(); } +struct fgraph_times { + unsigned long long calltime; + unsigned long long sleeptime; /* may be optional! */ +}; + int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops) { unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; struct trace_array_cpu *data; - unsigned long *sleeptime; + struct fgraph_times *ftimes; unsigned long flags; unsigned int trace_ctx; long disabled; @@ -168,12 +173,18 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, if (ftrace_graph_ignore_irqs()) return 0; - /* save the current sleep time if we are to ignore it */ - if (!fgraph_sleep_time) { - sleeptime = fgraph_reserve_data(gops->idx, sizeof(*sleeptime)); - if (sleeptime) - *sleeptime = current->ftrace_sleeptime; + if (fgraph_sleep_time) { + /* Only need to record the calltime */ + ftimes = fgraph_reserve_data(gops->idx, sizeof(ftimes->calltime)); + } else { + ftimes = fgraph_reserve_data(gops->idx, sizeof(*ftimes)); + if (ftimes) + ftimes->sleeptime = current->ftrace_sleeptime; } + if (!ftimes) + return 0; + + ftimes->calltime = trace_clock_local(); /* * Stop here if tracing_threshold is set. 
We only write function return @@ -247,19 +258,13 @@ void __trace_graph_return(struct trace_array *tr, } static void handle_nosleeptime(struct ftrace_graph_ret *trace, - struct fgraph_ops *gops) + struct fgraph_times *ftimes, + int size) { - unsigned long long *sleeptime; - int size; - - if (fgraph_sleep_time) - return; - - sleeptime = fgraph_retrieve_data(gops->idx, &size); - if (!sleeptime) + if (fgraph_sleep_time || size < sizeof(*ftimes)) return; - trace->calltime += current->ftrace_sleeptime - *sleeptime; + ftimes->calltime += current->ftrace_sleeptime - ftimes->sleeptime; } void trace_graph_return(struct ftrace_graph_ret *trace, @@ -268,9 +273,11 @@ void trace_graph_return(struct ftrace_graph_ret *trace, unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; struct trace_array_cpu *data; + struct fgraph_times *ftimes; unsigned long flags; unsigned int trace_ctx; long disabled; + int size; int cpu; ftrace_graph_addr_finish(gops, trace); @@ -280,7 +287,13 @@ void trace_graph_return(struct ftrace_graph_ret *trace, return; } - handle_nosleeptime(trace, gops); + ftimes = fgraph_retrieve_data(gops->idx, &size); + if (!ftimes) + return; + + handle_nosleeptime(trace, ftimes, size); + + trace->calltime = ftimes->calltime; local_irq_save(flags); cpu = raw_smp_processor_id(); @@ -297,6 +310,9 @@ void trace_graph_return(struct ftrace_graph_ret *trace, static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { + struct fgraph_times *ftimes; + int size; + ftrace_graph_addr_finish(gops, trace); if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) { @@ -304,10 +320,16 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, return; } - handle_nosleeptime(trace, gops); + ftimes = fgraph_retrieve_data(gops->idx, &size); + if (!ftimes) + return; + + handle_nosleeptime(trace, ftimes, size); + + trace->calltime = ftimes->calltime; if (tracing_thresh && - (trace->rettime - trace->calltime < tracing_thresh)) + (trace->rettime - ftimes->calltime < tracing_thresh)) return; else trace_graph_return(trace, gops); -- cgit v1.2.3 From 26228256b796eb0145bdfb2ae34ec8c4c0ef1319 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 6 Sep 2024 09:52:16 +0200 Subject: backlight: lcd: Test against struct fb_info.lcd_dev Add struct fb_info.lcd_dev for fbdev drivers to store a reference to their lcd device. Update the lcd's fb_notifier_callback() to test for this field. The lcd module can now detect if an lcd device belongs to an fbdev device. This works similar to the bl_dev for backlights and will allow for the removal of the check_fb callback from several fbdev driver's lcd devices. 
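A fbdev driver that owns its lcd device then only needs to assign the new field before
registration, roughly as follows (the names are illustrative, not from this patch); the lcd
core compares fb_lcd_device(info) against the notified device, as the lcd.c hunk below shows:

  #include <linux/fb.h>
  #include <linux/lcd.h>

  /* Sketch: tie a driver's lcd device to its framebuffer */
  static int foo_fb_register(struct fb_info *info, struct lcd_device *ld)
  {
          /* must be set before the framebuffer is registered */
          info->lcd_dev = ld;

          return register_framebuffer(info);
  }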
Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240906075439.98476-3-tzimmermann@suse.de Signed-off-by: Lee Jones --- drivers/video/backlight/lcd.c | 3 +++ include/linux/fb.h | 13 +++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c index 2f57d6867d42..c69407aed296 100644 --- a/drivers/video/backlight/lcd.c +++ b/drivers/video/backlight/lcd.c @@ -30,6 +30,7 @@ static int fb_notifier_callback(struct notifier_block *self, struct lcd_device *ld = container_of(self, struct lcd_device, fb_notif); struct fb_event *evdata = data; struct fb_info *info = evdata->info; + struct lcd_device *fb_lcd = fb_lcd_device(info); guard(mutex)(&ld->ops_lock); @@ -37,6 +38,8 @@ static int fb_notifier_callback(struct notifier_block *self, return 0; if (ld->ops->check_fb && !ld->ops->check_fb(ld, info)) return 0; + if (fb_lcd && fb_lcd != ld) + return 0; if (event == FB_EVENT_BLANK) { if (ld->ops->set_power) diff --git a/include/linux/fb.h b/include/linux/fb.h index 267b59ead432..5ba187e08cf7 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -21,6 +21,7 @@ struct fb_info; struct file; struct i2c_adapter; struct inode; +struct lcd_device; struct module; struct notifier_block; struct page; @@ -480,6 +481,13 @@ struct fb_info { struct mutex bl_curve_mutex; u8 bl_curve[FB_BACKLIGHT_LEVELS]; #endif + + /* + * Assigned LCD device; set before framebuffer + * registration, remove after unregister + */ + struct lcd_device *lcd_dev; + #ifdef CONFIG_FB_DEFERRED_IO struct delayed_work deferred_work; unsigned long npagerefs; @@ -754,6 +762,11 @@ static inline struct backlight_device *fb_bl_device(struct fb_info *info) } #endif +static inline struct lcd_device *fb_lcd_device(struct fb_info *info) +{ + return info->lcd_dev; +} + /* fbmon.c */ #define FB_MAXTIMINGS 0 #define FB_VSYNCTIMINGS 1 -- cgit v1.2.3 From 48ffe2074c2864ab64ee2004e7ebf3d6a6730fbf Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 6 Sep 2024 09:52:17 +0200 Subject: backlight: lcd: Add LCD_POWER_ constants for power states Duplicate FB_BLANK_ constants as LCD_POWER_ constants in the lcd header file. Allows lcd drivers to avoid including the fbdev header file and removes a compile-time dependency between the two subsystems. The new LCD_POWER_ constants have the same values as their FB_BLANK_ counterparts. Hence semantics does not change and the lcd drivers can be converted one by one. Each instance of FB_BLANK_UNBLANK becomes LCD_POWER_ON, each of FB_BLANK_POWERDOWN becomes LCD_POWER_OFF, FB_BLANK_NORMAL becomes LCD_POWER_REDUCED and FB_BLANK_VSYNC_SUSPEND becomes LCD_POWER_REDUCED_VSYNC_SUSPEND. Lcd code or drivers do not use FB_BLANK_HSYNC_SUSPEND, so no new constants for this is being added. The tokens LCD_POWER_REDUCED and LCD_POWER_REDUCED_VSYNC_SUSPEND are deprecated and drivers should replace them with LCD_POWER_ON and LCD_POWER_OFF. See also commit a1cacb8a8e70 ("backlight: Add BACKLIGHT_POWER_ constants for power states"), which added similar constants for backlight drivers. 
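After conversion, a driver's set_power() implementation would look roughly like the sketch
below; the foo_* names and the GPIO-based panel control are purely illustrative:

  #include <linux/gpio/consumer.h>
  #include <linux/lcd.h>

  /* Sketch: a converted driver maps every non-on state to "off" */
  struct foo_lcd {
          struct gpio_desc *enable_gpio;
  };

  static int foo_lcd_set_power(struct lcd_device *ld, int power)
  {
          struct foo_lcd *priv = lcd_get_data(ld);

          gpiod_set_value_cansleep(priv->enable_gpio, power == LCD_POWER_ON);

          return 0;
  }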
v2: - fix typo in commit description Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240906075439.98476-4-tzimmermann@suse.de Signed-off-by: Lee Jones --- drivers/video/backlight/lcd.c | 22 +++++++++++++++++++++- include/linux/lcd.h | 5 +++++ 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c index c69407aed296..713f7fb8b10a 100644 --- a/drivers/video/backlight/lcd.c +++ b/drivers/video/backlight/lcd.c @@ -20,6 +20,24 @@ #if defined(CONFIG_FB) || (defined(CONFIG_FB_MODULE) && \ defined(CONFIG_LCD_CLASS_DEVICE_MODULE)) +static int to_lcd_power(int fb_blank) +{ + switch (fb_blank) { + case FB_BLANK_UNBLANK: + return LCD_POWER_ON; + /* deprecated; TODO: should become 'off' */ + case FB_BLANK_NORMAL: + return LCD_POWER_REDUCED; + case FB_BLANK_VSYNC_SUSPEND: + return LCD_POWER_REDUCED_VSYNC_SUSPEND; + /* 'off' */ + case FB_BLANK_HSYNC_SUSPEND: + case FB_BLANK_POWERDOWN: + default: + return LCD_POWER_OFF; + } +} + /* This callback gets called when something important happens inside a * framebuffer driver. We're looking if that important event is blanking, * and if it is, we're switching lcd power as well ... @@ -42,8 +60,10 @@ static int fb_notifier_callback(struct notifier_block *self, return 0; if (event == FB_EVENT_BLANK) { + int power = to_lcd_power(*(int *)evdata->data); + if (ld->ops->set_power) - ld->ops->set_power(ld, *(int *)evdata->data); + ld->ops->set_power(ld, power); } else { if (ld->ops->set_mode) ld->ops->set_mode(ld, evdata->data); diff --git a/include/linux/lcd.h b/include/linux/lcd.h index 68703a51dc53..dfcc54d327f5 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -14,6 +14,11 @@ #include #include +#define LCD_POWER_ON (0) +#define LCD_POWER_REDUCED (1) // deprecated; don't use in new code +#define LCD_POWER_REDUCED_VSYNC_SUSPEND (2) // deprecated; don't use in new code +#define LCD_POWER_OFF (4) + /* Notes on locking: * * lcd_device->ops_lock is an internal backlight lock protecting the ops -- cgit v1.2.3 From c38a7db56d18b3ec07f3ad52c1e3f1f05c375011 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 6 Sep 2024 09:52:31 +0200 Subject: backlight: platform_lcd: Remove match_fb from struct plat_lcd_data The match_fb callback in struct plat_lcd_data is unused. Remove it. 
Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240906075439.98476-18-tzimmermann@suse.de Signed-off-by: Lee Jones --- drivers/video/backlight/platform_lcd.c | 4 ---- include/video/platform_lcd.h | 3 --- 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/drivers/video/backlight/platform_lcd.c b/drivers/video/backlight/platform_lcd.c index 08d0ff400d88..8b89d2f47df7 100644 --- a/drivers/video/backlight/platform_lcd.c +++ b/drivers/video/backlight/platform_lcd.c @@ -53,10 +53,6 @@ static int platform_lcd_set_power(struct lcd_device *lcd, int power) static int platform_lcd_match(struct lcd_device *lcd, struct fb_info *info) { struct platform_lcd *plcd = to_our_lcd(lcd); - struct plat_lcd_data *pdata = plcd->pdata; - - if (pdata->match_fb) - return pdata->match_fb(pdata, info); return plcd->us->parent == info->device; } diff --git a/include/video/platform_lcd.h b/include/video/platform_lcd.h index 6a95184a28c1..2bdf46519298 100644 --- a/include/video/platform_lcd.h +++ b/include/video/platform_lcd.h @@ -8,11 +8,8 @@ */ struct plat_lcd_data; -struct fb_info; struct plat_lcd_data { int (*probe)(struct plat_lcd_data *); void (*set_power)(struct plat_lcd_data *, unsigned int power); - int (*match_fb)(struct plat_lcd_data *, struct fb_info *); }; - -- cgit v1.2.3 From 43e1120deb3768c86aa3875c7073658e44a30ea5 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 6 Sep 2024 09:52:40 +0200 Subject: backlight: lcd: Replace check_fb with controls_device Rename check_fb in struct lcd_ops to controls_device. The callback is now independent from fbdev's struct fb_info and tests if an lcd device controls a hardware display device. The new naming and semantics follow similar functionality for backlight devices. 
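As a sketch of the conversion pattern (hypothetical names; the platform_lcd change below is the in-tree example), a driver that used to match against struct fb_info now compares plain struct device pointers:

/* Old, fbdev-coupled callback: */
static int foo_lcd_check_fb(struct lcd_device *ld, struct fb_info *info)
{
	struct foo_lcd *lcd = lcd_get_data(ld);

	return lcd->display_dev == info->device;
}

/* New, fbdev-agnostic callback: */
static bool foo_lcd_controls_device(struct lcd_device *ld,
				    struct device *display_device)
{
	struct foo_lcd *lcd = lcd_get_data(ld);

	return lcd->display_dev == display_device;
}

The driver's lcd_ops table then carries .controls_device = foo_lcd_controls_device in place of .check_fb.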
v2: - fix typos in commit description (Daniel) Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240906075439.98476-27-tzimmermann@suse.de Signed-off-by: Lee Jones --- drivers/video/backlight/lcd.c | 2 +- drivers/video/backlight/platform_lcd.c | 11 +++++------ include/linux/lcd.h | 16 ++++++++++++---- 3 files changed, 18 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c index 713f7fb8b10a..dd175b446180 100644 --- a/drivers/video/backlight/lcd.c +++ b/drivers/video/backlight/lcd.c @@ -54,7 +54,7 @@ static int fb_notifier_callback(struct notifier_block *self, if (!ld->ops) return 0; - if (ld->ops->check_fb && !ld->ops->check_fb(ld, info)) + if (ld->ops->controls_device && !ld->ops->controls_device(ld, info->device)) return 0; if (fb_lcd && fb_lcd != ld) return 0; diff --git a/drivers/video/backlight/platform_lcd.c b/drivers/video/backlight/platform_lcd.c index 69a22d1a8a35..c9fe50f4d8ed 100644 --- a/drivers/video/backlight/platform_lcd.c +++ b/drivers/video/backlight/platform_lcd.c @@ -9,7 +9,6 @@ #include #include -#include #include #include @@ -50,17 +49,17 @@ static int platform_lcd_set_power(struct lcd_device *lcd, int power) return 0; } -static int platform_lcd_match(struct lcd_device *lcd, struct fb_info *info) +static bool platform_lcd_controls_device(struct lcd_device *lcd, struct device *display_device) { struct platform_lcd *plcd = to_our_lcd(lcd); - return plcd->us->parent == info->device; + return plcd->us->parent == display_device; } static const struct lcd_ops platform_lcd_ops = { - .get_power = platform_lcd_get_power, - .set_power = platform_lcd_set_power, - .check_fb = platform_lcd_match, + .get_power = platform_lcd_get_power, + .set_power = platform_lcd_set_power, + .controls_device = platform_lcd_controls_device, }; static int platform_lcd_probe(struct platform_device *pdev) diff --git a/include/linux/lcd.h b/include/linux/lcd.h index dfcc54d327f5..8399b5ed48f2 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -35,7 +35,6 @@ */ struct lcd_device; -struct fb_info; struct lcd_properties { /* The maximum value for contrast (read-only) */ @@ -54,9 +53,18 @@ struct lcd_ops { int (*set_contrast)(struct lcd_device *, int contrast); /* Set LCD panel mode (resolutions ...) */ int (*set_mode)(struct lcd_device *, struct fb_videomode *); - /* Check if given framebuffer device is the one LCD is bound to; - return 0 if not, !=0 if it is. If NULL, lcd always matches the fb. */ - int (*check_fb)(struct lcd_device *, struct fb_info *); + + /* + * Check if the LCD controls the given display device. This + * operation is optional and if not implemented it is assumed that + * the display is always the one controlled by the LCD. + * + * RETURNS: + * + * If display_dev is NULL or display_dev matches the device controlled by + * the LCD, return true. Otherwise return false. + */ + bool (*controls_device)(struct lcd_device *lcd, struct device *display_device); }; struct lcd_device { -- cgit v1.2.3 From 02e224d096ef58fe59e96609de6018e133f33512 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 6 Sep 2024 09:52:41 +0200 Subject: backlight: lcd: Remove struct fb_videomode from set_mode callback Implementations of struct lcd_ops.set_mode only require the resolution from struct fb_videomode. Pass the xres and yres fields, but remove the dependency on the fbdev data structure. 
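A hedged before/after sketch of a set_mode implementation (hypothetical driver and helper; the corgi_lcd and tdo24m conversions below are the in-tree examples):

/* Old signature, tied to <linux/fb.h>: */
static int foo_lcd_set_mode(struct lcd_device *ld, struct fb_videomode *m)
{
	return foo_lcd_program_resolution(ld, m->xres, m->yres);	/* hypothetical */
}

/* New signature, fbdev-independent: */
static int foo_lcd_set_mode(struct lcd_device *ld, u32 xres, u32 yres)
{
	return foo_lcd_program_resolution(ld, xres, yres);
}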
Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240906075439.98476-28-tzimmermann@suse.de Signed-off-by: Lee Jones --- drivers/video/backlight/corgi_lcd.c | 5 ++--- drivers/video/backlight/lcd.c | 4 +++- drivers/video/backlight/tdo24m.c | 5 ++--- include/linux/lcd.h | 7 +++++-- 4 files changed, 12 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c index 35c3fd3281ca..69f49371ea35 100644 --- a/drivers/video/backlight/corgi_lcd.c +++ b/drivers/video/backlight/corgi_lcd.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -332,12 +331,12 @@ static void corgi_lcd_power_off(struct corgi_lcd *lcd) POWER1_VW_OFF | POWER1_GVSS_OFF | POWER1_VDD_OFF); } -static int corgi_lcd_set_mode(struct lcd_device *ld, struct fb_videomode *m) +static int corgi_lcd_set_mode(struct lcd_device *ld, u32 xres, u32 yres) { struct corgi_lcd *lcd = lcd_get_data(ld); int mode = CORGI_LCD_MODE_QVGA; - if (m->xres == 640 || m->xres == 480) + if (xres == 640 || xres == 480) mode = CORGI_LCD_MODE_VGA; if (lcd->mode == mode) diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c index dd175b446180..3267acf8dc5b 100644 --- a/drivers/video/backlight/lcd.c +++ b/drivers/video/backlight/lcd.c @@ -65,8 +65,10 @@ static int fb_notifier_callback(struct notifier_block *self, if (ld->ops->set_power) ld->ops->set_power(ld, power); } else { + const struct fb_videomode *videomode = evdata->data; + if (ld->ops->set_mode) - ld->ops->set_mode(ld, evdata->data); + ld->ops->set_mode(ld, videomode->xres, videomode->yres); } return 0; diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c index a14a94114e9d..c04ee3d04d87 100644 --- a/drivers/video/backlight/tdo24m.c +++ b/drivers/video/backlight/tdo24m.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -308,12 +307,12 @@ static int tdo24m_get_power(struct lcd_device *ld) return lcd->power; } -static int tdo24m_set_mode(struct lcd_device *ld, struct fb_videomode *m) +static int tdo24m_set_mode(struct lcd_device *ld, u32 xres, u32 yres) { struct tdo24m *lcd = lcd_get_data(ld); int mode = MODE_QVGA; - if (m->xres == 640 || m->xres == 480) + if (xres == 640 || xres == 480) mode = MODE_VGA; if (lcd->mode == mode) diff --git a/include/linux/lcd.h b/include/linux/lcd.h index 8399b5ed48f2..59a80b396a71 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -51,8 +51,11 @@ struct lcd_ops { int (*get_contrast)(struct lcd_device *); /* Set LCD panel contrast */ int (*set_contrast)(struct lcd_device *, int contrast); - /* Set LCD panel mode (resolutions ...) */ - int (*set_mode)(struct lcd_device *, struct fb_videomode *); + + /* + * Set LCD panel mode (resolutions ...) + */ + int (*set_mode)(struct lcd_device *lcd, u32 xres, u32 yres); /* * Check if the LCD controls the given display device. This -- cgit v1.2.3 From 0d580d99749e759b62dc8e28f511310e9235da7a Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 6 Sep 2024 09:52:42 +0200 Subject: backlight: lcd: Do not include in lcd header With the exception of fb_notifier_callback(), none of the lcd code uses fbdev; especially not the lcd drivers. Remove the include statement for from the public lcd header. 
v2: - fix typos in commit description Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240906075439.98476-29-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/linux/lcd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/lcd.h b/include/linux/lcd.h index 59a80b396a71..c3ccdff4519a 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -12,7 +12,6 @@ #include #include #include -#include #define LCD_POWER_ON (0) #define LCD_POWER_REDUCED (1) // deprecated; don't use in new code -- cgit v1.2.3 From fb4c31587adfa9cd50661a535bdbfcc4da57ee38 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 10 Sep 2024 18:32:51 +0200 Subject: reset: amlogic: add auxiliary reset driver support Add support for the reset controller present in the audio clock controller of the g12 and sm1 SoC families, using the auxiliary bus. This is expected to replace the driver currently present directly within the related clock driver. Signed-off-by: Jerome Brunet Reviewed-by: Philipp Zabel Link: https://lore.kernel.org/r/20240910-meson-rst-aux-v5-9-60be62635d3e@baylibre.com Signed-off-by: Philipp Zabel --- drivers/reset/amlogic/Kconfig | 8 ++ drivers/reset/amlogic/Makefile | 1 + drivers/reset/amlogic/reset-meson-aux.c | 136 +++++++++++++++++++++++++++++ drivers/reset/amlogic/reset-meson-common.c | 25 +++++- drivers/reset/amlogic/reset-meson.c | 3 + drivers/reset/amlogic/reset-meson.h | 4 + include/soc/amlogic/reset-meson-aux.h | 23 +++++ 7 files changed, 198 insertions(+), 2 deletions(-) create mode 100644 drivers/reset/amlogic/reset-meson-aux.c create mode 100644 include/soc/amlogic/reset-meson-aux.h (limited to 'include') diff --git a/drivers/reset/amlogic/Kconfig b/drivers/reset/amlogic/Kconfig index 1d77987088f4..3bee9fd60269 100644 --- a/drivers/reset/amlogic/Kconfig +++ b/drivers/reset/amlogic/Kconfig @@ -11,6 +11,14 @@ config RESET_MESON help This enables the reset driver for Amlogic SoCs. +config RESET_MESON_AUX + tristate "Meson Reset Auxiliary Driver" + depends on ARCH_MESON || COMPILE_TEST + select AUXILIARY_BUS + select RESET_MESON_COMMON + help + This enables the reset auxiliary driver for Amlogic SoCs. + config RESET_MESON_AUDIO_ARB tristate "Meson Audio Memory Arbiter Reset Driver" depends on ARCH_MESON || COMPILE_TEST diff --git a/drivers/reset/amlogic/Makefile b/drivers/reset/amlogic/Makefile index 74aaa2fb5e13..ca99a691282c 100644 --- a/drivers/reset/amlogic/Makefile +++ b/drivers/reset/amlogic/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_RESET_MESON) += reset-meson.o +obj-$(CONFIG_RESET_MESON_AUX) += reset-meson-aux.o obj-$(CONFIG_RESET_MESON_COMMON) += reset-meson-common.o obj-$(CONFIG_RESET_MESON_AUDIO_ARB) += reset-meson-audio-arb.o diff --git a/drivers/reset/amlogic/reset-meson-aux.c b/drivers/reset/amlogic/reset-meson-aux.c new file mode 100644 index 000000000000..dd8453001db9 --- /dev/null +++ b/drivers/reset/amlogic/reset-meson-aux.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* + * Amlogic Meson Reset Auxiliary driver + * + * Copyright (c) 2024 BayLibre, SAS. 
+ * Author: Jerome Brunet + */ + +#include +#include +#include +#include +#include +#include + +#include "reset-meson.h" +#include + +static DEFINE_IDA(meson_rst_aux_ida); + +struct meson_reset_adev { + struct auxiliary_device adev; + struct regmap *map; +}; + +#define to_meson_reset_adev(_adev) \ + container_of((_adev), struct meson_reset_adev, adev) + +static const struct meson_reset_param meson_g12a_audio_param = { + .reset_ops = &meson_reset_toggle_ops, + .reset_num = 26, + .level_offset = 0x24, +}; + +static const struct meson_reset_param meson_sm1_audio_param = { + .reset_ops = &meson_reset_toggle_ops, + .reset_num = 39, + .level_offset = 0x28, +}; + +static const struct auxiliary_device_id meson_reset_aux_ids[] = { + { + .name = "axg-audio-clkc.rst-g12a", + .driver_data = (kernel_ulong_t)&meson_g12a_audio_param, + }, { + .name = "axg-audio-clkc.rst-sm1", + .driver_data = (kernel_ulong_t)&meson_sm1_audio_param, + }, {} +}; +MODULE_DEVICE_TABLE(auxiliary, meson_reset_aux_ids); + +static int meson_reset_aux_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + const struct meson_reset_param *param = + (const struct meson_reset_param *)(id->driver_data); + struct meson_reset_adev *raux = + to_meson_reset_adev(adev); + + return meson_reset_controller_register(&adev->dev, raux->map, param); +} + +static struct auxiliary_driver meson_reset_aux_driver = { + .probe = meson_reset_aux_probe, + .id_table = meson_reset_aux_ids, +}; +module_auxiliary_driver(meson_reset_aux_driver); + +static void meson_rst_aux_release(struct device *dev) +{ + struct auxiliary_device *adev = to_auxiliary_dev(dev); + struct meson_reset_adev *raux = + to_meson_reset_adev(adev); + + ida_free(&meson_rst_aux_ida, adev->id); + kfree(raux); +} + +static void meson_rst_aux_unregister_adev(void *_adev) +{ + struct auxiliary_device *adev = _adev; + + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); +} + +int devm_meson_rst_aux_register(struct device *dev, + struct regmap *map, + const char *adev_name) +{ + struct meson_reset_adev *raux; + struct auxiliary_device *adev; + int ret; + + raux = kzalloc(sizeof(*raux), GFP_KERNEL); + if (!raux) + return -ENOMEM; + + ret = ida_alloc(&meson_rst_aux_ida, GFP_KERNEL); + if (ret < 0) + goto raux_free; + + raux->map = map; + + adev = &raux->adev; + adev->id = ret; + adev->name = adev_name; + adev->dev.parent = dev; + adev->dev.release = meson_rst_aux_release; + device_set_of_node_from_dev(&adev->dev, dev); + + ret = auxiliary_device_init(adev); + if (ret) + goto ida_free; + + ret = __auxiliary_device_add(adev, dev->driver->name); + if (ret) { + auxiliary_device_uninit(adev); + return ret; + } + + return devm_add_action_or_reset(dev, meson_rst_aux_unregister_adev, + adev); + +ida_free: + ida_free(&meson_rst_aux_ida, adev->id); +raux_free: + kfree(raux); + return ret; +} +EXPORT_SYMBOL_GPL(devm_meson_rst_aux_register); + +MODULE_DESCRIPTION("Amlogic Meson Reset Auxiliary driver"); +MODULE_AUTHOR("Jerome Brunet "); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_IMPORT_NS(MESON_RESET); diff --git a/drivers/reset/amlogic/reset-meson-common.c b/drivers/reset/amlogic/reset-meson-common.c index d57544801ae9..a7b1b250a64d 100644 --- a/drivers/reset/amlogic/reset-meson-common.c +++ b/drivers/reset/amlogic/reset-meson-common.c @@ -87,12 +87,33 @@ static int meson_reset_deassert(struct reset_controller_dev *rcdev, return meson_reset_level(rcdev, id, false); } -static const struct reset_control_ops meson_reset_ops = { +static int meson_reset_level_toggle(struct 
reset_controller_dev *rcdev, + unsigned long id) +{ + int ret; + + ret = meson_reset_assert(rcdev, id); + if (ret) + return ret; + + return meson_reset_deassert(rcdev, id); +} + +const struct reset_control_ops meson_reset_ops = { .reset = meson_reset_reset, .assert = meson_reset_assert, .deassert = meson_reset_deassert, .status = meson_reset_status, }; +EXPORT_SYMBOL_NS_GPL(meson_reset_ops, MESON_RESET); + +const struct reset_control_ops meson_reset_toggle_ops = { + .reset = meson_reset_level_toggle, + .assert = meson_reset_assert, + .deassert = meson_reset_deassert, + .status = meson_reset_status, +}; +EXPORT_SYMBOL_NS_GPL(meson_reset_toggle_ops, MESON_RESET); int meson_reset_controller_register(struct device *dev, struct regmap *map, const struct meson_reset_param *param) @@ -107,7 +128,7 @@ int meson_reset_controller_register(struct device *dev, struct regmap *map, data->map = map; data->rcdev.owner = dev->driver->owner; data->rcdev.nr_resets = param->reset_num; - data->rcdev.ops = &meson_reset_ops; + data->rcdev.ops = data->param->reset_ops; data->rcdev.of_node = dev->of_node; return devm_reset_controller_register(dev, &data->rcdev); diff --git a/drivers/reset/amlogic/reset-meson.c b/drivers/reset/amlogic/reset-meson.c index feb19bf6da77..6ae4ed6b7f8b 100644 --- a/drivers/reset/amlogic/reset-meson.c +++ b/drivers/reset/amlogic/reset-meson.c @@ -18,6 +18,7 @@ #include "reset-meson.h" static const struct meson_reset_param meson8b_param = { + .reset_ops = &meson_reset_ops, .reset_num = 256, .reset_offset = 0x0, .level_offset = 0x7c, @@ -25,6 +26,7 @@ static const struct meson_reset_param meson8b_param = { }; static const struct meson_reset_param meson_a1_param = { + .reset_ops = &meson_reset_ops, .reset_num = 96, .reset_offset = 0x0, .level_offset = 0x40, @@ -32,6 +34,7 @@ static const struct meson_reset_param meson_a1_param = { }; static const struct meson_reset_param meson_s4_param = { + .reset_ops = &meson_reset_ops, .reset_num = 192, .reset_offset = 0x0, .level_offset = 0x40, diff --git a/drivers/reset/amlogic/reset-meson.h b/drivers/reset/amlogic/reset-meson.h index 4e1dbd7569c5..2051e126dc3a 100644 --- a/drivers/reset/amlogic/reset-meson.h +++ b/drivers/reset/amlogic/reset-meson.h @@ -12,6 +12,7 @@ #include struct meson_reset_param { + const struct reset_control_ops *reset_ops; unsigned int reset_num; unsigned int reset_offset; unsigned int level_offset; @@ -21,4 +22,7 @@ struct meson_reset_param { int meson_reset_controller_register(struct device *dev, struct regmap *map, const struct meson_reset_param *param); +extern const struct reset_control_ops meson_reset_ops; +extern const struct reset_control_ops meson_reset_toggle_ops; + #endif /* __MESON_RESET_H */ diff --git a/include/soc/amlogic/reset-meson-aux.h b/include/soc/amlogic/reset-meson-aux.h new file mode 100644 index 000000000000..d8a15d48c984 --- /dev/null +++ b/include/soc/amlogic/reset-meson-aux.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SOC_RESET_MESON_AUX_H +#define __SOC_RESET_MESON_AUX_H + +#include + +struct device; +struct regmap; + +#if IS_ENABLED(CONFIG_RESET_MESON_AUX) +int devm_meson_rst_aux_register(struct device *dev, + struct regmap *map, + const char *adev_name); +#else +static inline int devm_meson_rst_aux_register(struct device *dev, + struct regmap *map, + const char *adev_name) +{ + return 0; +} +#endif + +#endif /* __SOC_RESET_MESON_AUX_H */ -- cgit v1.2.3 From 44badc908f2c85711cb18e45e13119c10ad3a05f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 24 Sep 2024 
09:05:45 +0100 Subject: tcp: Fix spelling mistake "emtpy" -> "empty" There is a spelling mistake in a WARN_ONCE message. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Jason Xing Link: https://patch.msgid.link/20240924080545.1324962-1-colin.i.king@gmail.com Signed-off-by: Paolo Abeni --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index d1948d357dad..739a9fb83d0c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2442,7 +2442,7 @@ static inline s64 tcp_rto_delta_us(const struct sock *sk) return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; } else { WARN_ONCE(1, - "rtx queue emtpy: " + "rtx queue empty: " "out:%u sacked:%u lost:%u retrans:%u " "tlp_high_seq:%u sk_state:%u ca_state:%u " "advmss:%u mss_cache:%u pmtu:%u\n", -- cgit v1.2.3 From 86b9ce0a8a6cf9397503920cd5412d207a93fae9 Mon Sep 17 00:00:00 2001 From: Sai Krishna Potthuri Date: Fri, 6 Sep 2024 16:31:12 +0530 Subject: firmware: xilinx: Add Pinctrl Get Attribute ID Add Pinctrl Get Attribute ID to the query ids list. Signed-off-by: Sai Krishna Potthuri Link: https://lore.kernel.org/20240906110113.3154327-3-sai.krishna.potthuri@amd.com Signed-off-by: Linus Walleij --- include/linux/firmware/xlnx-zynqmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index d7d07afc0532..3b4ce4ec5d3f 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -238,6 +238,7 @@ enum pm_query_id { PM_QID_PINCTRL_GET_PIN_GROUPS = 11, PM_QID_CLOCK_GET_NUM_CLOCKS = 12, PM_QID_CLOCK_GET_MAX_DIVISOR = 13, + PM_QID_PINCTRL_GET_ATTRIBUTES = 15, }; enum rpu_oper_mode { -- cgit v1.2.3 From b875bd5b381e114115922944f7a01e31f8b07c2a Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 11 Sep 2024 15:43:00 -0400 Subject: exportfs: Remove EXPORT_OP_ASYNC_LOCK Now that GFS2 and OCFS2 are signalling async ->lock() support with FOP_ASYNC_LOCK and checks for support are converted, we can remove EXPORT_OP_ASYNC_LOCK. Signed-off-by: Benjamin Coddington Link: https://lore.kernel.org/r/0a114db814fec3086f937ae3d44a086f13b8de26.1726083391.git.bcodding@redhat.com Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- Documentation/filesystems/nfs/exporting.rst | 7 ------- fs/gfs2/export.c | 1 - fs/ocfs2/export.c | 1 - include/linux/exportfs.h | 13 ------------- 4 files changed, 22 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst index f04ce1215a03..de64d2d002a2 100644 --- a/Documentation/filesystems/nfs/exporting.rst +++ b/Documentation/filesystems/nfs/exporting.rst @@ -238,10 +238,3 @@ following flags are defined: all of an inode's dirty data on last close. Exports that behave this way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip waiting for writeback when closing such files. - - EXPORT_OP_ASYNC_LOCK - Indicates a capable filesystem to do async lock - requests from lockd. Only set EXPORT_OP_ASYNC_LOCK if the filesystem has - it's own ->lock() functionality as core posix_lock_file() implementation - has no async lock request handling yet. For more information about how to - indicate an async lock request from a ->lock() file_operations struct, see - fs/locks.c and comment for the function vfs_lock_file(). 
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index d418d8b5367f..3334c394ce9c 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -190,6 +190,5 @@ const struct export_operations gfs2_export_ops = { .fh_to_parent = gfs2_fh_to_parent, .get_name = gfs2_get_name, .get_parent = gfs2_get_parent, - .flags = EXPORT_OP_ASYNC_LOCK, }; diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 96b684763b39..b95724b767e1 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -280,5 +280,4 @@ const struct export_operations ocfs2_export_ops = { .fh_to_dentry = ocfs2_fh_to_dentry, .fh_to_parent = ocfs2_fh_to_parent, .get_parent = ocfs2_get_parent, - .flags = EXPORT_OP_ASYNC_LOCK, }; diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 893a1d21dc1c..1ab165c2939f 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -250,19 +250,6 @@ struct export_operations { unsigned long flags; }; -/** - * exportfs_lock_op_is_async() - export op supports async lock operation - * @export_ops: the nfs export operations to check - * - * Returns true if the nfs export_operations structure has - * EXPORT_OP_ASYNC_LOCK in their flags set - */ -static inline bool -exportfs_lock_op_is_async(const struct export_operations *export_ops) -{ - return export_ops->flags & EXPORT_OP_ASYNC_LOCK; -} - extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent, int flags); -- cgit v1.2.3 From dad35f7d2fc14e446669d4cab100597a6798eae5 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 25 Sep 2024 18:40:09 +0200 Subject: reset: replace boolean parameters with flags parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce enum reset_control_flags and replace the list of boolean parameters to the internal reset_control_get functions with a single flags parameter, before adding more boolean options. The separate boolean parameters have been shown to be error prone in the past. See for example commit a57f68ddc886 ("reset: Fix devm bulk optional exclusive control getter"). 
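For clarity, a sketch (not part of the patch itself) of how the former (shared, optional, acquired) boolean tuples map onto the new enum values, as derived from the call-site conversions below:

/*
 * shared   optional   acquired      flags
 * false    false      true       -> RESET_CONTROL_EXCLUSIVE
 * false    false      false      -> RESET_CONTROL_EXCLUSIVE_RELEASED
 * true     false      (ignored)  -> RESET_CONTROL_SHARED
 * false    true       true       -> RESET_CONTROL_OPTIONAL_EXCLUSIVE
 * false    true       false      -> RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED
 * true     true       (ignored)  -> RESET_CONTROL_OPTIONAL_SHARED
 */

Consumers keep using the existing reset_control_get_*() wrappers unchanged; only the internal __reset_control_get() family changes its parameter list.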
Acked-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240925-reset-get-deasserted-v2-1-b3601bbd0458@pengutronix.de Signed-off-by: Philipp Zabel --- drivers/reset/core.c | 71 ++++++++++++---------- include/linux/reset.h | 161 +++++++++++++++++++++++++++++++------------------- 2 files changed, 139 insertions(+), 93 deletions(-) (limited to 'include') diff --git a/drivers/reset/core.c b/drivers/reset/core.c index 4d509d41456a..682d61812852 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -773,12 +773,19 @@ EXPORT_SYMBOL_GPL(reset_control_bulk_release); static struct reset_control * __reset_control_get_internal(struct reset_controller_dev *rcdev, - unsigned int index, bool shared, bool acquired) + unsigned int index, enum reset_control_flags flags) { + bool shared = flags & RESET_CONTROL_FLAGS_BIT_SHARED; + bool acquired = flags & RESET_CONTROL_FLAGS_BIT_ACQUIRED; struct reset_control *rstc; lockdep_assert_held(&reset_list_mutex); + /* Expect callers to filter out OPTIONAL and DEASSERTED bits */ + if (WARN_ON(flags & ~(RESET_CONTROL_FLAGS_BIT_SHARED | + RESET_CONTROL_FLAGS_BIT_ACQUIRED))) + return ERR_PTR(-EINVAL); + list_for_each_entry(rstc, &rcdev->reset_control_head, list) { if (rstc->id == index) { /* @@ -994,8 +1001,9 @@ static struct reset_controller_dev *__reset_find_rcdev(const struct of_phandle_a struct reset_control * __of_reset_control_get(struct device_node *node, const char *id, int index, - bool shared, bool optional, bool acquired) + enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; bool gpio_fallback = false; struct reset_control *rstc; struct reset_controller_dev *rcdev; @@ -1059,8 +1067,10 @@ __of_reset_control_get(struct device_node *node, const char *id, int index, goto out_unlock; } + flags &= ~RESET_CONTROL_FLAGS_BIT_OPTIONAL; + /* reset_list_mutex also protects the rcdev's reset_control list */ - rstc = __reset_control_get_internal(rcdev, rstc_id, shared, acquired); + rstc = __reset_control_get_internal(rcdev, rstc_id, flags); out_unlock: mutex_unlock(&reset_list_mutex); @@ -1091,8 +1101,9 @@ __reset_controller_by_name(const char *name) static struct reset_control * __reset_control_get_from_lookup(struct device *dev, const char *con_id, - bool shared, bool optional, bool acquired) + enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; const struct reset_control_lookup *lookup; struct reset_controller_dev *rcdev; const char *dev_id = dev_name(dev); @@ -1116,9 +1127,11 @@ __reset_control_get_from_lookup(struct device *dev, const char *con_id, return ERR_PTR(-EPROBE_DEFER); } + flags &= ~RESET_CONTROL_FLAGS_BIT_OPTIONAL; + rstc = __reset_control_get_internal(rcdev, lookup->index, - shared, acquired); + flags); mutex_unlock(&reset_list_mutex); break; } @@ -1133,30 +1146,29 @@ __reset_control_get_from_lookup(struct device *dev, const char *con_id, } struct reset_control *__reset_control_get(struct device *dev, const char *id, - int index, bool shared, bool optional, - bool acquired) + int index, enum reset_control_flags flags) { + bool shared = flags & RESET_CONTROL_FLAGS_BIT_SHARED; + bool acquired = flags & RESET_CONTROL_FLAGS_BIT_ACQUIRED; + if (WARN_ON(shared && acquired)) return ERR_PTR(-EINVAL); if (dev->of_node) - return __of_reset_control_get(dev->of_node, id, index, shared, - optional, acquired); + return __of_reset_control_get(dev->of_node, id, index, flags); - return __reset_control_get_from_lookup(dev, id, shared, optional, - acquired); + return 
__reset_control_get_from_lookup(dev, id, flags); } EXPORT_SYMBOL_GPL(__reset_control_get); int __reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired) + enum reset_control_flags flags) { int ret, i; for (i = 0; i < num_rstcs; i++) { - rstcs[i].rstc = __reset_control_get(dev, rstcs[i].id, 0, - shared, optional, acquired); + rstcs[i].rstc = __reset_control_get(dev, rstcs[i].id, 0, flags); if (IS_ERR(rstcs[i].rstc)) { ret = PTR_ERR(rstcs[i].rstc); goto err; @@ -1226,7 +1238,7 @@ static void devm_reset_control_release(struct device *dev, void *res) struct reset_control * __devm_reset_control_get(struct device *dev, const char *id, int index, - bool shared, bool optional, bool acquired) + enum reset_control_flags flags) { struct reset_control **ptr, *rstc; @@ -1235,7 +1247,7 @@ __devm_reset_control_get(struct device *dev, const char *id, int index, if (!ptr) return ERR_PTR(-ENOMEM); - rstc = __reset_control_get(dev, id, index, shared, optional, acquired); + rstc = __reset_control_get(dev, id, index, flags); if (IS_ERR_OR_NULL(rstc)) { devres_free(ptr); return rstc; @@ -1262,7 +1274,7 @@ static void devm_reset_control_bulk_release(struct device *dev, void *res) int __devm_reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired) + enum reset_control_flags flags) { struct reset_control_bulk_devres *ptr; int ret; @@ -1272,7 +1284,7 @@ int __devm_reset_control_bulk_get(struct device *dev, int num_rstcs, if (!ptr) return -ENOMEM; - ret = __reset_control_bulk_get(dev, num_rstcs, rstcs, shared, optional, acquired); + ret = __reset_control_bulk_get(dev, num_rstcs, rstcs, flags); if (ret < 0) { devres_free(ptr); return ret; @@ -1298,6 +1310,7 @@ EXPORT_SYMBOL_GPL(__devm_reset_control_bulk_get); */ int __device_reset(struct device *dev, bool optional) { + enum reset_control_flags flags; struct reset_control *rstc; int ret; @@ -1313,7 +1326,8 @@ int __device_reset(struct device *dev, bool optional) } #endif - rstc = __reset_control_get(dev, NULL, 0, 0, optional, true); + flags = optional ? RESET_CONTROL_OPTIONAL_EXCLUSIVE : RESET_CONTROL_EXCLUSIVE; + rstc = __reset_control_get(dev, NULL, 0, flags); if (IS_ERR(rstc)) return PTR_ERR(rstc); @@ -1356,17 +1370,14 @@ static int of_reset_control_get_count(struct device_node *node) * device node. 
* * @np: device node for the device that requests the reset controls array - * @shared: whether reset controls are shared or not - * @optional: whether it is optional to get the reset controls - * @acquired: only one reset control may be acquired for a given controller - * and ID + * @flags: whether reset controls are shared, optional, acquired * * Returns pointer to allocated reset_control on success or error on failure */ struct reset_control * -of_reset_control_array_get(struct device_node *np, bool shared, bool optional, - bool acquired) +of_reset_control_array_get(struct device_node *np, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; struct reset_control_array *resets; struct reset_control *rstc; int num, i; @@ -1381,8 +1392,7 @@ of_reset_control_array_get(struct device_node *np, bool shared, bool optional, resets->num_rstcs = num; for (i = 0; i < num; i++) { - rstc = __of_reset_control_get(np, NULL, i, shared, optional, - acquired); + rstc = __of_reset_control_get(np, NULL, i, flags); if (IS_ERR(rstc)) goto err_rst; resets->rstc[i] = rstc; @@ -1407,8 +1417,7 @@ EXPORT_SYMBOL_GPL(of_reset_control_array_get); * devm_reset_control_array_get - Resource managed reset control array get * * @dev: device that requests the list of reset controls - * @shared: whether reset controls are shared or not - * @optional: whether it is optional to get the reset controls + * @flags: whether reset controls are shared, optional, acquired * * The reset control array APIs are intended for a list of resets * that just have to be asserted or deasserted, without any @@ -1417,7 +1426,7 @@ EXPORT_SYMBOL_GPL(of_reset_control_array_get); * Returns pointer to allocated reset_control on success or error on failure */ struct reset_control * -devm_reset_control_array_get(struct device *dev, bool shared, bool optional) +devm_reset_control_array_get(struct device *dev, enum reset_control_flags flags) { struct reset_control **ptr, *rstc; @@ -1426,7 +1435,7 @@ devm_reset_control_array_get(struct device *dev, bool shared, bool optional) if (!ptr) return ERR_PTR(-ENOMEM); - rstc = of_reset_control_array_get(dev->of_node, shared, optional, true); + rstc = of_reset_control_array_get(dev->of_node, flags); if (IS_ERR_OR_NULL(rstc)) { devres_free(ptr); return rstc; diff --git a/include/linux/reset.h b/include/linux/reset.h index 514ddf003efc..99296af98f81 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -25,6 +25,33 @@ struct reset_control_bulk_data { struct reset_control *rstc; }; +#define RESET_CONTROL_FLAGS_BIT_SHARED BIT(0) /* not exclusive */ +#define RESET_CONTROL_FLAGS_BIT_OPTIONAL BIT(1) +#define RESET_CONTROL_FLAGS_BIT_ACQUIRED BIT(2) /* iff exclusive, not released */ + +/** + * enum reset_control_flags - Flags that can be passed to the reset_control_get functions + * to determine the type of reset control. + * These values cannot be OR'd. 
+ * + * @RESET_CONTROL_EXCLUSIVE: exclusive, acquired, + * @RESET_CONTROL_EXCLUSIVE_RELEASED: exclusive, released, + * @RESET_CONTROL_SHARED: shared + * @RESET_CONTROL_OPTIONAL_EXCLUSIVE: optional, exclusive, acquired + * @RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED: optional, exclusive, released + * @RESET_CONTROL_OPTIONAL_SHARED: optional, shared + */ +enum reset_control_flags { + RESET_CONTROL_EXCLUSIVE = RESET_CONTROL_FLAGS_BIT_ACQUIRED, + RESET_CONTROL_EXCLUSIVE_RELEASED = 0, + RESET_CONTROL_SHARED = RESET_CONTROL_FLAGS_BIT_SHARED, + RESET_CONTROL_OPTIONAL_EXCLUSIVE = RESET_CONTROL_FLAGS_BIT_OPTIONAL | + RESET_CONTROL_FLAGS_BIT_ACQUIRED, + RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED = RESET_CONTROL_FLAGS_BIT_OPTIONAL, + RESET_CONTROL_OPTIONAL_SHARED = RESET_CONTROL_FLAGS_BIT_OPTIONAL | + RESET_CONTROL_FLAGS_BIT_SHARED, +}; + #ifdef CONFIG_RESET_CONTROLLER int reset_control_reset(struct reset_control *rstc); @@ -42,30 +69,25 @@ int reset_control_bulk_acquire(int num_rstcs, struct reset_control_bulk_data *rs void reset_control_bulk_release(int num_rstcs, struct reset_control_bulk_data *rstcs); struct reset_control *__of_reset_control_get(struct device_node *node, - const char *id, int index, bool shared, - bool optional, bool acquired); + const char *id, int index, enum reset_control_flags flags); struct reset_control *__reset_control_get(struct device *dev, const char *id, - int index, bool shared, - bool optional, bool acquired); + int index, enum reset_control_flags flags); void reset_control_put(struct reset_control *rstc); int __reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired); + enum reset_control_flags flags); void reset_control_bulk_put(int num_rstcs, struct reset_control_bulk_data *rstcs); int __device_reset(struct device *dev, bool optional); struct reset_control *__devm_reset_control_get(struct device *dev, - const char *id, int index, bool shared, - bool optional, bool acquired); + const char *id, int index, enum reset_control_flags flags); int __devm_reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired); + enum reset_control_flags flags); struct reset_control *devm_reset_control_array_get(struct device *dev, - bool shared, bool optional); -struct reset_control *of_reset_control_array_get(struct device_node *np, - bool shared, bool optional, - bool acquired); + enum reset_control_flags flags); +struct reset_control *of_reset_control_array_get(struct device_node *np, enum reset_control_flags); int reset_control_get_count(struct device *dev); @@ -116,17 +138,19 @@ static inline int __device_reset(struct device *dev, bool optional) static inline struct reset_control *__of_reset_control_get( struct device_node *node, - const char *id, int index, bool shared, - bool optional, bool acquired) + const char *id, int index, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } static inline struct reset_control *__reset_control_get( struct device *dev, const char *id, - int index, bool shared, bool optional, - bool acquired) + int index, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? 
NULL : ERR_PTR(-ENOTSUPP); } @@ -162,8 +186,10 @@ reset_control_bulk_release(int num_rstcs, struct reset_control_bulk_data *rstcs) static inline int __reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired) + enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? 0 : -EOPNOTSUPP; } @@ -174,30 +200,36 @@ reset_control_bulk_put(int num_rstcs, struct reset_control_bulk_data *rstcs) static inline struct reset_control *__devm_reset_control_get( struct device *dev, const char *id, - int index, bool shared, bool optional, - bool acquired) + int index, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } static inline int __devm_reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired) + enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? 0 : -EOPNOTSUPP; } static inline struct reset_control * -devm_reset_control_array_get(struct device *dev, bool shared, bool optional) +devm_reset_control_array_get(struct device *dev, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } static inline struct reset_control * -of_reset_control_array_get(struct device_node *np, bool shared, bool optional, - bool acquired) +of_reset_control_array_get(struct device_node *np, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } @@ -236,7 +268,7 @@ static inline int device_reset_optional(struct device *dev) static inline struct reset_control * __must_check reset_control_get_exclusive(struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, false, false, true); + return __reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE); } /** @@ -253,7 +285,7 @@ static inline int __must_check reset_control_bulk_get_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, true); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_EXCLUSIVE); } /** @@ -274,7 +306,7 @@ static inline struct reset_control * __must_check reset_control_get_exclusive_released(struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, false, false, false); + return __reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE_RELEASED); } /** @@ -295,7 +327,7 @@ static inline int __must_check reset_control_bulk_get_exclusive_released(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, false); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_EXCLUSIVE_RELEASED); } /** @@ -316,7 +348,8 @@ static inline int __must_check reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, false); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED); } /** @@ -344,7 +377,7 @@ reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_r static inline 
struct reset_control *reset_control_get_shared( struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, true, false, false); + return __reset_control_get(dev, id, 0, RESET_CONTROL_SHARED); } /** @@ -361,7 +394,7 @@ static inline int __must_check reset_control_bulk_get_shared(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, false); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_SHARED); } /** @@ -378,7 +411,7 @@ reset_control_bulk_get_shared(struct device *dev, int num_rstcs, static inline struct reset_control *reset_control_get_optional_exclusive( struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, false, true, true); + return __reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -398,7 +431,7 @@ static inline int __must_check reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -415,7 +448,7 @@ reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, static inline struct reset_control *reset_control_get_optional_shared( struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, true, true, false); + return __reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED); } /** @@ -435,7 +468,7 @@ static inline int __must_check reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, true, true, false); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_OPTIONAL_SHARED); } /** @@ -451,7 +484,7 @@ reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, static inline struct reset_control *of_reset_control_get_exclusive( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, false, false, true); + return __of_reset_control_get(node, id, 0, RESET_CONTROL_EXCLUSIVE); } /** @@ -471,7 +504,7 @@ static inline struct reset_control *of_reset_control_get_exclusive( static inline struct reset_control *of_reset_control_get_optional_exclusive( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, false, true, true); + return __of_reset_control_get(node, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -496,7 +529,7 @@ static inline struct reset_control *of_reset_control_get_optional_exclusive( static inline struct reset_control *of_reset_control_get_shared( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, true, false, false); + return __of_reset_control_get(node, id, 0, RESET_CONTROL_SHARED); } /** @@ -513,7 +546,7 @@ static inline struct reset_control *of_reset_control_get_shared( static inline struct reset_control *of_reset_control_get_exclusive_by_index( struct device_node *node, int index) { - return __of_reset_control_get(node, NULL, index, false, false, true); + return __of_reset_control_get(node, NULL, index, RESET_CONTROL_EXCLUSIVE); } /** @@ -541,7 +574,7 @@ static inline struct reset_control *of_reset_control_get_exclusive_by_index( static inline struct reset_control *of_reset_control_get_shared_by_index( struct device_node *node, int index) { - return 
__of_reset_control_get(node, NULL, index, true, false, false); + return __of_reset_control_get(node, NULL, index, RESET_CONTROL_SHARED); } /** @@ -560,7 +593,7 @@ static inline struct reset_control * __must_check devm_reset_control_get_exclusive(struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, false, false, true); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE); } /** @@ -580,7 +613,8 @@ static inline int __must_check devm_reset_control_bulk_get_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, true); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_EXCLUSIVE); } /** @@ -599,7 +633,7 @@ static inline struct reset_control * __must_check devm_reset_control_get_exclusive_released(struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, false, false, false); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE_RELEASED); } /** @@ -619,7 +653,8 @@ static inline int __must_check devm_reset_control_bulk_get_exclusive_released(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, false); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_EXCLUSIVE_RELEASED); } /** @@ -638,7 +673,7 @@ static inline struct reset_control * __must_check devm_reset_control_get_optional_exclusive_released(struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, false, true, false); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED); } /** @@ -658,7 +693,8 @@ static inline int __must_check devm_reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, false); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED); } /** @@ -673,7 +709,7 @@ devm_reset_control_bulk_get_optional_exclusive_released(struct device *dev, int static inline struct reset_control *devm_reset_control_get_shared( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, true, false, false); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_SHARED); } /** @@ -693,7 +729,7 @@ static inline int __must_check devm_reset_control_bulk_get_shared(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, false); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_SHARED); } /** @@ -711,7 +747,7 @@ devm_reset_control_bulk_get_shared(struct device *dev, int num_rstcs, static inline struct reset_control *devm_reset_control_get_optional_exclusive( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, false, true, true); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -731,7 +767,8 @@ static inline int __must_check devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ 
-749,7 +786,7 @@ devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs static inline struct reset_control *devm_reset_control_get_optional_shared( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, true, true, false); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED); } /** @@ -769,7 +806,7 @@ static inline int __must_check devm_reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, true, false); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_OPTIONAL_SHARED); } /** @@ -787,7 +824,7 @@ devm_reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, static inline struct reset_control * devm_reset_control_get_exclusive_by_index(struct device *dev, int index) { - return __devm_reset_control_get(dev, NULL, index, false, false, true); + return __devm_reset_control_get(dev, NULL, index, RESET_CONTROL_EXCLUSIVE); } /** @@ -803,7 +840,7 @@ devm_reset_control_get_exclusive_by_index(struct device *dev, int index) static inline struct reset_control * devm_reset_control_get_shared_by_index(struct device *dev, int index) { - return __devm_reset_control_get(dev, NULL, index, true, false, false); + return __devm_reset_control_get(dev, NULL, index, RESET_CONTROL_SHARED); } /* @@ -851,54 +888,54 @@ static inline struct reset_control *devm_reset_control_get_by_index( static inline struct reset_control * devm_reset_control_array_get_exclusive(struct device *dev) { - return devm_reset_control_array_get(dev, false, false); + return devm_reset_control_array_get(dev, RESET_CONTROL_EXCLUSIVE); } static inline struct reset_control * devm_reset_control_array_get_shared(struct device *dev) { - return devm_reset_control_array_get(dev, true, false); + return devm_reset_control_array_get(dev, RESET_CONTROL_SHARED); } static inline struct reset_control * devm_reset_control_array_get_optional_exclusive(struct device *dev) { - return devm_reset_control_array_get(dev, false, true); + return devm_reset_control_array_get(dev, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } static inline struct reset_control * devm_reset_control_array_get_optional_shared(struct device *dev) { - return devm_reset_control_array_get(dev, true, true); + return devm_reset_control_array_get(dev, RESET_CONTROL_OPTIONAL_SHARED); } static inline struct reset_control * of_reset_control_array_get_exclusive(struct device_node *node) { - return of_reset_control_array_get(node, false, false, true); + return of_reset_control_array_get(node, RESET_CONTROL_EXCLUSIVE); } static inline struct reset_control * of_reset_control_array_get_exclusive_released(struct device_node *node) { - return of_reset_control_array_get(node, false, false, false); + return of_reset_control_array_get(node, RESET_CONTROL_EXCLUSIVE_RELEASED); } static inline struct reset_control * of_reset_control_array_get_shared(struct device_node *node) { - return of_reset_control_array_get(node, true, false, true); + return of_reset_control_array_get(node, RESET_CONTROL_SHARED); } static inline struct reset_control * of_reset_control_array_get_optional_exclusive(struct device_node *node) { - return of_reset_control_array_get(node, false, true, true); + return of_reset_control_array_get(node, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } static inline struct reset_control * of_reset_control_array_get_optional_shared(struct device_node *node) { - return 
of_reset_control_array_get(node, true, true, true); + return of_reset_control_array_get(node, RESET_CONTROL_OPTIONAL_SHARED); } #endif -- cgit v1.2.3 From d872bed85036f5e60c66b0dd0994346b4ea6470c Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 25 Sep 2024 18:40:10 +0200 Subject: reset: Add devres helpers to request pre-deasserted reset controls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add devres helpers - devm_reset_control_bulk_get_exclusive_deasserted - devm_reset_control_bulk_get_optional_exclusive_deasserted - devm_reset_control_bulk_get_optional_shared_deasserted - devm_reset_control_bulk_get_shared_deasserted - devm_reset_control_get_exclusive_deasserted - devm_reset_control_get_optional_exclusive_deasserted - devm_reset_control_get_optional_shared_deasserted - devm_reset_control_get_shared_deasserted to request and immediately deassert reset controls. During cleanup, reset_control_assert() will be called automatically on the returned reset controls. Acked-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240925-reset-get-deasserted-v2-2-b3601bbd0458@pengutronix.de Signed-off-by: Philipp Zabel --- drivers/reset/core.c | 48 ++++++++++++++++++++- include/linux/reset.h | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/reset/core.c b/drivers/reset/core.c index 682d61812852..22f67fc77ae5 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -1236,23 +1236,46 @@ static void devm_reset_control_release(struct device *dev, void *res) reset_control_put(*(struct reset_control **)res); } +static void devm_reset_control_release_deasserted(struct device *dev, void *res) +{ + struct reset_control *rstc = *(struct reset_control **)res; + + reset_control_assert(rstc); + reset_control_put(rstc); +} + struct reset_control * __devm_reset_control_get(struct device *dev, const char *id, int index, enum reset_control_flags flags) { struct reset_control **ptr, *rstc; + bool deasserted = flags & RESET_CONTROL_FLAGS_BIT_DEASSERTED; - ptr = devres_alloc(devm_reset_control_release, sizeof(*ptr), + ptr = devres_alloc(deasserted ? devm_reset_control_release_deasserted : + devm_reset_control_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); + flags &= ~RESET_CONTROL_FLAGS_BIT_DEASSERTED; + rstc = __reset_control_get(dev, id, index, flags); if (IS_ERR_OR_NULL(rstc)) { devres_free(ptr); return rstc; } + if (deasserted) { + int ret; + + ret = reset_control_deassert(rstc); + if (ret) { + reset_control_put(rstc); + devres_free(ptr); + return ERR_PTR(ret); + } + } + *ptr = rstc; devres_add(dev, ptr); @@ -1272,24 +1295,45 @@ static void devm_reset_control_bulk_release(struct device *dev, void *res) reset_control_bulk_put(devres->num_rstcs, devres->rstcs); } +static void devm_reset_control_bulk_release_deasserted(struct device *dev, void *res) +{ + struct reset_control_bulk_devres *devres = res; + + reset_control_bulk_assert(devres->num_rstcs, devres->rstcs); + reset_control_bulk_put(devres->num_rstcs, devres->rstcs); +} + int __devm_reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, enum reset_control_flags flags) { struct reset_control_bulk_devres *ptr; + bool deasserted = flags & RESET_CONTROL_FLAGS_BIT_DEASSERTED; int ret; - ptr = devres_alloc(devm_reset_control_bulk_release, sizeof(*ptr), + ptr = devres_alloc(deasserted ? 
devm_reset_control_bulk_release_deasserted : + devm_reset_control_bulk_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return -ENOMEM; + flags &= ~RESET_CONTROL_FLAGS_BIT_DEASSERTED; + ret = __reset_control_bulk_get(dev, num_rstcs, rstcs, flags); if (ret < 0) { devres_free(ptr); return ret; } + if (deasserted) { + ret = reset_control_bulk_deassert(num_rstcs, rstcs); + if (ret) { + reset_control_bulk_put(num_rstcs, rstcs); + devres_free(ptr); + return ret; + } + } + ptr->num_rstcs = num_rstcs; ptr->rstcs = rstcs; devres_add(dev, ptr); diff --git a/include/linux/reset.h b/include/linux/reset.h index 99296af98f81..2986ced69a02 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -28,6 +28,7 @@ struct reset_control_bulk_data { #define RESET_CONTROL_FLAGS_BIT_SHARED BIT(0) /* not exclusive */ #define RESET_CONTROL_FLAGS_BIT_OPTIONAL BIT(1) #define RESET_CONTROL_FLAGS_BIT_ACQUIRED BIT(2) /* iff exclusive, not released */ +#define RESET_CONTROL_FLAGS_BIT_DEASSERTED BIT(3) /** * enum reset_control_flags - Flags that can be passed to the reset_control_get functions @@ -35,21 +36,35 @@ struct reset_control_bulk_data { * These values cannot be OR'd. * * @RESET_CONTROL_EXCLUSIVE: exclusive, acquired, + * @RESET_CONTROL_EXCLUSIVE_DEASSERTED: exclusive, acquired, deasserted * @RESET_CONTROL_EXCLUSIVE_RELEASED: exclusive, released, * @RESET_CONTROL_SHARED: shared + * @RESET_CONTROL_SHARED_DEASSERTED: shared, deasserted * @RESET_CONTROL_OPTIONAL_EXCLUSIVE: optional, exclusive, acquired + * @RESET_CONTROL_OPTIONAL_EXCLUSIVE_DEASSERTED: optional, exclusive, acquired, deasserted * @RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED: optional, exclusive, released * @RESET_CONTROL_OPTIONAL_SHARED: optional, shared + * @RESET_CONTROL_OPTIONAL_SHARED_DEASSERTED: optional, shared, deasserted */ enum reset_control_flags { RESET_CONTROL_EXCLUSIVE = RESET_CONTROL_FLAGS_BIT_ACQUIRED, + RESET_CONTROL_EXCLUSIVE_DEASSERTED = RESET_CONTROL_FLAGS_BIT_ACQUIRED | + RESET_CONTROL_FLAGS_BIT_DEASSERTED, RESET_CONTROL_EXCLUSIVE_RELEASED = 0, RESET_CONTROL_SHARED = RESET_CONTROL_FLAGS_BIT_SHARED, + RESET_CONTROL_SHARED_DEASSERTED = RESET_CONTROL_FLAGS_BIT_SHARED | + RESET_CONTROL_FLAGS_BIT_DEASSERTED, RESET_CONTROL_OPTIONAL_EXCLUSIVE = RESET_CONTROL_FLAGS_BIT_OPTIONAL | RESET_CONTROL_FLAGS_BIT_ACQUIRED, + RESET_CONTROL_OPTIONAL_EXCLUSIVE_DEASSERTED = RESET_CONTROL_FLAGS_BIT_OPTIONAL | + RESET_CONTROL_FLAGS_BIT_ACQUIRED | + RESET_CONTROL_FLAGS_BIT_DEASSERTED, RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED = RESET_CONTROL_FLAGS_BIT_OPTIONAL, RESET_CONTROL_OPTIONAL_SHARED = RESET_CONTROL_FLAGS_BIT_OPTIONAL | RESET_CONTROL_FLAGS_BIT_SHARED, + RESET_CONTROL_OPTIONAL_SHARED_DEASSERTED = RESET_CONTROL_FLAGS_BIT_OPTIONAL | + RESET_CONTROL_FLAGS_BIT_SHARED | + RESET_CONTROL_FLAGS_BIT_DEASSERTED, }; #ifdef CONFIG_RESET_CONTROLLER @@ -596,6 +611,25 @@ __must_check devm_reset_control_get_exclusive(struct device *dev, return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE); } +/** + * devm_reset_control_get_exclusive_deasserted - resource managed + * reset_control_get_exclusive() + + * reset_control_deassert() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_exclusive() + reset_control_deassert(). For reset + * controllers returned from this function, reset_control_assert() + + * reset_control_put() is called automatically on driver detach. + * + * See reset_control_get_exclusive() for more information. 
+ */ +static inline struct reset_control * __must_check +devm_reset_control_get_exclusive_deasserted(struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE_DEASSERTED); +} + /** * devm_reset_control_bulk_get_exclusive - resource managed * reset_control_bulk_get_exclusive() @@ -712,6 +746,25 @@ static inline struct reset_control *devm_reset_control_get_shared( return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_SHARED); } +/** + * devm_reset_control_get_shared_deasserted - resource managed + * reset_control_get_shared() + + * reset_control_deassert() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_shared() + reset_control_deassert(). For reset + * controllers returned from this function, reset_control_assert() + + * reset_control_put() is called automatically on driver detach. + * + * See devm_reset_control_get_shared() for more information. + */ +static inline struct reset_control * __must_check +devm_reset_control_get_shared_deasserted(struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_SHARED_DEASSERTED); +} + /** * devm_reset_control_bulk_get_shared - resource managed * reset_control_bulk_get_shared() @@ -732,6 +785,28 @@ devm_reset_control_bulk_get_shared(struct device *dev, int num_rstcs, return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_SHARED); } +/** + * devm_reset_control_bulk_get_shared_deasserted - resource managed + * reset_control_bulk_get_shared() + + * reset_control_bulk_deassert() + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Managed reset_control_bulk_get_shared() + reset_control_bulk_deassert(). For + * reset controllers returned from this function, reset_control_bulk_assert() + + * reset_control_bulk_put() are called automatically on driver detach. + * + * See devm_reset_control_bulk_get_shared() for more information. + */ +static inline int __must_check +devm_reset_control_bulk_get_shared_deasserted(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_SHARED_DEASSERTED); +} + /** * devm_reset_control_get_optional_exclusive - resource managed * reset_control_get_optional_exclusive() @@ -750,6 +825,25 @@ static inline struct reset_control *devm_reset_control_get_optional_exclusive( return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } +/** + * devm_reset_control_get_optional_exclusive_deasserted - resource managed + * reset_control_get_optional_exclusive() + + * reset_control_deassert() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_optional_exclusive() + reset_control_deassert(). + * For reset controllers returned from this function, reset_control_assert() + + * reset_control_put() is called automatically on driver detach. + * + * See devm_reset_control_get_optional_exclusive() for more information. 
+ */ +static inline struct reset_control * +devm_reset_control_get_optional_exclusive_deasserted(struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE_DEASSERTED); +} + /** * devm_reset_control_bulk_get_optional_exclusive - resource managed * reset_control_bulk_get_optional_exclusive() @@ -789,6 +883,25 @@ static inline struct reset_control *devm_reset_control_get_optional_shared( return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED); } +/** + * devm_reset_control_get_optional_shared_deasserted - resource managed + * reset_control_get_optional_shared() + + * reset_control_deassert() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_optional_shared() + reset_control_deassert(). For + * reset controllers returned from this function, reset_control_assert() + + * reset_control_put() is called automatically on driver detach. + * + * See devm_reset_control_get_optional_shared() for more information. + */ +static inline struct reset_control * +devm_reset_control_get_optional_shared_deasserted(struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED_DEASSERTED); +} + /** * devm_reset_control_bulk_get_optional_shared - resource managed * reset_control_bulk_get_optional_shared() -- cgit v1.2.3 From dab9cd4b8e7f5fce4e7a0424991ec4714a780f3f Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 1 Oct 2024 10:51:39 +0200 Subject: pwm: Add kernel doc for members added to pwm_ops recently MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The callbacks for lowlevel pwm drivers were expanded to handle the new waveform abstraction. When doing that I missed to expand the kernel doc description. This is catched up here. Reported-by: Stephen Rothwell Link: https://lore.kernel.org/linux-next/20241001135207.125ca7af@canb.auug.org.au Fixes: 17e40c25158f ("pwm: New abstraction for PWM waveforms") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20241001085138.1025818-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index c3d9ddeafa65..f1cb1e5b0a36 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -276,6 +276,11 @@ struct pwm_capture { * @request: optional hook for requesting a PWM * @free: optional hook for freeing a PWM * @capture: capture and report PWM signal + * @sizeof_wfhw: size (in bytes) of driver specific waveform presentation + * @round_waveform_tohw: convert a struct pwm_waveform to driver specific presentation + * @round_waveform_fromhw: convert a driver specific waveform presentation to struct pwm_waveform + * @read_waveform: read driver specific waveform presentation from hardware + * @write_waveform: write driver specific waveform presentation to hardware * @apply: atomically apply a new PWM config * @get_state: get the current PWM state. */ -- cgit v1.2.3 From 030ace430afcf847f537227afceb22dfe8fb8fc8 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 26 Sep 2024 22:19:52 +0800 Subject: spi: spi-mem: Allow specifying the byte order in Octal DTR mode There are NOR flashes (Macronix) that swap the bytes on a 16-bit boundary when configured in Octal DTR mode. 
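As a hedged aside (not part of this patch's wording), the capability bit and operation flag introduced below could be wired up roughly like this in a controller and flash layer; the structure and field names follow the header changes in this patch, everything else is hypothetical:

#include <linux/spi/spi-mem.h>

/* Controller side: advertise that hardware can undo the 16-bit byte swap */
static const struct spi_controller_mem_caps example_mem_caps = {
	.dtr    = true,
	.swap16 = true,
};

/* Flash side: ask for the swap on an 8D-8D-8D data phase */
static void example_request_swap16(struct spi_mem_op *op)
{
	op->data.swap16 = 1;
}

The flash-side behaviour that makes this necessary is as follows.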
The byte order of 16-bit words is swapped when read or written in Octal Double Transfer Rate (DTR) mode compared to Single Transfer Rate (STR) modes. If one writes D0 D1 D2 D3 bytes using 1-1-1 mode, and uses 8D-8D-8D SPI mode for reading, it will read back D1 D0 D3 D2. Swapping the bytes may introduce some endianness problems. It can affect the boot sequence if the entire boot sequence is not handled in either 8D-8D-8D mode or 1-1-1 mode. Therefore, it is necessary to swap the bytes back to ensure the same byte order as in STR modes. Fortunately there are controllers that could swap the bytes back at runtime, addressing the flash's endianness requirements. Provide a way for the upper layers to specify the byte order in Octal DTR mode. Merge Tudor's patch and add modifications for suiting newer version of Linux kernel. Suggested-by: Michael Walle Signed-off-by: JaimeLiao Signed-off-by: AlvinZhou Acked-by: Mark Brown Link: https://lore.kernel.org/r/20240926141956.2386374-3-alvinzhou.tw@gmail.com Signed-off-by: Tudor Ambarus --- drivers/spi/spi-mem.c | 3 +++ include/linux/spi/spi-mem.h | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index 17b8baf749e6..abc6792e738c 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -172,6 +172,9 @@ bool spi_mem_default_supports_op(struct spi_mem *mem, if (!spi_mem_controller_is_capable(ctlr, dtr)) return false; + if (op->data.swap16 && !spi_mem_controller_is_capable(ctlr, swap16)) + return false; + if (op->cmd.nbytes != 2) return false; } else { diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index f866d5c8ed32..c46d2b8029be 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -90,6 +90,8 @@ enum spi_mem_data_dir { * @data.buswidth: number of IO lanes used to send/receive the data * @data.dtr: whether the data should be sent in DTR mode or not * @data.ecc: whether error correction is required or not + * @data.swap16: whether the byte order of 16-bit words is swapped when read + * or written in Octal DTR mode compared to STR mode. * @data.dir: direction of the transfer * @data.nbytes: number of data bytes to send/receive. Can be zero if the * operation does not involve transferring data @@ -124,7 +126,8 @@ struct spi_mem_op { u8 buswidth; u8 dtr : 1; u8 ecc : 1; - u8 __pad : 6; + u8 swap16 : 1; + u8 __pad : 5; enum spi_mem_data_dir dir; unsigned int nbytes; union { @@ -297,10 +300,13 @@ struct spi_controller_mem_ops { * struct spi_controller_mem_caps - SPI memory controller capabilities * @dtr: Supports DTR operations * @ecc: Supports operations with error correction + * @swap16: Supports swapping bytes on a 16 bit boundary when configured in + * Octal DTR */ struct spi_controller_mem_caps { bool dtr; bool ecc; + bool swap16; }; #define spi_mem_controller_is_capable(ctlr, cap) \ -- cgit v1.2.3 From 848f2bbb363d4cdb4202db328a103fd3c34e21a2 Mon Sep 17 00:00:00 2001 From: Ronak Jain Date: Tue, 30 Jul 2024 01:43:42 -0700 Subject: firmware: xilinx: Add missing debug firmware interfaces Add missing PM EEMI APIs interface in debug firmware driver. The debugfs firmware driver interface is intended for testing and debugging the EEMI APIs only. This interface does not contain any checking regarding improper usage, and the number, type and valid ranges of the arguments. This interface must be used with a lot of care. 
In fact, accessing this interface during normal PM operation will very likely cause unexpected problems. The debugfs interface shouldn't be used in the production system and hence it is disabled by default in defconfig. Signed-off-by: Ronak Jain Signed-off-by: Radhey Shyam Pandey Signed-off-by: Michal Simek Link: https://lore.kernel.org/r/20240730084342.1683231-1-ronak.jain@amd.com --- drivers/firmware/xilinx/zynqmp-debug.c | 162 ++++++++++++++++++++++++++++++++- include/linux/firmware/xlnx-zynqmp.h | 4 + 2 files changed, 165 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/firmware/xilinx/zynqmp-debug.c b/drivers/firmware/xilinx/zynqmp-debug.c index 8528850af889..22853ae0efdf 100644 --- a/drivers/firmware/xilinx/zynqmp-debug.c +++ b/drivers/firmware/xilinx/zynqmp-debug.c @@ -31,12 +31,50 @@ static char debugfs_buf[PAGE_SIZE]; #define PM_API(id) {id, #id, strlen(#id)} static struct pm_api_info pm_api_list[] = { + PM_API(PM_FORCE_POWERDOWN), + PM_API(PM_REQUEST_WAKEUP), + PM_API(PM_SYSTEM_SHUTDOWN), + PM_API(PM_REQUEST_NODE), + PM_API(PM_RELEASE_NODE), + PM_API(PM_SET_REQUIREMENT), PM_API(PM_GET_API_VERSION), + PM_API(PM_REGISTER_NOTIFIER), + PM_API(PM_RESET_ASSERT), + PM_API(PM_RESET_GET_STATUS), + PM_API(PM_GET_CHIPID), + PM_API(PM_PINCTRL_SET_FUNCTION), + PM_API(PM_PINCTRL_CONFIG_PARAM_GET), + PM_API(PM_PINCTRL_CONFIG_PARAM_SET), + PM_API(PM_IOCTL), + PM_API(PM_CLOCK_ENABLE), + PM_API(PM_CLOCK_DISABLE), + PM_API(PM_CLOCK_GETSTATE), + PM_API(PM_CLOCK_SETDIVIDER), + PM_API(PM_CLOCK_GETDIVIDER), + PM_API(PM_CLOCK_SETPARENT), + PM_API(PM_CLOCK_GETPARENT), PM_API(PM_QUERY_DATA), }; static struct dentry *firmware_debugfs_root; +/** + * zynqmp_pm_ioctl - PM IOCTL for device control and configs + * @node: Node ID of the device + * @ioctl: ID of the requested IOCTL + * @arg1: Argument 1 of requested IOCTL call + * @arg2: Argument 2 of requested IOCTL call + * @arg3: Argument 3 of requested IOCTL call + * @out: Returned output value + * + * Return: Returns status, either success or error+reason + */ +static int zynqmp_pm_ioctl(const u32 node, const u32 ioctl, const u32 arg1, + const u32 arg2, const u32 arg3, u32 *out) +{ + return zynqmp_pm_invoke_fn(PM_IOCTL, out, 5, node, ioctl, arg1, arg2, arg3); +} + /** * zynqmp_pm_argument_value() - Extract argument value from a PM-API request * @arg: Entered PM-API argument in string format @@ -95,6 +133,128 @@ static int process_api_request(u32 pm_id, u64 *pm_api_arg, u32 *pm_api_ret) sprintf(debugfs_buf, "PM-API Version = %d.%d\n", pm_api_version >> 16, pm_api_version & 0xffff); break; + case PM_FORCE_POWERDOWN: + ret = zynqmp_pm_force_pwrdwn(pm_api_arg[0], + pm_api_arg[1] ? pm_api_arg[1] : + ZYNQMP_PM_REQUEST_ACK_NO); + break; + case PM_REQUEST_WAKEUP: + ret = zynqmp_pm_request_wake(pm_api_arg[0], + pm_api_arg[1], pm_api_arg[2], + pm_api_arg[3] ? pm_api_arg[3] : + ZYNQMP_PM_REQUEST_ACK_NO); + break; + case PM_SYSTEM_SHUTDOWN: + ret = zynqmp_pm_system_shutdown(pm_api_arg[0], pm_api_arg[1]); + break; + case PM_REQUEST_NODE: + ret = zynqmp_pm_request_node(pm_api_arg[0], + pm_api_arg[1] ? pm_api_arg[1] : + ZYNQMP_PM_CAPABILITY_ACCESS, + pm_api_arg[2] ? pm_api_arg[2] : 0, + pm_api_arg[3] ? pm_api_arg[3] : + ZYNQMP_PM_REQUEST_ACK_BLOCKING); + break; + case PM_RELEASE_NODE: + ret = zynqmp_pm_release_node(pm_api_arg[0]); + break; + case PM_SET_REQUIREMENT: + ret = zynqmp_pm_set_requirement(pm_api_arg[0], + pm_api_arg[1] ? pm_api_arg[1] : + ZYNQMP_PM_CAPABILITY_CONTEXT, + pm_api_arg[2] ? + pm_api_arg[2] : 0, + pm_api_arg[3] ? 
pm_api_arg[3] : + ZYNQMP_PM_REQUEST_ACK_BLOCKING); + break; + case PM_REGISTER_NOTIFIER: + ret = zynqmp_pm_register_notifier(pm_api_arg[0], + pm_api_arg[1] ? + pm_api_arg[1] : 0, + pm_api_arg[2] ? + pm_api_arg[2] : 0, + pm_api_arg[3] ? + pm_api_arg[3] : 0); + break; + case PM_RESET_ASSERT: + ret = zynqmp_pm_reset_assert(pm_api_arg[0], pm_api_arg[1]); + break; + case PM_RESET_GET_STATUS: + ret = zynqmp_pm_reset_get_status(pm_api_arg[0], &pm_api_ret[0]); + if (!ret) + sprintf(debugfs_buf, "Reset status: %u\n", + pm_api_ret[0]); + break; + case PM_GET_CHIPID: + ret = zynqmp_pm_get_chipid(&pm_api_ret[0], &pm_api_ret[1]); + if (!ret) + sprintf(debugfs_buf, "Idcode: %#x, Version:%#x\n", + pm_api_ret[0], pm_api_ret[1]); + break; + case PM_PINCTRL_SET_FUNCTION: + ret = zynqmp_pm_pinctrl_set_function(pm_api_arg[0], + pm_api_arg[1]); + break; + case PM_PINCTRL_CONFIG_PARAM_GET: + ret = zynqmp_pm_pinctrl_get_config(pm_api_arg[0], pm_api_arg[1], + &pm_api_ret[0]); + if (!ret) + sprintf(debugfs_buf, + "Pin: %llu, Param: %llu, Value: %u\n", + pm_api_arg[0], pm_api_arg[1], + pm_api_ret[0]); + break; + case PM_PINCTRL_CONFIG_PARAM_SET: + ret = zynqmp_pm_pinctrl_set_config(pm_api_arg[0], + pm_api_arg[1], + pm_api_arg[2]); + break; + case PM_IOCTL: + ret = zynqmp_pm_ioctl(pm_api_arg[0], pm_api_arg[1], + pm_api_arg[2], pm_api_arg[3], + pm_api_arg[4], &pm_api_ret[0]); + if (!ret && (pm_api_arg[1] == IOCTL_GET_RPU_OPER_MODE || + pm_api_arg[1] == IOCTL_GET_PLL_FRAC_MODE || + pm_api_arg[1] == IOCTL_GET_PLL_FRAC_DATA || + pm_api_arg[1] == IOCTL_READ_GGS || + pm_api_arg[1] == IOCTL_READ_PGGS || + pm_api_arg[1] == IOCTL_READ_REG)) + sprintf(debugfs_buf, "IOCTL return value: %u\n", + pm_api_ret[1]); + if (!ret && pm_api_arg[1] == IOCTL_GET_QOS) + sprintf(debugfs_buf, "Default QoS: %u\nCurrent QoS: %u\n", + pm_api_ret[1], pm_api_ret[2]); + break; + case PM_CLOCK_ENABLE: + ret = zynqmp_pm_clock_enable(pm_api_arg[0]); + break; + case PM_CLOCK_DISABLE: + ret = zynqmp_pm_clock_disable(pm_api_arg[0]); + break; + case PM_CLOCK_GETSTATE: + ret = zynqmp_pm_clock_getstate(pm_api_arg[0], &pm_api_ret[0]); + if (!ret) + sprintf(debugfs_buf, "Clock state: %u\n", + pm_api_ret[0]); + break; + case PM_CLOCK_SETDIVIDER: + ret = zynqmp_pm_clock_setdivider(pm_api_arg[0], pm_api_arg[1]); + break; + case PM_CLOCK_GETDIVIDER: + ret = zynqmp_pm_clock_getdivider(pm_api_arg[0], &pm_api_ret[0]); + if (!ret) + sprintf(debugfs_buf, "Divider Value: %d\n", + pm_api_ret[0]); + break; + case PM_CLOCK_SETPARENT: + ret = zynqmp_pm_clock_setparent(pm_api_arg[0], pm_api_arg[1]); + break; + case PM_CLOCK_GETPARENT: + ret = zynqmp_pm_clock_getparent(pm_api_arg[0], &pm_api_ret[0]); + if (!ret) + sprintf(debugfs_buf, + "Clock parent Index: %u\n", pm_api_ret[0]); + break; case PM_QUERY_DATA: qdata.qid = pm_api_arg[0]; qdata.arg1 = pm_api_arg[1]; @@ -150,7 +310,7 @@ static ssize_t zynqmp_pm_debugfs_api_write(struct file *file, char *kern_buff, *tmp_buff; char *pm_api_req; u32 pm_id = 0; - u64 pm_api_arg[4] = {0, 0, 0, 0}; + u64 pm_api_arg[5] = {0, 0, 0, 0, 0}; /* Return values from PM APIs calls */ u32 pm_api_ret[4] = {0, 0, 0, 0}; diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index d7d07afc0532..563382cf16f2 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -218,9 +218,13 @@ enum pm_ioctl_id { /* Runtime feature configuration */ IOCTL_SET_FEATURE_CONFIG = 26, IOCTL_GET_FEATURE_CONFIG = 27, + /* IOCTL for Secure Read/Write Interface */ + IOCTL_READ_REG = 28, /* Dynamic 
SD/GEM configuration */ IOCTL_SET_SD_CONFIG = 30, IOCTL_SET_GEM_CONFIG = 31, + /* IOCTL to get default/current QoS */ + IOCTL_GET_QOS = 34, }; enum pm_query_id { -- cgit v1.2.3 From f33d6099edf78e3c97900c0173fedbfecc025a9e Mon Sep 17 00:00:00 2001 From: Ronak Jain Date: Fri, 30 Aug 2024 03:00:42 -0700 Subject: firmware: xilinx: use u32 for reset ID in reset APIs Refactors the reset handling mechanisms by replacing the reset ID's enum type with a u32. This update improves flexibility, allowing the reset ID to accommodate a broader range of values, including those that may not fit into predefined enum values. The use of u32 for reset ID enhances extensibility, especially for hardware platforms or features where more granular control of reset operations is required. By shifting to a general integer type, this change reduces constraints and simplifies integration with other system components that rely on non-enum-based reset IDs. Signed-off-by: Ronak Jain Link: https://lore.kernel.org/r/20240830100042.3163511-1-ronak.jain@amd.com Signed-off-by: Michal Simek --- drivers/firmware/xilinx/zynqmp.c | 4 ++-- include/linux/firmware/xlnx-zynqmp.h | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index add8acf66a9c..c8be32d9c6af 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -920,7 +920,7 @@ int zynqmp_pm_set_boot_health_status(u32 value) * * Return: Returns status, either success or error+reason */ -int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, +int zynqmp_pm_reset_assert(const u32 reset, const enum zynqmp_pm_reset_action assert_flag) { return zynqmp_pm_invoke_fn(PM_RESET_ASSERT, NULL, 2, reset, assert_flag); @@ -934,7 +934,7 @@ EXPORT_SYMBOL_GPL(zynqmp_pm_reset_assert); * * Return: Returns status, either success or error+reason */ -int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, u32 *status) +int zynqmp_pm_reset_get_status(const u32 reset, u32 *status) { u32 ret_payload[PAYLOAD_ARG_CNT]; int ret; diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 563382cf16f2..5b938fc2adad 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -557,9 +557,9 @@ int zynqmp_pm_get_pll_frac_data(u32 clk_id, u32 *data); int zynqmp_pm_set_sd_tapdelay(u32 node_id, u32 type, u32 value); int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type); int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select); -int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, +int zynqmp_pm_reset_assert(const u32 reset, const enum zynqmp_pm_reset_action assert_flag); -int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, u32 *status); +int zynqmp_pm_reset_get_status(const u32 reset, u32 *status); unsigned int zynqmp_pm_bootmode_read(u32 *ps_mode); int zynqmp_pm_bootmode_write(u32 ps_mode); int zynqmp_pm_init_finalize(void); @@ -702,14 +702,13 @@ static inline int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select) return -ENODEV; } -static inline int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, +static inline int zynqmp_pm_reset_assert(const u32 reset, const enum zynqmp_pm_reset_action assert_flag) { return -ENODEV; } -static inline int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, - u32 *status) +static inline int zynqmp_pm_reset_get_status(const u32 reset, u32 *status) { return -ENODEV; } -- cgit v1.2.3 From 
92fb71333d5737d0296fb968a653dfda4b225175 Mon Sep 17 00:00:00 2001 From: Ronak Jain Date: Thu, 19 Sep 2024 22:55:01 -0700 Subject: firmware: xilinx: add support for new SMC call format Added zynqmp_pm_invoke_fw_fn() to use new SMC format in which lower 12 bits of SMC id are fixed and firmware header is moved to subsequent SMC arguments. The new SMC format supports full request and response buffers. Added zynqmp_pm_get_sip_svc_version() to get SiP SVC version number to check if TF-A is newer or older and use the SMC format accordingly to handle backward compatibility. Used new SMC format for PM_QUERY_DATA API as more response values are required in it. Signed-off-by: Ronak Jain Signed-off-by: Jay Buddhabhatti Link: https://lore.kernel.org/r/20240920055501.2658642-1-ronak.jain@amd.com Signed-off-by: Michal Simek --- drivers/firmware/xilinx/zynqmp.c | 137 ++++++++++++++++++++++++++++++++++- include/linux/firmware/xlnx-zynqmp.h | 26 ++++++- 2 files changed, 157 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index 920aba92feee..5022bd717054 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -3,7 +3,7 @@ * Xilinx Zynq MPSoC Firmware layer * * Copyright (C) 2014-2022 Xilinx, Inc. - * Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. + * Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. * * Michal Simek * Davorin Mista @@ -46,6 +46,7 @@ static DEFINE_HASHTABLE(pm_api_features_map, PM_API_FEATURE_CHECK_MAX_ORDER); static u32 ioctl_features[FEATURE_PAYLOAD_SIZE]; static u32 query_features[FEATURE_PAYLOAD_SIZE]; +static u32 sip_svc_version; static struct platform_device *em_dev; /** @@ -151,6 +152,9 @@ static noinline int do_fw_call_smc(u32 *ret_payload, u32 num_args, ...) ret_payload[1] = upper_32_bits(res.a0); ret_payload[2] = lower_32_bits(res.a1); ret_payload[3] = upper_32_bits(res.a1); + ret_payload[4] = lower_32_bits(res.a2); + ret_payload[5] = upper_32_bits(res.a2); + ret_payload[6] = lower_32_bits(res.a3); } return zynqmp_pm_ret_code((enum pm_ret_status)res.a0); @@ -191,6 +195,9 @@ static noinline int do_fw_call_hvc(u32 *ret_payload, u32 num_args, ...) ret_payload[1] = upper_32_bits(res.a0); ret_payload[2] = lower_32_bits(res.a1); ret_payload[3] = upper_32_bits(res.a1); + ret_payload[4] = lower_32_bits(res.a2); + ret_payload[5] = upper_32_bits(res.a2); + ret_payload[6] = lower_32_bits(res.a3); } return zynqmp_pm_ret_code((enum pm_ret_status)res.a0); @@ -331,6 +338,70 @@ int zynqmp_pm_is_function_supported(const u32 api_id, const u32 id) } EXPORT_SYMBOL_GPL(zynqmp_pm_is_function_supported); +/** + * zynqmp_pm_invoke_fw_fn() - Invoke the system-level platform management layer + * caller function depending on the configuration + * @pm_api_id: Requested PM-API call + * @ret_payload: Returned value array + * @num_args: Number of arguments to requested PM-API call + * + * Invoke platform management function for SMC or HVC call, depending on + * configuration. + * Following SMC Calling Convention (SMCCC) for SMC64: + * Pm Function Identifier, + * PM_SIP_SVC + PASS_THROUGH_FW_CMD_ID = + * ((SMC_TYPE_FAST << FUNCID_TYPE_SHIFT) + * ((SMC_64) << FUNCID_CC_SHIFT) + * ((SIP_START) << FUNCID_OEN_SHIFT) + * (PASS_THROUGH_FW_CMD_ID)) + * + * PM_SIP_SVC - Registered ZynqMP SIP Service Call. + * PASS_THROUGH_FW_CMD_ID - Fixed SiP SVC call ID for FW specific calls. 
+ * + * Return: Returns status, either success or error+reason + */ +int zynqmp_pm_invoke_fw_fn(u32 pm_api_id, u32 *ret_payload, u32 num_args, ...) +{ + /* + * Added SIP service call Function Identifier + * Make sure to stay in x0 register + */ + u64 smc_arg[SMC_ARG_CNT_64]; + int ret, i; + va_list arg_list; + u32 args[SMC_ARG_CNT_32] = {0}; + u32 module_id; + + if (num_args > SMC_ARG_CNT_32) + return -EINVAL; + + va_start(arg_list, num_args); + + /* Check if feature is supported or not */ + ret = zynqmp_pm_feature(pm_api_id); + if (ret < 0) + return ret; + + for (i = 0; i < num_args; i++) + args[i] = va_arg(arg_list, u32); + + va_end(arg_list); + + module_id = FIELD_GET(PLM_MODULE_ID_MASK, pm_api_id); + + if (module_id == 0) + module_id = XPM_MODULE_ID; + + smc_arg[0] = PM_SIP_SVC | PASS_THROUGH_FW_CMD_ID; + smc_arg[1] = ((u64)args[0] << 32U) | FIELD_PREP(PLM_MODULE_ID_MASK, module_id) | + (pm_api_id & API_ID_MASK); + for (i = 1; i < (SMC_ARG_CNT_64 - 1); i++) + smc_arg[i + 1] = ((u64)args[(i * 2)] << 32U) | args[(i * 2) - 1]; + + return do_fw_call(ret_payload, 8, smc_arg[0], smc_arg[1], smc_arg[2], smc_arg[3], + smc_arg[4], smc_arg[5], smc_arg[6], smc_arg[7]); +} + /** * zynqmp_pm_invoke_fn() - Invoke the system-level platform management layer * caller function depending on the configuration @@ -488,6 +559,35 @@ int zynqmp_pm_get_family_info(u32 *family, u32 *subfamily) } EXPORT_SYMBOL_GPL(zynqmp_pm_get_family_info); +/** + * zynqmp_pm_get_sip_svc_version() - Get SiP service call version + * @version: Returned version value + * + * Return: Returns status, either success or error+reason + */ +static int zynqmp_pm_get_sip_svc_version(u32 *version) +{ + struct arm_smccc_res res; + u64 args[SMC_ARG_CNT_64] = {0}; + + if (!version) + return -EINVAL; + + /* Check if SiP SVC version already verified */ + if (sip_svc_version > 0) { + *version = sip_svc_version; + return 0; + } + + args[0] = GET_SIP_SVC_VERSION; + + arm_smccc_smc(args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7], &res); + + *version = ((lower_32_bits(res.a0) << 16U) | lower_32_bits(res.a1)); + + return zynqmp_pm_ret_code(XST_PM_SUCCESS); +} + /** * zynqmp_pm_get_trustzone_version() - Get secure trustzone firmware version * @version: Returned version value @@ -552,10 +652,34 @@ static int get_set_conduit_method(struct device_node *np) */ int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata, u32 *out) { - int ret; + int ret, i = 0; + u32 ret_payload[PAYLOAD_ARG_CNT] = {0}; + + if (sip_svc_version >= SIP_SVC_PASSTHROUGH_VERSION) { + ret = zynqmp_pm_invoke_fw_fn(PM_QUERY_DATA, ret_payload, 4, + qdata.qid, qdata.arg1, + qdata.arg2, qdata.arg3); + /* To support backward compatibility */ + if (!ret && !ret_payload[0]) { + /* + * TF-A passes return status on 0th index but + * api to get clock name reads data from 0th + * index so pass data at 0th index instead of + * return status + */ + if (qdata.qid == PM_QID_CLOCK_GET_NAME || + qdata.qid == PM_QID_PINCTRL_GET_FUNCTION_NAME) + i = 1; + + for (; i < PAYLOAD_ARG_CNT; i++, out++) + *out = ret_payload[i]; - ret = zynqmp_pm_invoke_fn(PM_QUERY_DATA, out, 4, qdata.qid, qdata.arg1, qdata.arg2, - qdata.arg3); + return ret; + } + } + + ret = zynqmp_pm_invoke_fn(PM_QUERY_DATA, out, 4, qdata.qid, + qdata.arg1, qdata.arg2, qdata.arg3); /* * For clock name query, all bytes in SMC response are clock name @@ -1890,6 +2014,11 @@ static int zynqmp_firmware_probe(struct platform_device *pdev) if (ret) return ret; + /* Get SiP SVC version number */ + ret = 
zynqmp_pm_get_sip_svc_version(&sip_svc_version); + if (ret) + return ret; + ret = do_feature_check_call(PM_FEATURE_CHECK); if (ret >= 0 && ((ret & FIRMWARE_VERSION_MASK) >= PM_API_VERSION_1)) feature_check_enabled = true; diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 5b938fc2adad..76d85ad82ec0 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -3,7 +3,7 @@ * Xilinx Zynq MPSoC Firmware layer * * Copyright (C) 2014-2021 Xilinx - * Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. + * Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. * * Michal Simek * Davorin Mista @@ -32,6 +32,19 @@ /* SMC SIP service Call Function Identifier Prefix */ #define PM_SIP_SVC 0xC2000000 +/* SMC function ID to get SiP SVC version */ +#define GET_SIP_SVC_VERSION (0x8200ff03U) + +/* SiP Service Calls version numbers */ +#define SIP_SVC_VERSION_MAJOR (0U) +#define SIP_SVC_VERSION_MINOR (2U) + +#define SIP_SVC_PASSTHROUGH_VERSION ((SIP_SVC_VERSION_MAJOR << 16) | \ + SIP_SVC_VERSION_MINOR) + +/* Fixed ID for FW specific APIs */ +#define PASS_THROUGH_FW_CMD_ID GENMASK(11, 0) + /* PM API versions */ #define PM_API_VERSION_1 1 #define PM_API_VERSION_2 2 @@ -51,6 +64,7 @@ #define API_ID_MASK GENMASK(7, 0) #define MODULE_ID_MASK GENMASK(11, 8) +#define PLM_MODULE_ID_MASK GENMASK(15, 8) /* Firmware feature check version mask */ #define FIRMWARE_VERSION_MASK 0xFFFFU @@ -62,7 +76,13 @@ #define GET_CALLBACK_DATA 0xa01 /* Number of 32bits values in payload */ -#define PAYLOAD_ARG_CNT 4U +#define PAYLOAD_ARG_CNT 7U + +/* Number of 64bits arguments for SMC call */ +#define SMC_ARG_CNT_64 8U + +/* Number of 32bits arguments for SMC call */ +#define SMC_ARG_CNT_32 13U /* Number of arguments for a callback */ #define CB_ARG_CNT 4 @@ -130,6 +150,7 @@ enum pm_module_id { PM_MODULE_ID = 0x0, + XPM_MODULE_ID = 0x2, XSEM_MODULE_ID = 0x3, TF_A_MODULE_ID = 0xa, }; @@ -537,6 +558,7 @@ struct zynqmp_pm_query_data { }; int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 *ret_payload, u32 num_args, ...); +int zynqmp_pm_invoke_fw_fn(u32 pm_api_id, u32 *ret_payload, u32 num_args, ...); #if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE) int zynqmp_pm_get_api_version(u32 *version); -- cgit v1.2.3 From a849a0273d0f73a252d14d31c5003a8059ea51fc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 11 Sep 2024 15:17:37 +0200 Subject: ntp: Remove unused tick_nsec tick_nsec is only updated in the NTP core, but there are no users. Remove it. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20240911-devel-anna-maria-b4-timers-ptp-ntp-v1-1-2d52f4e13476@linutronix.de --- arch/x86/include/asm/timer.h | 2 -- include/linux/timex.h | 1 - kernel/time/ntp.c | 8 ++------ 3 files changed, 2 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 7365dd4acffb..23baf8c9b34c 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -6,8 +6,6 @@ #include #include -#define TICK_SIZE (tick_nsec / 1000) - unsigned long long native_sched_clock(void); extern void recalibrate_cpu_khz(void); diff --git a/include/linux/timex.h b/include/linux/timex.h index 3871b06bd302..7f7a12fd8200 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -145,7 +145,6 @@ unsigned long random_get_entropy_fallback(void); * estimated error = NTP dispersion. 
*/ extern unsigned long tick_usec; /* USER_HZ period (usec) */ -extern unsigned long tick_nsec; /* SHIFTED_HZ period (nsec) */ /* Required to safely shift negative values */ #define shift_right(x, s) ({ \ diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 802b336f4b8c..c17cc9d857bc 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -33,9 +33,6 @@ /* USER_HZ period (usecs): */ unsigned long tick_usec = USER_TICK_USEC; -/* SHIFTED_HZ period (nsecs): */ -unsigned long tick_nsec; - static u64 tick_length; static u64 tick_length_base; @@ -253,8 +250,8 @@ static inline int ntp_synced(void) */ /* - * Update (tick_length, tick_length_base, tick_nsec), based - * on (tick_usec, ntp_tick_adj, time_freq): + * Update tick_length and tick_length_base, based on tick_usec, ntp_tick_adj and + * time_freq: */ static void ntp_update_frequency(void) { @@ -267,7 +264,6 @@ static void ntp_update_frequency(void) second_length += ntp_tick_adj; second_length += time_freq; - tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT; new_base = div_u64(second_length, NTP_INTERVAL_FREQ); /* -- cgit v1.2.3 From 66606a93849bfe3cbe9f0b801b40f60b87c54e11 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 11 Sep 2024 15:17:38 +0200 Subject: ntp: Make tick_usec static There are no users of tick_usec outside of the NTP core code. Therefore make tick_usec static. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20240911-devel-anna-maria-b4-timers-ptp-ntp-v1-2-2d52f4e13476@linutronix.de --- include/linux/timex.h | 7 ------- kernel/time/ntp.c | 5 ++++- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index 7f7a12fd8200..4ee32eff3f22 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -139,13 +139,6 @@ unsigned long random_get_entropy_fallback(void); #define MAXSEC 2048 /* max interval between updates (s) */ #define NTP_PHASE_LIMIT ((MAXPHASE / NSEC_PER_USEC) << 5) /* beyond max. dispersion */ -/* - * kernel variables - * Note: maximum error = NTP sync distance = dispersion + delay / 2; - * estimated error = NTP dispersion. - */ -extern unsigned long tick_usec; /* USER_HZ period (usec) */ - /* Required to safely shift negative values */ #define shift_right(x, s) ({ \ __typeof__(x) __x = (x); \ diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index c17cc9d857bc..ed15ec993a82 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -31,7 +31,7 @@ /* USER_HZ period (usecs): */ -unsigned long tick_usec = USER_TICK_USEC; +static unsigned long tick_usec = USER_TICK_USEC; static u64 tick_length; static u64 tick_length_base; @@ -44,6 +44,9 @@ static u64 tick_length_base; /* * phase-lock loop variables + * + * Note: maximum error = NTP sync distance = dispersion + delay / 2; + * estimated error = NTP dispersion. */ /* -- cgit v1.2.3 From 8102c4daf44ab86c2d2226a8136bec905d6e2bd1 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Wed, 11 Sep 2024 10:30:20 +0100 Subject: timekeeping: Add the boot clock to system time snapshot For tracing purpose, the boot clock is interesting as it doesn't stop on suspend. Export it as part of the time snapshot. This will later allow the hypervisor to add boot clock timestamps to its events. 
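To make the new field concrete, a minimal, hypothetical in-kernel sketch (not part of this patch) of reading the boot clock out of a snapshot could look like this:

#include <linux/timekeeping.h>

/*
 * Hypothetical helper: grab one consistent snapshot and return the
 * boot-clock timestamp from it. The boot clock keeps counting across
 * suspend, unlike the monotonic clock.
 */
static s64 example_boot_timestamp_ns(void)
{
	struct system_time_snapshot snap;

	ktime_get_snapshot(&snap);

	return ktime_to_ns(snap.boot);
}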
Signed-off-by: Vincent Donnefort Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20240911093029.3279154-5-vdonnefort@google.com --- include/linux/timekeeping.h | 2 ++ kernel/time/timekeeping.c | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index fc12a9ba2c88..e85c27347e44 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -275,6 +275,7 @@ struct ktime_timestamps { * counter value * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time + * @boot: Boot time * @raw: Monotonic raw system time * @cs_id: Clocksource ID * @clock_was_set_seq: The sequence number of clock-was-set events @@ -283,6 +284,7 @@ struct ktime_timestamps { struct system_time_snapshot { u64 cycles; ktime_t real; + ktime_t boot; ktime_t raw; enum clocksource_ids cs_id; unsigned int clock_was_set_seq; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7e6f409bf311..47e44b9d2671 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1060,6 +1060,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot) unsigned int seq; ktime_t base_raw; ktime_t base_real; + ktime_t base_boot; u64 nsec_raw; u64 nsec_real; u64 now; @@ -1074,6 +1075,8 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot) systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq; base_real = ktime_add(tk->tkr_mono.base, tk_core.timekeeper.offs_real); + base_boot = ktime_add(tk->tkr_mono.base, + tk_core.timekeeper.offs_boot); base_raw = tk->tkr_raw.base; nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now); nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now); @@ -1081,6 +1084,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot) systime_snapshot->cycles = now; systime_snapshot->real = ktime_add_ns(base_real, nsec_real); + systime_snapshot->boot = ktime_add_ns(base_boot, nsec_real); systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw); } EXPORT_SYMBOL_GPL(ktime_get_snapshot); -- cgit v1.2.3 From f69767a1ada3ac74be2e1ac0795a05e1d1384eff Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 2 Oct 2024 11:59:50 -0500 Subject: PCI: Add TLP Processing Hints (TPH) support Add support for PCIe TLP Processing Hints (TPH) support (see PCIe r6.2, sec 6.17). Add TPH register definitions in pci_regs.h, including the TPH Requester capability register, TPH Requester control register, TPH Completer capability, and the ST fields of MSI-X entry. Introduce pcie_enable_tph() and pcie_disable_tph(), enabling drivers to toggle TPH support and configure specific ST mode as needed. Also add a new kernel parameter, "pci=notph", allowing users to disable TPH support across the entire system. 
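A hedged driver-side sketch (hypothetical code, not from this patch) of how the new interfaces are intended to be used: enable TPH in Interrupt Vector mode at probe time and disable it again on remove.

#include <linux/pci.h>
#include <linux/pci-tph.h>

/*
 * pcie_enable_tph() fails if the capability is absent, TPH is already
 * enabled, the requested mode is unsupported, or the system was booted
 * with "pci=notph"; the driver can simply continue without TPH.
 */
static int example_tph_probe(struct pci_dev *pdev)
{
	int ret;

	ret = pcie_enable_tph(pdev, PCI_TPH_ST_IV_MODE);
	if (ret)
		dev_info(&pdev->dev, "TPH not available, continuing without it\n");

	return 0;
}

static void example_tph_remove(struct pci_dev *pdev)
{
	pcie_disable_tph(pdev);
}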
Link: https://lore.kernel.org/r/20241002165954.128085-2-wei.huang2@amd.com Co-developed-by: Jing Liu Co-developed-by: Paul Luse Co-developed-by: Eric Van Tassell Signed-off-by: Jing Liu Signed-off-by: Paul Luse Signed-off-by: Eric Van Tassell Signed-off-by: Wei Huang Signed-off-by: Bjorn Helgaas Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek Reviewed-by: Jonathan Cameron Reviewed-by: Lukas Wunner --- Documentation/admin-guide/kernel-parameters.txt | 4 + drivers/pci/Kconfig | 9 ++ drivers/pci/Makefile | 1 + drivers/pci/pci.c | 4 + drivers/pci/pci.h | 12 ++ drivers/pci/probe.c | 1 + drivers/pci/tph.c | 197 ++++++++++++++++++++++++ include/linux/pci-tph.h | 21 +++ include/linux/pci.h | 7 + include/uapi/linux/pci_regs.h | 37 ++++- 10 files changed, 285 insertions(+), 8 deletions(-) create mode 100644 drivers/pci/tph.c create mode 100644 include/linux/pci-tph.h (limited to 'include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1518343bbe22..178995b07451 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4678,6 +4678,10 @@ nomio [S390] Do not use MIO instructions. norid [S390] ignore the RID field and force use of one PCI domain per PCI function + notph [PCIE] If the PCIE_TPH kernel config parameter + is enabled, this kernel boot option can be used + to disable PCIe TLP Processing Hints support + system-wide. pcie_aspm= [PCIE] Forcibly enable or ignore PCIe Active State Power Management. diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 0d94e4a967d8..2f270e4414b3 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -173,6 +173,15 @@ config PCI_PASID If unsure, say N. +config PCIE_TPH + bool "TLP Processing Hints" + help + This option adds support for PCIe TLP Processing Hints (TPH). + TPH allows endpoint devices to provide optimization hints, such as + desired caching behavior, for requests that target memory space. + These hints, called Steering Tags, can empower the system hardware + to optimize the utilization of platform resources. 
+ config PCI_P2PDMA bool "PCI peer-to-peer transfer support" depends on ZONE_DEVICE diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 374c5c06d92f..b2a100f2e24a 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_VGA_ARB) += vgaarb.o obj-$(CONFIG_PCI_DOE) += doe.o obj-$(CONFIG_PCI_DYNAMIC_OF_NODES) += of_property.o obj-$(CONFIG_PCI_NPEM) += npem.o +obj-$(CONFIG_PCIE_TPH) += tph.o # Endpoint library must be initialized before its users obj-$(CONFIG_PCI_ENDPOINT) += endpoint/ diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7d85c04fbba2..89dafecc869b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1828,6 +1828,7 @@ int pci_save_state(struct pci_dev *dev) pci_save_dpc_state(dev); pci_save_aer_state(dev); pci_save_ptm_state(dev); + pci_save_tph_state(dev); return pci_save_vc_state(dev); } EXPORT_SYMBOL(pci_save_state); @@ -1933,6 +1934,7 @@ void pci_restore_state(struct pci_dev *dev) pci_restore_rebar_state(dev); pci_restore_dpc_state(dev); pci_restore_ptm_state(dev); + pci_restore_tph_state(dev); pci_aer_clear_status(dev); pci_restore_aer_state(dev); @@ -6896,6 +6898,8 @@ static int __init pci_setup(char *str) pci_no_domains(); } else if (!strncmp(str, "noari", 5)) { pcie_ari_disabled = true; + } else if (!strncmp(str, "notph", 5)) { + pci_no_tph(); } else if (!strncmp(str, "cbiosize=", 9)) { pci_cardbus_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "cbmemsize=", 10)) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 14d00ce45bfa..d89fdbf04f36 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -597,6 +597,18 @@ static inline int pci_iov_bus_range(struct pci_bus *bus) #endif /* CONFIG_PCI_IOV */ +#ifdef CONFIG_PCIE_TPH +void pci_restore_tph_state(struct pci_dev *dev); +void pci_save_tph_state(struct pci_dev *dev); +void pci_no_tph(void); +void pci_tph_init(struct pci_dev *dev); +#else +static inline void pci_restore_tph_state(struct pci_dev *dev) { } +static inline void pci_save_tph_state(struct pci_dev *dev) { } +static inline void pci_no_tph(void) { } +static inline void pci_tph_init(struct pci_dev *dev) { } +#endif + #ifdef CONFIG_PCIE_PTM void pci_ptm_init(struct pci_dev *dev); void pci_save_ptm_state(struct pci_dev *dev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4f68414c3086..b086d53a9048 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2495,6 +2495,7 @@ static void pci_init_capabilities(struct pci_dev *dev) pci_dpc_init(dev); /* Downstream Port Containment */ pci_rcec_init(dev); /* Root Complex Event Collector */ pci_doe_init(dev); /* Data Object Exchange */ + pci_tph_init(dev); /* TLP Processing Hints */ pcie_report_downtraining(dev); pci_init_reset_methods(dev); diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c new file mode 100644 index 000000000000..4d6317cbf8a6 --- /dev/null +++ b/drivers/pci/tph.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * TPH (TLP Processing Hints) support + * + * Copyright (C) 2024 Advanced Micro Devices, Inc. 
+ * Eric Van Tassell + * Wei Huang + */ +#include +#include +#include + +#include "pci.h" + +/* System-wide TPH disabled */ +static bool pci_tph_disabled; + +static u8 get_st_modes(struct pci_dev *pdev) +{ + u32 reg; + + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, ®); + reg &= PCI_TPH_CAP_ST_NS | PCI_TPH_CAP_ST_IV | PCI_TPH_CAP_ST_DS; + + return reg; +} + +/* Return device's Root Port completer capability */ +static u8 get_rp_completer_type(struct pci_dev *pdev) +{ + struct pci_dev *rp; + u32 reg; + int ret; + + rp = pcie_find_root_port(pdev); + if (!rp) + return 0; + + ret = pcie_capability_read_dword(rp, PCI_EXP_DEVCAP2, ®); + if (ret) + return 0; + + return FIELD_GET(PCI_EXP_DEVCAP2_TPH_COMP_MASK, reg); +} + +/** + * pcie_disable_tph - Turn off TPH support for device + * @pdev: PCI device + * + * Return: none + */ +void pcie_disable_tph(struct pci_dev *pdev) +{ + if (!pdev->tph_cap) + return; + + if (!pdev->tph_enabled) + return; + + pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, 0); + + pdev->tph_mode = 0; + pdev->tph_req_type = 0; + pdev->tph_enabled = 0; +} +EXPORT_SYMBOL(pcie_disable_tph); + +/** + * pcie_enable_tph - Enable TPH support for device using a specific ST mode + * @pdev: PCI device + * @mode: ST mode to enable. Current supported modes include: + * + * - PCI_TPH_ST_NS_MODE: NO ST Mode + * - PCI_TPH_ST_IV_MODE: Interrupt Vector Mode + * - PCI_TPH_ST_DS_MODE: Device Specific Mode + * + * Check whether the mode is actually supported by the device before enabling + * and return an error if not. Additionally determine what types of requests, + * TPH or extended TPH, can be issued by the device based on its TPH requester + * capability and the Root Port's completer capability. + * + * Return: 0 on success, otherwise negative value (-errno) + */ +int pcie_enable_tph(struct pci_dev *pdev, int mode) +{ + u32 reg; + u8 dev_modes; + u8 rp_req_type; + + /* Honor "notph" kernel parameter */ + if (pci_tph_disabled) + return -EINVAL; + + if (!pdev->tph_cap) + return -EINVAL; + + if (pdev->tph_enabled) + return -EBUSY; + + /* Sanitize and check ST mode compatibility */ + mode &= PCI_TPH_CTRL_MODE_SEL_MASK; + dev_modes = get_st_modes(pdev); + if (!((1 << mode) & dev_modes)) + return -EINVAL; + + pdev->tph_mode = mode; + + /* Get req_type supported by device and its Root Port */ + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, ®); + if (FIELD_GET(PCI_TPH_CAP_EXT_TPH, reg)) + pdev->tph_req_type = PCI_TPH_REQ_EXT_TPH; + else + pdev->tph_req_type = PCI_TPH_REQ_TPH_ONLY; + + rp_req_type = get_rp_completer_type(pdev); + + /* Final req_type is the smallest value of two */ + pdev->tph_req_type = min(pdev->tph_req_type, rp_req_type); + + if (pdev->tph_req_type == PCI_TPH_REQ_DISABLE) + return -EINVAL; + + /* Write them into TPH control register */ + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, ®); + + reg &= ~PCI_TPH_CTRL_MODE_SEL_MASK; + reg |= FIELD_PREP(PCI_TPH_CTRL_MODE_SEL_MASK, pdev->tph_mode); + + reg &= ~PCI_TPH_CTRL_REQ_EN_MASK; + reg |= FIELD_PREP(PCI_TPH_CTRL_REQ_EN_MASK, pdev->tph_req_type); + + pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, reg); + + pdev->tph_enabled = 1; + + return 0; +} +EXPORT_SYMBOL(pcie_enable_tph); + +void pci_restore_tph_state(struct pci_dev *pdev) +{ + struct pci_cap_saved_state *save_state; + u32 *cap; + + if (!pdev->tph_cap) + return; + + if (!pdev->tph_enabled) + return; + + save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_TPH); + if (!save_state) + return; + + /* Restore control register 
and all ST entries */ + cap = &save_state->cap.data[0]; + pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, *cap++); +} + +void pci_save_tph_state(struct pci_dev *pdev) +{ + struct pci_cap_saved_state *save_state; + u32 *cap; + + if (!pdev->tph_cap) + return; + + if (!pdev->tph_enabled) + return; + + save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_TPH); + if (!save_state) + return; + + /* Save control register */ + cap = &save_state->cap.data[0]; + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, cap++); +} + +void pci_no_tph(void) +{ + pci_tph_disabled = true; + + pr_info("PCIe TPH is disabled\n"); +} + +void pci_tph_init(struct pci_dev *pdev) +{ + u32 save_size; + + pdev->tph_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_TPH); + if (!pdev->tph_cap) + return; + + save_size = sizeof(u32); + pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_TPH, save_size); +} diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h new file mode 100644 index 000000000000..58654a334ffb --- /dev/null +++ b/include/linux/pci-tph.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TPH (TLP Processing Hints) + * + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * Eric Van Tassell + * Wei Huang + */ +#ifndef LINUX_PCI_TPH_H +#define LINUX_PCI_TPH_H + +#ifdef CONFIG_PCIE_TPH +void pcie_disable_tph(struct pci_dev *pdev); +int pcie_enable_tph(struct pci_dev *pdev, int mode); +#else +static inline void pcie_disable_tph(struct pci_dev *pdev) { } +static inline int pcie_enable_tph(struct pci_dev *pdev, int mode) +{ return -EINVAL; } +#endif + +#endif /* LINUX_PCI_TPH_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..8351d76b6e12 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -434,6 +434,7 @@ struct pci_dev { unsigned int ats_enabled:1; /* Address Translation Svc */ unsigned int pasid_enabled:1; /* Process Address Space ID */ unsigned int pri_enabled:1; /* Page Request Interface */ + unsigned int tph_enabled:1; /* TLP Processing Hints */ unsigned int is_managed:1; /* Managed via devres */ unsigned int is_msi_managed:1; /* MSI release via devres installed */ unsigned int needs_freset:1; /* Requires fundamental reset */ @@ -534,6 +535,12 @@ struct pci_dev { /* These methods index pci_reset_fn_methods[] */ u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */ + +#ifdef CONFIG_PCIE_TPH + u16 tph_cap; /* TPH capability offset */ + u8 tph_mode; /* TPH mode */ + u8 tph_req_type; /* TPH requester type */ +#endif }; static inline struct pci_dev *pci_physfn(struct pci_dev *dev) diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 12323b3334a9..155dea741615 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -340,7 +340,8 @@ #define PCI_MSIX_ENTRY_UPPER_ADDR 0x4 /* Message Upper Address */ #define PCI_MSIX_ENTRY_DATA 0x8 /* Message Data */ #define PCI_MSIX_ENTRY_VECTOR_CTRL 0xc /* Vector Control */ -#define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 /* Mask Bit */ +#define PCI_MSIX_ENTRY_CTRL_ST 0xffff0000 /* Steering Tag */ /* CompactPCI Hotswap Register */ @@ -659,6 +660,7 @@ #define PCI_EXP_DEVCAP2_ATOMIC_COMP64 0x00000100 /* 64b AtomicOp completion */ #define PCI_EXP_DEVCAP2_ATOMIC_COMP128 0x00000200 /* 128b AtomicOp completion */ #define PCI_EXP_DEVCAP2_LTR 0x00000800 /* Latency tolerance reporting */ +#define PCI_EXP_DEVCAP2_TPH_COMP_MASK 0x00003000 /* TPH completer support */ #define PCI_EXP_DEVCAP2_OBFF_MASK 
0x000c0000 /* OBFF support mechanism */ #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ #define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ @@ -1023,15 +1025,34 @@ #define PCI_DPA_CAP_SUBSTATE_MASK 0x1F /* # substates - 1 */ #define PCI_DPA_BASE_SIZEOF 16 /* size with 0 substates */ +/* TPH Completer Support */ +#define PCI_EXP_DEVCAP2_TPH_COMP_NONE 0x0 /* None */ +#define PCI_EXP_DEVCAP2_TPH_COMP_TPH_ONLY 0x1 /* TPH only */ +#define PCI_EXP_DEVCAP2_TPH_COMP_EXT_TPH 0x3 /* TPH and Extended TPH */ + /* TPH Requester */ #define PCI_TPH_CAP 4 /* capability register */ -#define PCI_TPH_CAP_LOC_MASK 0x600 /* location mask */ -#define PCI_TPH_LOC_NONE 0x000 /* no location */ -#define PCI_TPH_LOC_CAP 0x200 /* in capability */ -#define PCI_TPH_LOC_MSIX 0x400 /* in MSI-X */ -#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST table mask */ -#define PCI_TPH_CAP_ST_SHIFT 16 /* ST table shift */ -#define PCI_TPH_BASE_SIZEOF 0xc /* size with no ST table */ +#define PCI_TPH_CAP_ST_NS 0x00000001 /* No ST Mode Supported */ +#define PCI_TPH_CAP_ST_IV 0x00000002 /* Interrupt Vector Mode Supported */ +#define PCI_TPH_CAP_ST_DS 0x00000004 /* Device Specific Mode Supported */ +#define PCI_TPH_CAP_EXT_TPH 0x00000100 /* Ext TPH Requester Supported */ +#define PCI_TPH_CAP_LOC_MASK 0x00000600 /* ST Table Location */ +#define PCI_TPH_LOC_NONE 0x00000000 /* Not present */ +#define PCI_TPH_LOC_CAP 0x00000200 /* In capability */ +#define PCI_TPH_LOC_MSIX 0x00000400 /* In MSI-X */ +#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST Table Size */ +#define PCI_TPH_CAP_ST_SHIFT 16 /* ST Table Size shift */ +#define PCI_TPH_BASE_SIZEOF 0xc /* Size with no ST table */ + +#define PCI_TPH_CTRL 8 /* control register */ +#define PCI_TPH_CTRL_MODE_SEL_MASK 0x00000007 /* ST Mode Select */ +#define PCI_TPH_ST_NS_MODE 0x0 /* No ST Mode */ +#define PCI_TPH_ST_IV_MODE 0x1 /* Interrupt Vector Mode */ +#define PCI_TPH_ST_DS_MODE 0x2 /* Device Specific Mode */ +#define PCI_TPH_CTRL_REQ_EN_MASK 0x00000300 /* TPH Requester Enable */ +#define PCI_TPH_REQ_DISABLE 0x0 /* No TPH requests allowed */ +#define PCI_TPH_REQ_TPH_ONLY 0x1 /* TPH only requests allowed */ +#define PCI_TPH_REQ_EXT_TPH 0x3 /* Extended TPH requests allowed */ /* Downstream Port Containment */ #define PCI_EXP_DPC_CAP 0x04 /* DPC Capability */ -- cgit v1.2.3 From d2e8a34876ce69b27f450eebfc550ab8e316f752 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 2 Oct 2024 11:59:51 -0500 Subject: PCI/TPH: Add Steering Tag support Add pcie_tph_get_cpu_st() to allow a caller to retrieve Steering Tags for a target memory associated with a specific CPU. The ST tag is retrieved by invoking PCI ACPI "_DSM to Query Cache Locality TPH Features" method (rev=0x7, func=0xF) of the device's Root Port device. Add pcie_tph_set_st_entry() to update the device's Steering Tags. The tags will be written into the device's MSI-X table or the ST table located in the TPH Extended Capability space. 
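For illustration, a hypothetical usage sketch (not part of this patch): look up the Steering Tag for volatile memory associated with a given CPU and program it into one ST entry, e.g. the entry matching a queue's MSI-X vector index. TPH_MEM_TYPE_VM is assumed to be the volatile-memory value of the tph_mem_type enum added by this series, and cpu_uid is the ACPI processor UID expected by the _DSM.

#include <linux/pci-tph.h>

static int example_steer_queue(struct pci_dev *pdev, unsigned int cpu_uid,
			       unsigned int st_index)
{
	u16 tag;
	int ret;

	ret = pcie_tph_get_cpu_st(pdev, TPH_MEM_TYPE_VM, cpu_uid, &tag);
	if (ret)
		return ret;

	/* The core decides whether this lands in MSI-X or the TPH capability */
	return pcie_tph_set_st_entry(pdev, st_index, tag);
}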
Co-developed-by: Eric Van Tassell Link: https://lore.kernel.org/r/20241002165954.128085-3-wei.huang2@amd.com Signed-off-by: Eric Van Tassell Signed-off-by: Wei Huang Signed-off-by: Bjorn Helgaas Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek --- drivers/pci/tph.c | 352 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/pci-tph.h | 23 ++++ 2 files changed, 374 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c index 4d6317cbf8a6..1e604fbbda65 100644 --- a/drivers/pci/tph.c +++ b/drivers/pci/tph.c @@ -7,6 +7,8 @@ * Wei Huang */ #include +#include +#include #include #include @@ -15,6 +17,134 @@ /* System-wide TPH disabled */ static bool pci_tph_disabled; +#ifdef CONFIG_ACPI +/* + * The st_info struct defines the Steering Tag (ST) info returned by the + * firmware PCI ACPI _DSM method (rev=0x7, func=0xF, "_DSM to Query Cache + * Locality TPH Features"), as specified in the approved ECN for PCI Firmware + * Spec and available at https://members.pcisig.com/wg/PCI-SIG/document/15470. + * + * @vm_st_valid: 8-bit ST for volatile memory is valid + * @vm_xst_valid: 16-bit extended ST for volatile memory is valid + * @vm_ph_ignore: 1 => PH was and will be ignored, 0 => PH should be supplied + * @vm_st: 8-bit ST for volatile mem + * @vm_xst: 16-bit extended ST for volatile mem + * @pm_st_valid: 8-bit ST for persistent memory is valid + * @pm_xst_valid: 16-bit extended ST for persistent memory is valid + * @pm_ph_ignore: 1 => PH was and will be ignored, 0 => PH should be supplied + * @pm_st: 8-bit ST for persistent mem + * @pm_xst: 16-bit extended ST for persistent mem + */ +union st_info { + struct { + u64 vm_st_valid : 1; + u64 vm_xst_valid : 1; + u64 vm_ph_ignore : 1; + u64 rsvd1 : 5; + u64 vm_st : 8; + u64 vm_xst : 16; + u64 pm_st_valid : 1; + u64 pm_xst_valid : 1; + u64 pm_ph_ignore : 1; + u64 rsvd2 : 5; + u64 pm_st : 8; + u64 pm_xst : 16; + }; + u64 value; +}; + +static u16 tph_extract_tag(enum tph_mem_type mem_type, u8 req_type, + union st_info *info) +{ + switch (req_type) { + case PCI_TPH_REQ_TPH_ONLY: /* 8-bit tag */ + switch (mem_type) { + case TPH_MEM_TYPE_VM: + if (info->vm_st_valid) + return info->vm_st; + break; + case TPH_MEM_TYPE_PM: + if (info->pm_st_valid) + return info->pm_st; + break; + } + break; + case PCI_TPH_REQ_EXT_TPH: /* 16-bit tag */ + switch (mem_type) { + case TPH_MEM_TYPE_VM: + if (info->vm_xst_valid) + return info->vm_xst; + break; + case TPH_MEM_TYPE_PM: + if (info->pm_xst_valid) + return info->pm_xst; + break; + } + break; + default: + return 0; + } + + return 0; +} + +#define TPH_ST_DSM_FUNC_INDEX 0xF +static acpi_status tph_invoke_dsm(acpi_handle handle, u32 cpu_uid, + union st_info *st_out) +{ + union acpi_object arg3[3], in_obj, *out_obj; + + if (!acpi_check_dsm(handle, &pci_acpi_dsm_guid, 7, + BIT(TPH_ST_DSM_FUNC_INDEX))) + return AE_ERROR; + + /* DWORD: feature ID (0 for processor cache ST query) */ + arg3[0].integer.type = ACPI_TYPE_INTEGER; + arg3[0].integer.value = 0; + + /* DWORD: target UID */ + arg3[1].integer.type = ACPI_TYPE_INTEGER; + arg3[1].integer.value = cpu_uid; + + /* QWORD: properties, all 0's */ + arg3[2].integer.type = ACPI_TYPE_INTEGER; + arg3[2].integer.value = 0; + + in_obj.type = ACPI_TYPE_PACKAGE; + in_obj.package.count = ARRAY_SIZE(arg3); + in_obj.package.elements = arg3; + + out_obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 7, + TPH_ST_DSM_FUNC_INDEX, &in_obj); + if (!out_obj) + return AE_ERROR; + + if (out_obj->type != 
ACPI_TYPE_BUFFER) { + ACPI_FREE(out_obj); + return AE_ERROR; + } + + st_out->value = *((u64 *)(out_obj->buffer.pointer)); + + ACPI_FREE(out_obj); + + return AE_OK; +} +#endif + +/* Update the TPH Requester Enable field of TPH Control Register */ +static void set_ctrl_reg_req_en(struct pci_dev *pdev, u8 req_type) +{ + u32 reg; + + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, ®); + + reg &= ~PCI_TPH_CTRL_REQ_EN_MASK; + reg |= FIELD_PREP(PCI_TPH_CTRL_REQ_EN_MASK, req_type); + + pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, reg); +} + static u8 get_st_modes(struct pci_dev *pdev) { u32 reg; @@ -25,6 +155,37 @@ static u8 get_st_modes(struct pci_dev *pdev) return reg; } +static u32 get_st_table_loc(struct pci_dev *pdev) +{ + u32 reg; + + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, ®); + + return FIELD_GET(PCI_TPH_CAP_LOC_MASK, reg); +} + +/* + * Return the size of ST table. If ST table is not in TPH Requester Extended + * Capability space, return 0. Otherwise return the ST Table Size + 1. + */ +static u16 get_st_table_size(struct pci_dev *pdev) +{ + u32 reg; + u32 loc; + + /* Check ST table location first */ + loc = get_st_table_loc(pdev); + + /* Convert loc to match with PCI_TPH_LOC_* defined in pci_regs.h */ + loc = FIELD_PREP(PCI_TPH_CAP_LOC_MASK, loc); + if (loc != PCI_TPH_LOC_CAP) + return 0; + + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, ®); + + return FIELD_GET(PCI_TPH_CAP_ST_MASK, reg) + 1; +} + /* Return device's Root Port completer capability */ static u8 get_rp_completer_type(struct pci_dev *pdev) { @@ -43,6 +204,171 @@ static u8 get_rp_completer_type(struct pci_dev *pdev) return FIELD_GET(PCI_EXP_DEVCAP2_TPH_COMP_MASK, reg); } +/* Write ST to MSI-X vector control reg - Return 0 if OK, otherwise -errno */ +static int write_tag_to_msix(struct pci_dev *pdev, int msix_idx, u16 tag) +{ +#ifdef CONFIG_PCI_MSI + struct msi_desc *msi_desc = NULL; + void __iomem *vec_ctrl; + u32 val; + int err = 0; + + msi_lock_descs(&pdev->dev); + + /* Find the msi_desc entry with matching msix_idx */ + msi_for_each_desc(msi_desc, &pdev->dev, MSI_DESC_ASSOCIATED) { + if (msi_desc->msi_index == msix_idx) + break; + } + + if (!msi_desc) { + err = -ENXIO; + goto err_out; + } + + /* Get the vector control register (offset 0xc) pointed by msix_idx */ + vec_ctrl = pdev->msix_base + msix_idx * PCI_MSIX_ENTRY_SIZE; + vec_ctrl += PCI_MSIX_ENTRY_VECTOR_CTRL; + + val = readl(vec_ctrl); + val &= ~PCI_MSIX_ENTRY_CTRL_ST; + val |= FIELD_PREP(PCI_MSIX_ENTRY_CTRL_ST, tag); + writel(val, vec_ctrl); + + /* Read back to flush the update */ + val = readl(vec_ctrl); + +err_out: + msi_unlock_descs(&pdev->dev); + return err; +#else + return -ENODEV; +#endif +} + +/* Write tag to ST table - Return 0 if OK, otherwise -errno */ +static int write_tag_to_st_table(struct pci_dev *pdev, int index, u16 tag) +{ + int st_table_size; + int offset; + + /* Check if index is out of bound */ + st_table_size = get_st_table_size(pdev); + if (index >= st_table_size) + return -ENXIO; + + offset = pdev->tph_cap + PCI_TPH_BASE_SIZEOF + index * sizeof(u16); + + return pci_write_config_word(pdev, offset, tag); +} + +/** + * pcie_tph_get_cpu_st() - Retrieve Steering Tag for a target memory associated + * with a specific CPU + * @pdev: PCI device + * @mem_type: target memory type (volatile or persistent RAM) + * @cpu_uid: associated CPU id + * @tag: Steering Tag to be returned + * + * Return the Steering Tag for a target memory that is associated with a + * specific CPU as indicated by cpu_uid. 
+ * + * Return: 0 if success, otherwise negative value (-errno) + */ +int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *tag) +{ +#ifdef CONFIG_ACPI + struct pci_dev *rp; + acpi_handle rp_acpi_handle; + union st_info info; + + rp = pcie_find_root_port(pdev); + if (!rp || !rp->bus || !rp->bus->bridge) + return -ENODEV; + + rp_acpi_handle = ACPI_HANDLE(rp->bus->bridge); + + if (tph_invoke_dsm(rp_acpi_handle, cpu_uid, &info) != AE_OK) { + *tag = 0; + return -EINVAL; + } + + *tag = tph_extract_tag(mem_type, pdev->tph_req_type, &info); + + pci_dbg(pdev, "get steering tag: mem_type=%s, cpu_uid=%d, tag=%#04x\n", + (mem_type == TPH_MEM_TYPE_VM) ? "volatile" : "persistent", + cpu_uid, *tag); + + return 0; +#else + return -ENODEV; +#endif +} +EXPORT_SYMBOL(pcie_tph_get_cpu_st); + +/** + * pcie_tph_set_st_entry() - Set Steering Tag in the ST table entry + * @pdev: PCI device + * @index: ST table entry index + * @tag: Steering Tag to be written + * + * Figure out the proper location of ST table, either in the MSI-X table or + * in the TPH Extended Capability space, and write the Steering Tag into + * the ST entry pointed by index. + * + * Return: 0 if success, otherwise negative value (-errno) + */ +int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag) +{ + u32 loc; + int err = 0; + + if (!pdev->tph_cap) + return -EINVAL; + + if (!pdev->tph_enabled) + return -EINVAL; + + /* No need to write tag if device is in "No ST Mode" */ + if (pdev->tph_mode == PCI_TPH_ST_NS_MODE) + return 0; + + /* + * Disable TPH before updating ST to avoid potential instability as + * cautioned in PCIe r6.2, sec 6.17.3, "ST Modes of Operation" + */ + set_ctrl_reg_req_en(pdev, PCI_TPH_REQ_DISABLE); + + loc = get_st_table_loc(pdev); + /* Convert loc to match with PCI_TPH_LOC_* */ + loc = FIELD_PREP(PCI_TPH_CAP_LOC_MASK, loc); + + switch (loc) { + case PCI_TPH_LOC_MSIX: + err = write_tag_to_msix(pdev, index, tag); + break; + case PCI_TPH_LOC_CAP: + err = write_tag_to_st_table(pdev, index, tag); + break; + default: + err = -EINVAL; + } + + if (err) { + pcie_disable_tph(pdev); + return err; + } + + set_ctrl_reg_req_en(pdev, pdev->tph_mode); + + pci_dbg(pdev, "set steering tag: %s table, index=%d, tag=%#04x\n", + (loc == PCI_TPH_LOC_MSIX) ? 
"MSI-X" : "ST", index, tag); + + return 0; +} +EXPORT_SYMBOL(pcie_tph_set_st_entry); + /** * pcie_disable_tph - Turn off TPH support for device * @pdev: PCI device @@ -140,6 +466,8 @@ EXPORT_SYMBOL(pcie_enable_tph); void pci_restore_tph_state(struct pci_dev *pdev) { struct pci_cap_saved_state *save_state; + int num_entries, i, offset; + u16 *st_entry; u32 *cap; if (!pdev->tph_cap) @@ -155,11 +483,21 @@ void pci_restore_tph_state(struct pci_dev *pdev) /* Restore control register and all ST entries */ cap = &save_state->cap.data[0]; pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, *cap++); + st_entry = (u16 *)cap; + offset = PCI_TPH_BASE_SIZEOF; + num_entries = get_st_table_size(pdev); + for (i = 0; i < num_entries; i++) { + pci_write_config_word(pdev, pdev->tph_cap + offset, + *st_entry++); + offset += sizeof(u16); + } } void pci_save_tph_state(struct pci_dev *pdev) { struct pci_cap_saved_state *save_state; + int num_entries, i, offset; + u16 *st_entry; u32 *cap; if (!pdev->tph_cap) @@ -175,6 +513,16 @@ void pci_save_tph_state(struct pci_dev *pdev) /* Save control register */ cap = &save_state->cap.data[0]; pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, cap++); + + /* Save all ST entries in extended capability structure */ + st_entry = (u16 *)cap; + offset = PCI_TPH_BASE_SIZEOF; + num_entries = get_st_table_size(pdev); + for (i = 0; i < num_entries; i++) { + pci_read_config_word(pdev, pdev->tph_cap + offset, + st_entry++); + offset += sizeof(u16); + } } void pci_no_tph(void) @@ -186,12 +534,14 @@ void pci_no_tph(void) void pci_tph_init(struct pci_dev *pdev) { + int num_entries; u32 save_size; pdev->tph_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_TPH); if (!pdev->tph_cap) return; - save_size = sizeof(u32); + num_entries = get_st_table_size(pdev); + save_size = sizeof(u32) + num_entries * sizeof(u16); pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_TPH, save_size); } diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h index 58654a334ffb..c3e806c13d64 100644 --- a/include/linux/pci-tph.h +++ b/include/linux/pci-tph.h @@ -9,10 +9,33 @@ #ifndef LINUX_PCI_TPH_H #define LINUX_PCI_TPH_H +/* + * According to the ECN for PCI Firmware Spec, Steering Tag can be different + * depending on the memory type: Volatile Memory or Persistent Memory. When a + * caller query about a target's Steering Tag, it must provide the target's + * tph_mem_type. ECN link: https://members.pcisig.com/wg/PCI-SIG/document/15470. 
+ */ +enum tph_mem_type { + TPH_MEM_TYPE_VM, /* volatile memory */ + TPH_MEM_TYPE_PM /* persistent memory */ +}; + #ifdef CONFIG_PCIE_TPH +int pcie_tph_set_st_entry(struct pci_dev *pdev, + unsigned int index, u16 tag); +int pcie_tph_get_cpu_st(struct pci_dev *dev, + enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *tag); void pcie_disable_tph(struct pci_dev *pdev); int pcie_enable_tph(struct pci_dev *pdev, int mode); #else +static inline int pcie_tph_set_st_entry(struct pci_dev *pdev, + unsigned int index, u16 tag) +{ return -EINVAL; } +static inline int pcie_tph_get_cpu_st(struct pci_dev *dev, + enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *tag) +{ return -EINVAL; } static inline void pcie_disable_tph(struct pci_dev *pdev) { } static inline int pcie_enable_tph(struct pci_dev *pdev, int mode) { return -EINVAL; } -- cgit v1.2.3 From 277b339c4ba5c3d51f86892efd7bfbb012318ed8 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 1 Oct 2024 17:04:10 +0100 Subject: net: pcs: xpcs: move PCS reset to .pcs_pre_config() Move the PCS reset to .pcs_pre_config() rather than at creation time, which means we call the reset function with the interface that we're actually going to be using to talk to the downstream device. Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean # sja1105 Signed-off-by: Russell King (Oracle) Tested-by: for them? Link: https://patch.msgid.link/E1svfMA-005ZI3-Va@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-xpcs.c | 39 +++++++++++++++++++++++++++++---------- include/linux/pcs/pcs-xpcs.h | 1 + 2 files changed, 30 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 82463f9d50c8..7c6c40ddf722 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -659,6 +659,30 @@ int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable) } EXPORT_SYMBOL_GPL(xpcs_config_eee); +static void xpcs_pre_config(struct phylink_pcs *pcs, phy_interface_t interface) +{ + struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); + const struct dw_xpcs_compat *compat; + int ret; + + if (!xpcs->need_reset) + return; + + compat = xpcs_find_compat(xpcs->desc, interface); + if (!compat) { + dev_err(&xpcs->mdiodev->dev, "unsupported interface %s\n", + phy_modes(interface)); + return; + } + + ret = xpcs_soft_reset(xpcs, compat); + if (ret) + dev_err(&xpcs->mdiodev->dev, "soft reset failed: %pe\n", + ERR_PTR(ret)); + + xpcs->need_reset = false; +} + static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, unsigned int neg_mode) { @@ -1365,6 +1389,7 @@ static const struct dw_xpcs_desc xpcs_desc_list[] = { static const struct phylink_pcs_ops xpcs_phylink_ops = { .pcs_validate = xpcs_validate, + .pcs_pre_config = xpcs_pre_config, .pcs_config = xpcs_config, .pcs_get_state = xpcs_get_state, .pcs_an_restart = xpcs_an_restart, @@ -1460,18 +1485,12 @@ static int xpcs_init_id(struct dw_xpcs *xpcs) static int xpcs_init_iface(struct dw_xpcs *xpcs, phy_interface_t interface) { - const struct dw_xpcs_compat *compat; - - compat = xpcs_find_compat(xpcs->desc, interface); - if (!compat) - return -EINVAL; - - if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) { + if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) xpcs->pcs.poll = false; - return 0; - } + else + xpcs->need_reset = true; - return xpcs_soft_reset(xpcs, compat); + return 0; } static struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev, diff --git a/include/linux/pcs/pcs-xpcs.h 
b/include/linux/pcs/pcs-xpcs.h index b4a4eb6c8866..fd75d0605bb6 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -61,6 +61,7 @@ struct dw_xpcs { struct clk_bulk_data clks[DW_XPCS_NUM_CLKS]; struct phylink_pcs pcs; phy_interface_t interface; + bool need_reset; }; int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); -- cgit v1.2.3 From bedea1539acb808c870ad85b16f8d8f0bef5e0bb Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 1 Oct 2024 17:04:26 +0100 Subject: net: pcs: xpcs: add xpcs_destroy_pcs() and xpcs_create_pcs_mdiodev() Provide xpcs create/destroy functions that return and take a phylink_pcs pointer instead of an xpcs pointer. This will be used by drivers that have been converted to use phylink_pcs pointers internally, rather than dw_xpcs pointers. As xpcs_create_mdiodev() no longer makes use of its interface argument, pass PHY_INTERFACE_MODE_NA into xpcs_create_mdiodev() until it is removed later in the series. Reviewed-by: Vladimir Oltean Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1svfMQ-005ZIL-Bi@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-xpcs.c | 18 ++++++++++++++++++ include/linux/pcs/pcs-xpcs.h | 3 +++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 8765b01c0b5d..9b61f97222b9 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -1550,6 +1550,18 @@ struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr, } EXPORT_SYMBOL_GPL(xpcs_create_mdiodev); +struct phylink_pcs *xpcs_create_pcs_mdiodev(struct mii_bus *bus, int addr) +{ + struct dw_xpcs *xpcs; + + xpcs = xpcs_create_mdiodev(bus, addr, PHY_INTERFACE_MODE_NA); + if (IS_ERR(xpcs)) + return ERR_CAST(xpcs); + + return &xpcs->pcs; +} +EXPORT_SYMBOL_GPL(xpcs_create_pcs_mdiodev); + /** * xpcs_create_fwnode() - Create a DW xPCS instance from @fwnode * @fwnode: fwnode handle poining to the DW XPCS device @@ -1599,5 +1611,11 @@ void xpcs_destroy(struct dw_xpcs *xpcs) } EXPORT_SYMBOL_GPL(xpcs_destroy); +void xpcs_destroy_pcs(struct phylink_pcs *pcs) +{ + xpcs_destroy(phylink_pcs_to_xpcs(pcs)); +} +EXPORT_SYMBOL_GPL(xpcs_destroy_pcs); + MODULE_DESCRIPTION("Synopsys DesignWare XPCS library"); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index fd75d0605bb6..a4e2243ce647 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -78,4 +78,7 @@ struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode, phy_interface_t interface); void xpcs_destroy(struct dw_xpcs *xpcs); +struct phylink_pcs *xpcs_create_pcs_mdiodev(struct mii_bus *bus, int addr); +void xpcs_destroy_pcs(struct phylink_pcs *pcs); + #endif /* __LINUX_PCS_XPCS_H */ -- cgit v1.2.3 From bf5a61645bb2d51be53da5b26f948045b571147c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 1 Oct 2024 17:04:51 +0100 Subject: net: pcs: xpcs: drop interface argument from xpcs_create*() The XPCS sub-driver no longer uses the "interface" argument to the xpcs_create_mdiodev() and xpcs_create_fwnode() functions. Remove this now unnecessary argument, updating the stmmac driver appropriately. 
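For illustration, a minimal sketch of the simplified creation path (the
bus pointer and MDIO address are placeholders, not taken from any real
driver):

	struct dw_xpcs *xpcs;

	xpcs = xpcs_create_mdiodev(bus, addr);
	if (IS_ERR(xpcs))
		return PTR_ERR(xpcs);

The PHY interface no longer needs to be known at creation time; it is
supplied later through the phylink callbacks such as .pcs_pre_config()
and .pcs_config(), which receive the interface that will actually be
used.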
Reviewed-by: Vladimir Oltean Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1svfMp-005ZIp-UX@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 7 +++---- drivers/net/pcs/pcs-xpcs.c | 10 +++------- include/linux/pcs/pcs-xpcs.h | 6 ++---- 3 files changed, 8 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 03f90676b3ad..0c7d81ddd440 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -500,23 +500,22 @@ int stmmac_pcs_setup(struct net_device *ndev) struct fwnode_handle *devnode, *pcsnode; struct dw_xpcs *xpcs = NULL; struct stmmac_priv *priv; - int addr, mode, ret; + int addr, ret; priv = netdev_priv(ndev); - mode = priv->plat->phy_interface; devnode = priv->plat->port_node; if (priv->plat->pcs_init) { ret = priv->plat->pcs_init(priv); } else if (fwnode_property_present(devnode, "pcs-handle")) { pcsnode = fwnode_find_reference(devnode, "pcs-handle", 0); - xpcs = xpcs_create_fwnode(pcsnode, mode); + xpcs = xpcs_create_fwnode(pcsnode); fwnode_handle_put(pcsnode); ret = PTR_ERR_OR_ZERO(xpcs); } else if (priv->plat->mdio_bus_data && priv->plat->mdio_bus_data->pcs_mask) { addr = ffs(priv->plat->mdio_bus_data->pcs_mask) - 1; - xpcs = xpcs_create_mdiodev(priv->mii, addr, mode); + xpcs = xpcs_create_mdiodev(priv->mii, addr); ret = PTR_ERR_OR_ZERO(xpcs); } else { return 0; diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 9b61f97222b9..f25e7afdfdf5 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -1520,14 +1520,12 @@ out_free_data: * xpcs_create_mdiodev() - create a DW xPCS instance with the MDIO @addr * @bus: pointer to the MDIO-bus descriptor for the device to be looked at * @addr: device MDIO-bus ID - * @interface: requested PHY interface * * Return: a pointer to the DW XPCS handle if successful, otherwise -ENODEV if * the PCS device couldn't be found on the bus and other negative errno related * to the data allocation and MDIO-bus communications. */ -struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr, - phy_interface_t interface) +struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr) { struct mdio_device *mdiodev; struct dw_xpcs *xpcs; @@ -1554,7 +1552,7 @@ struct phylink_pcs *xpcs_create_pcs_mdiodev(struct mii_bus *bus, int addr) { struct dw_xpcs *xpcs; - xpcs = xpcs_create_mdiodev(bus, addr, PHY_INTERFACE_MODE_NA); + xpcs = xpcs_create_mdiodev(bus, addr); if (IS_ERR(xpcs)) return ERR_CAST(xpcs); @@ -1565,7 +1563,6 @@ EXPORT_SYMBOL_GPL(xpcs_create_pcs_mdiodev); /** * xpcs_create_fwnode() - Create a DW xPCS instance from @fwnode * @fwnode: fwnode handle poining to the DW XPCS device - * @interface: requested PHY interface * * Return: a pointer to the DW XPCS handle if successful, otherwise -ENODEV if * the fwnode device is unavailable or the PCS device couldn't be found on the @@ -1573,8 +1570,7 @@ EXPORT_SYMBOL_GPL(xpcs_create_pcs_mdiodev); * other negative errno related to the data allocations and MDIO-bus * communications. 
*/ -struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode, - phy_interface_t interface) +struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode) { struct mdio_device *mdiodev; struct dw_xpcs *xpcs; diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index a4e2243ce647..758daabb76c7 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -72,10 +72,8 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); -struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr, - phy_interface_t interface); -struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode, - phy_interface_t interface); +struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr); +struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode); void xpcs_destroy(struct dw_xpcs *xpcs); struct phylink_pcs *xpcs_create_pcs_mdiodev(struct mii_bus *bus, int addr); -- cgit v1.2.3 From faefc9730d073e88a490e37b00e3e196b823abcb Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 1 Oct 2024 17:04:57 +0100 Subject: net: pcs: xpcs: make xpcs_do_config() and xpcs_link_up() internal As nothing outside pcs-xpcs.c calls neither xpcs_do_config() nor xpcs_link_up(), remove their exports and prototypes. Reviewed-by: Vladimir Oltean Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1svfMv-005ZIv-2M@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-xpcs.c | 11 +++++------ include/linux/pcs/pcs-xpcs.h | 4 ---- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index f25e7afdfdf5..0a01c552f591 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -851,8 +851,9 @@ static int xpcs_config_2500basex(struct dw_xpcs *xpcs) return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, ret); } -int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, - const unsigned long *advertising, unsigned int neg_mode) +static int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, + const unsigned long *advertising, + unsigned int neg_mode) { const struct dw_xpcs_compat *compat; int ret; @@ -905,7 +906,6 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, return 0; } -EXPORT_SYMBOL_GPL(xpcs_do_config); static int xpcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, @@ -1207,8 +1207,8 @@ static void xpcs_link_up_1000basex(struct dw_xpcs *xpcs, unsigned int neg_mode, pr_err("%s: xpcs_write returned %pe\n", __func__, ERR_PTR(ret)); } -void xpcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, - phy_interface_t interface, int speed, int duplex) +static void xpcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, + phy_interface_t interface, int speed, int duplex) { struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); @@ -1219,7 +1219,6 @@ void xpcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, if (interface == PHY_INTERFACE_MODE_1000BASEX) return xpcs_link_up_1000basex(xpcs, neg_mode, speed, duplex); } -EXPORT_SYMBOL_GPL(xpcs_link_up); static void xpcs_an_restart(struct phylink_pcs *pcs) { diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 758daabb76c7..abda475111d1 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ 
b/include/linux/pcs/pcs-xpcs.h @@ -65,10 +65,6 @@ struct dw_xpcs { }; int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); -void xpcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, - phy_interface_t interface, int speed, int duplex); -int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, - const unsigned long *advertising, unsigned int neg_mode); void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); -- cgit v1.2.3 From ced20ea315fe8591093f19574ec32222c1ab71ba Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Tue, 24 Sep 2024 13:48:43 +0530 Subject: soundwire: amd: pass acp pci revision id as resource data Pass ACP pci revision id as resource data and store it in amd SoundWire manager private data structure. This field will be used to differentiate ACP variants. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20240924081846.1834612-2-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_init.c | 1 + drivers/soundwire/amd_manager.c | 1 + include/linux/soundwire/sdw_amd.h | 5 +++++ 3 files changed, 7 insertions(+) (limited to 'include') diff --git a/drivers/soundwire/amd_init.c b/drivers/soundwire/amd_init.c index db040f435059..4f6e356e6bd2 100644 --- a/drivers/soundwire/amd_init.c +++ b/drivers/soundwire/amd_init.c @@ -121,6 +121,7 @@ static struct sdw_amd_ctx *sdw_amd_probe_controller(struct sdw_amd_res *res) sdw_pdata[index].instance = index; sdw_pdata[index].acp_sdw_lock = res->acp_lock; + sdw_pdata[index].acp_rev = res->acp_rev; pdevinfo[index].name = "amd_sdw_manager"; pdevinfo[index].id = index; pdevinfo[index].parent = res->parent; diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 0d01849c3586..4a1966fb01f6 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -910,6 +910,7 @@ static int amd_sdw_manager_probe(struct platform_device *pdev) amd_manager->mmio = amd_manager->acp_mmio + (amd_manager->instance * SDW_MANAGER_REG_OFFSET); amd_manager->acp_sdw_lock = pdata->acp_sdw_lock; + amd_manager->acp_rev = pdata->acp_rev; amd_manager->cols_index = sdw_find_col_index(AMD_SDW_DEFAULT_COLUMNS); amd_manager->rows_index = sdw_find_row_index(AMD_SDW_DEFAULT_ROWS); amd_manager->dev = dev; diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index 28a4eb77717f..e0abc59d4748 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -30,6 +30,7 @@ struct acp_sdw_pdata { u16 instance; + u32 acp_rev; /* mutex to protect acp common register access */ struct mutex *acp_sdw_lock; }; @@ -66,6 +67,7 @@ struct sdw_amd_dai_runtime { * @instance: SoundWire manager instance * @quirks: SoundWire manager quirks * @wake_en_mask: wake enable mask per SoundWire manager + * @acp_rev: acp pci device revision id * @clk_stopped: flag set to true when clock is stopped * @power_mode_mask: flag interprets amd SoundWire manager power mode * @dai_runtime_array: dai runtime array @@ -94,6 +96,7 @@ struct amd_sdw_manager { u32 quirks; u32 wake_en_mask; u32 power_mode_mask; + u32 acp_rev; bool clk_stopped; struct sdw_amd_dai_runtime **dai_runtime_array; @@ -134,6 +137,7 @@ struct sdw_amd_ctx { * struct sdw_amd_res - Soundwire AMD global resource structure, * typically populated by the DSP driver/Legacy driver * + * @acp_rev: acp pci device revision id * @addr: acp pci device resource start address * @reg_range: ACP 
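A rough sketch of how the parent ACP/DSP driver is expected to provide
this (the pci_dev used as the source of the revision id is an assumption
made for illustration; the field names are from struct sdw_amd_res):

	struct sdw_amd_res res = {};

	res.acp_rev = pci->revision;	/* ACP PCI revision id */
	res.parent = &pci->dev;
	/*
	 * Remaining resource fields are populated as before; the common
	 * init code then copies acp_rev into each manager's platform
	 * data, where amd_sdw_manager_probe() picks it up.
	 */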
register range * @link_mask: bit-wise mask listing links selected by the DSP driver/ @@ -146,6 +150,7 @@ struct sdw_amd_ctx { * @acp_lock: mutex protecting acp common registers access */ struct sdw_amd_res { + u32 acp_rev; u32 addr; u32 reg_range; u32 link_mask; -- cgit v1.2.3 From 7b54323dde29452dd06e6acd2701d9b489c9547d Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Tue, 24 Sep 2024 13:48:44 +0530 Subject: soundwire: amd: refactor existing code for acp 6.3 platform Refactor existing code by adding acp pci revision id coditional checks for ACP 6.3 platform. Rename the macros and structure names with ACP63 tag. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20240924081846.1834612-3-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 98 +++++++++++++++++++++++++-------------- drivers/soundwire/amd_manager.h | 16 +++---- include/linux/soundwire/sdw_amd.h | 1 + 3 files changed, 72 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 4a1966fb01f6..5a4bfaef65fb 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -433,12 +433,18 @@ static int amd_sdw_port_params(struct sdw_bus *bus, struct sdw_port_params *p_pa u32 frame_fmt_reg, dpn_frame_fmt; dev_dbg(amd_manager->dev, "p_params->num:0x%x\n", p_params->num); - switch (amd_manager->instance) { - case ACP_SDW0: - frame_fmt_reg = sdw0_manager_dp_reg[p_params->num].frame_fmt_reg; - break; - case ACP_SDW1: - frame_fmt_reg = sdw1_manager_dp_reg[p_params->num].frame_fmt_reg; + switch (amd_manager->acp_rev) { + case ACP63_PCI_REV_ID: + switch (amd_manager->instance) { + case ACP_SDW0: + frame_fmt_reg = acp63_sdw0_dp_reg[p_params->num].frame_fmt_reg; + break; + case ACP_SDW1: + frame_fmt_reg = acp63_sdw1_dp_reg[p_params->num].frame_fmt_reg; + break; + default: + return -EINVAL; + } break; default: return -EINVAL; @@ -465,20 +471,28 @@ static int amd_sdw_transport_params(struct sdw_bus *bus, u32 frame_fmt_reg, sample_int_reg, hctrl_dp0_reg; u32 offset_reg, lane_ctrl_ch_en_reg; - switch (amd_manager->instance) { - case ACP_SDW0: - frame_fmt_reg = sdw0_manager_dp_reg[params->port_num].frame_fmt_reg; - sample_int_reg = sdw0_manager_dp_reg[params->port_num].sample_int_reg; - hctrl_dp0_reg = sdw0_manager_dp_reg[params->port_num].hctrl_dp0_reg; - offset_reg = sdw0_manager_dp_reg[params->port_num].offset_reg; - lane_ctrl_ch_en_reg = sdw0_manager_dp_reg[params->port_num].lane_ctrl_ch_en_reg; - break; - case ACP_SDW1: - frame_fmt_reg = sdw1_manager_dp_reg[params->port_num].frame_fmt_reg; - sample_int_reg = sdw1_manager_dp_reg[params->port_num].sample_int_reg; - hctrl_dp0_reg = sdw1_manager_dp_reg[params->port_num].hctrl_dp0_reg; - offset_reg = sdw1_manager_dp_reg[params->port_num].offset_reg; - lane_ctrl_ch_en_reg = sdw1_manager_dp_reg[params->port_num].lane_ctrl_ch_en_reg; + switch (amd_manager->acp_rev) { + case ACP63_PCI_REV_ID: + switch (amd_manager->instance) { + case ACP_SDW0: + frame_fmt_reg = acp63_sdw0_dp_reg[params->port_num].frame_fmt_reg; + sample_int_reg = acp63_sdw0_dp_reg[params->port_num].sample_int_reg; + hctrl_dp0_reg = acp63_sdw0_dp_reg[params->port_num].hctrl_dp0_reg; + offset_reg = acp63_sdw0_dp_reg[params->port_num].offset_reg; + lane_ctrl_ch_en_reg = + acp63_sdw0_dp_reg[params->port_num].lane_ctrl_ch_en_reg; + break; + case ACP_SDW1: + frame_fmt_reg = acp63_sdw1_dp_reg[params->port_num].frame_fmt_reg; + sample_int_reg = 
acp63_sdw1_dp_reg[params->port_num].sample_int_reg; + hctrl_dp0_reg = acp63_sdw1_dp_reg[params->port_num].hctrl_dp0_reg; + offset_reg = acp63_sdw1_dp_reg[params->port_num].offset_reg; + lane_ctrl_ch_en_reg = + acp63_sdw1_dp_reg[params->port_num].lane_ctrl_ch_en_reg; + break; + default: + return -EINVAL; + } break; default: return -EINVAL; @@ -520,12 +534,20 @@ static int amd_sdw_port_enable(struct sdw_bus *bus, u32 dpn_ch_enable; u32 lane_ctrl_ch_en_reg; - switch (amd_manager->instance) { - case ACP_SDW0: - lane_ctrl_ch_en_reg = sdw0_manager_dp_reg[enable_ch->port_num].lane_ctrl_ch_en_reg; - break; - case ACP_SDW1: - lane_ctrl_ch_en_reg = sdw1_manager_dp_reg[enable_ch->port_num].lane_ctrl_ch_en_reg; + switch (amd_manager->acp_rev) { + case ACP63_PCI_REV_ID: + switch (amd_manager->instance) { + case ACP_SDW0: + lane_ctrl_ch_en_reg = + acp63_sdw0_dp_reg[enable_ch->port_num].lane_ctrl_ch_en_reg; + break; + case ACP_SDW1: + lane_ctrl_ch_en_reg = + acp63_sdw1_dp_reg[enable_ch->port_num].lane_ctrl_ch_en_reg; + break; + default: + return -EINVAL; + } break; default: return -EINVAL; @@ -927,15 +949,21 @@ static int amd_sdw_manager_probe(struct platform_device *pdev) * information. */ amd_manager->bus.controller_id = 0; - - switch (amd_manager->instance) { - case ACP_SDW0: - amd_manager->num_dout_ports = AMD_SDW0_MAX_TX_PORTS; - amd_manager->num_din_ports = AMD_SDW0_MAX_RX_PORTS; - break; - case ACP_SDW1: - amd_manager->num_dout_ports = AMD_SDW1_MAX_TX_PORTS; - amd_manager->num_din_ports = AMD_SDW1_MAX_RX_PORTS; + dev_dbg(dev, "acp_rev:0x%x\n", amd_manager->acp_rev); + switch (amd_manager->acp_rev) { + case ACP63_PCI_REV_ID: + switch (amd_manager->instance) { + case ACP_SDW0: + amd_manager->num_dout_ports = AMD_ACP63_SDW0_MAX_TX_PORTS; + amd_manager->num_din_ports = AMD_ACP63_SDW0_MAX_RX_PORTS; + break; + case ACP_SDW1: + amd_manager->num_dout_ports = AMD_ACP63_SDW1_MAX_TX_PORTS; + amd_manager->num_din_ports = AMD_ACP63_SDW1_MAX_RX_PORTS; + break; + default: + return -EINVAL; + } break; default: return -EINVAL; diff --git a/drivers/soundwire/amd_manager.h b/drivers/soundwire/amd_manager.h index 707065468e05..cc2170e4521e 100644 --- a/drivers/soundwire/amd_manager.h +++ b/drivers/soundwire/amd_manager.h @@ -155,12 +155,12 @@ #define AMD_SDW_IRQ_MASK_8TO11 0x000c7777 #define AMD_SDW_IRQ_ERROR_MASK 0xff #define AMD_SDW_MAX_FREQ_NUM 1 -#define AMD_SDW0_MAX_TX_PORTS 3 -#define AMD_SDW0_MAX_RX_PORTS 3 -#define AMD_SDW1_MAX_TX_PORTS 1 -#define AMD_SDW1_MAX_RX_PORTS 1 -#define AMD_SDW0_MAX_DAI 6 -#define AMD_SDW1_MAX_DAI 2 +#define AMD_ACP63_SDW0_MAX_TX_PORTS 3 +#define AMD_ACP63_SDW0_MAX_RX_PORTS 3 +#define AMD_ACP63_SDW1_MAX_TX_PORTS 1 +#define AMD_ACP63_SDW1_MAX_RX_PORTS 1 +#define AMD_ACP63_SDW0_MAX_DAI 6 +#define AMD_ACP63_SDW1_MAX_DAI 2 #define AMD_SDW_SLAVE_0_ATTACHED 5 #define AMD_SDW_SSP_COUNTER_VAL 3 @@ -222,7 +222,7 @@ struct sdw_manager_dp_reg { * in SoundWire DMA driver. 
*/ -static struct sdw_manager_dp_reg sdw0_manager_dp_reg[AMD_SDW0_MAX_DAI] = { +static struct sdw_manager_dp_reg acp63_sdw0_dp_reg[AMD_ACP63_SDW0_MAX_DAI] = { {ACP_SW_AUDIO0_TX_FRAME_FORMAT, ACP_SW_AUDIO0_TX_SAMPLEINTERVAL, ACP_SW_AUDIO0_TX_HCTRL_DP0, ACP_SW_AUDIO0_TX_OFFSET_DP0, ACP_SW_AUDIO0_TX_CHANNEL_ENABLE_DP0}, {ACP_SW_AUDIO1_TX_FRAME_FORMAT, ACP_SW_AUDIO1_TX_SAMPLEINTERVAL, ACP_SW_AUDIO1_TX_HCTRL, @@ -237,7 +237,7 @@ static struct sdw_manager_dp_reg sdw0_manager_dp_reg[AMD_SDW0_MAX_DAI] = { ACP_SW_AUDIO2_RX_OFFSET, ACP_SW_AUDIO2_RX_CHANNEL_ENABLE_DP0}, }; -static struct sdw_manager_dp_reg sdw1_manager_dp_reg[AMD_SDW1_MAX_DAI] = { +static struct sdw_manager_dp_reg acp63_sdw1_dp_reg[AMD_ACP63_SDW1_MAX_DAI] = { {ACP_SW_AUDIO1_TX_FRAME_FORMAT, ACP_SW_AUDIO1_TX_SAMPLEINTERVAL, ACP_SW_AUDIO1_TX_HCTRL, ACP_SW_AUDIO1_TX_OFFSET, ACP_SW_AUDIO1_TX_CHANNEL_ENABLE_DP0}, {ACP_SW_AUDIO1_RX_FRAME_FORMAT, ACP_SW_AUDIO1_RX_SAMPLEINTERVAL, ACP_SW_AUDIO1_RX_HCTRL, diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index e0abc59d4748..c9586f22c5a9 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -27,6 +27,7 @@ #define ACP_SDW0 0 #define ACP_SDW1 1 #define AMD_SDW_MAX_MANAGER_COUNT 2 +#define ACP63_PCI_REV_ID 0x63 struct acp_sdw_pdata { u16 instance; -- cgit v1.2.3 From 444d6824a4feca142b0a57095a2f1f1bda98e2ab Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:38 +0800 Subject: soundwire: optimize sdw_stream_runtime memory layout pahole suggestion: swap position of 'm_rt_count' before: pahole -C sdw_stream_runtime drivers/soundwire/soundwire-bus.ko struct sdw_stream_runtime { const char * name; /* 0 8 */ struct sdw_stream_params params; /* 8 12 */ enum sdw_stream_state state; /* 20 4 */ enum sdw_stream_type type; /* 24 4 */ /* XXX 4 bytes hole, try to pack */ struct list_head master_list; /* 32 16 */ int m_rt_count; /* 48 4 */ /* size: 56, cachelines: 1, members: 6 */ /* sum members: 48, holes: 1, sum holes: 4 */ /* padding: 4 */ /* last cacheline: 56 bytes */ }; after: pahole --reorganize -C sdw_stream_runtime drivers/soundwire/soundwire-bus.ko struct sdw_stream_runtime { const char * name; /* 0 8 */ struct sdw_stream_params params; /* 8 12 */ enum sdw_stream_state state; /* 20 4 */ enum sdw_stream_type type; /* 24 4 */ int m_rt_count; /* 28 4 */ struct list_head master_list; /* 32 16 */ /* size: 48, cachelines: 1, members: 6 */ /* last cacheline: 48 bytes */ }; /* saved 8 bytes! 
*/ Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-3-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 5e0dd47a0412..a4fa45132030 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -820,15 +820,15 @@ struct sdw_master_port_ops { struct sdw_msg; /** - * struct sdw_defer - SDW deffered message - * @length: message length + * struct sdw_defer - SDW deferred message * @complete: message completion * @msg: SDW message + * @length: message length */ struct sdw_defer { + struct sdw_msg *msg; int length; struct completion complete; - struct sdw_msg *msg; }; /** @@ -1010,18 +1010,18 @@ struct sdw_stream_params { * @params: Stream parameters * @state: Current state of the stream * @type: Stream type PCM or PDM + * @m_rt_count: Count of Master runtime(s) in this stream * @master_list: List of Master runtime(s) in this stream. * master_list can contain only one m_rt per Master instance * for a stream - * @m_rt_count: Count of Master runtime(s) in this stream */ struct sdw_stream_runtime { const char *name; struct sdw_stream_params params; enum sdw_stream_state state; enum sdw_stream_type type; - struct list_head master_list; int m_rt_count; + struct list_head master_list; }; struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name); -- cgit v1.2.3 From 6cb2c156439430a7f9db2e1f71a7dccf1ca978bf Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:39 +0800 Subject: soundwire: optimize sdw_master_prop Make pahole happy by moving pointers and u64 first instead of interleaving them. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-4-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index a4fa45132030..2caea7345c3e 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -406,13 +406,14 @@ struct sdw_slave_prop { /** * struct sdw_master_prop - Master properties + * @clk_gears: Clock gears supported + * @clk_freq: Clock frequencies supported, in Hz + * @quirks: bitmask identifying optional behavior beyond the scope of the MIPI specification * @revision: MIPI spec version of the implementation * @clk_stop_modes: Bitmap, bit N set when clock-stop-modeN supported * @max_clk_freq: Maximum Bus clock frequency, in Hz * @num_clk_gears: Number of clock gears supported - * @clk_gears: Clock gears supported * @num_clk_freq: Number of clock frequencies supported, in Hz - * @clk_freq: Clock frequencies supported, in Hz * @default_frame_rate: Controller default Frame rate, in Hz * @default_row: Number of rows * @default_col: Number of columns @@ -421,24 +422,23 @@ struct sdw_slave_prop { * command * @mclk_freq: clock reference passed to SoundWire Master, in Hz. 
* @hw_disabled: if true, the Master is not functional, typically due to pin-mux - * @quirks: bitmask identifying optional behavior beyond the scope of the MIPI specification */ struct sdw_master_prop { + u32 *clk_gears; + u32 *clk_freq; + u64 quirks; u32 revision; u32 clk_stop_modes; u32 max_clk_freq; u32 num_clk_gears; - u32 *clk_gears; u32 num_clk_freq; - u32 *clk_freq; u32 default_frame_rate; u32 default_row; u32 default_col; - bool dynamic_frame; u32 err_threshold; u32 mclk_freq; + bool dynamic_frame; bool hw_disabled; - u64 quirks; }; /* Definitions for Master quirks */ -- cgit v1.2.3 From 0a323dad1c4e04048988cd04c60eaffd6ae61b1a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:40 +0800 Subject: soundwire: optimize sdw_bus structure The sdw_bus structure has seen multiple additions over the years. It's one of the most used structures in this subsystem, so there's merit in reshuffling the members a bit with 'pahole' to reduce holes and structures across cache lines. before: struct sdw_bus { struct device * dev; /* 0 8 */ struct sdw_master_device * md; /* 8 8 */ int controller_id; /* 16 4 */ unsigned int link_id; /* 20 4 */ int id; /* 24 4 */ /* XXX 4 bytes hole, try to pack */ struct list_head slaves; /* 32 16 */ long unsigned int assigned[1]; /* 48 8 */ struct mutex bus_lock; /* 56 160 */ /* --- cacheline 3 boundary (192 bytes) was 24 bytes ago --- */ struct lock_class_key bus_lock_key; /* 216 16 */ struct mutex msg_lock; /* 232 160 */ /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */ struct lock_class_key msg_lock_key; /* 392 16 */ int (*compute_params)(struct sdw_bus *); /* 408 8 */ const struct sdw_master_ops * ops; /* 416 8 */ const struct sdw_master_port_ops * port_ops; /* 424 8 */ struct sdw_bus_params params; /* 432 36 */ /* XXX 4 bytes hole, try to pack */ /* --- cacheline 7 boundary (448 bytes) was 24 bytes ago --- */ struct sdw_master_prop prop; /* 472 72 */ /* XXX last struct has 6 bytes of padding */ /* --- cacheline 8 boundary (512 bytes) was 32 bytes ago --- */ void * vendor_specific_prop; /* 544 8 */ struct list_head m_rt_list; /* 552 16 */ struct dentry * debugfs; /* 568 8 */ /* --- cacheline 9 boundary (576 bytes) --- */ struct irq_chip irq_chip; /* 576 264 */ /* --- cacheline 13 boundary (832 bytes) was 8 bytes ago --- */ struct irq_domain * domain; /* 840 8 */ struct sdw_defer defer_msg; /* 848 112 */ /* --- cacheline 15 boundary (960 bytes) --- */ unsigned int clk_stop_timeout; /* 960 4 */ u32 bank_switch_timeout; /* 964 4 */ bool multi_link; /* 968 1 */ /* XXX 3 bytes hole, try to pack */ int hw_sync_min_links; /* 972 4 */ int stream_refcount; /* 976 4 */ /* size: 984, cachelines: 16, members: 27 */ /* sum members: 969, holes: 3, sum holes: 11 */ /* padding: 4 */ /* paddings: 1, sum paddings: 6 */ /* last cacheline: 24 bytes */ }; after: struct sdw_bus { struct device * dev; /* 0 8 */ struct sdw_master_device * md; /* 8 8 */ struct lock_class_key bus_lock_key; /* 16 16 */ struct mutex bus_lock; /* 32 160 */ /* --- cacheline 3 boundary (192 bytes) --- */ struct list_head slaves; /* 192 16 */ struct lock_class_key msg_lock_key; /* 208 16 */ struct mutex msg_lock; /* 224 160 */ /* --- cacheline 6 boundary (384 bytes) --- */ struct list_head m_rt_list; /* 384 16 */ struct sdw_defer defer_msg; /* 400 112 */ /* --- cacheline 8 boundary (512 bytes) --- */ struct sdw_bus_params params; /* 512 36 */ int stream_refcount; /* 548 4 */ const struct sdw_master_ops * ops; /* 552 8 */ const struct sdw_master_port_ops * port_ops; /* 560 
8 */ struct sdw_master_prop prop; /* 568 72 */ /* XXX last struct has 6 bytes of padding */ /* --- cacheline 10 boundary (640 bytes) --- */ void * vendor_specific_prop; /* 640 8 */ int hw_sync_min_links; /* 648 4 */ int controller_id; /* 652 4 */ unsigned int link_id; /* 656 4 */ int id; /* 660 4 */ int (*compute_params)(struct sdw_bus *); /* 664 8 */ long unsigned int assigned[1]; /* 672 8 */ unsigned int clk_stop_timeout; /* 680 4 */ u32 bank_switch_timeout; /* 684 4 */ struct irq_chip irq_chip; /* 688 264 */ /* --- cacheline 14 boundary (896 bytes) was 56 bytes ago --- */ struct irq_domain * domain; /* 952 8 */ /* --- cacheline 15 boundary (960 bytes) --- */ struct dentry * debugfs; /* 960 8 */ bool multi_link; /* 968 1 */ /* size: 976, cachelines: 16, members: 27 */ /* padding: 7 */ /* paddings: 1, sum paddings: 6 */ /* last cacheline: 16 bytes */ }; Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-5-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 77 ++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 2caea7345c3e..6fcf122c1831 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -871,68 +871,71 @@ struct sdw_master_ops { * struct sdw_bus - SoundWire bus * @dev: Shortcut to &bus->md->dev to avoid changing the entire code. * @md: Master device - * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. - * @link_id: Link id number, can be 0 to N, unique for each Controller - * @id: bus system-wide unique id - * @slaves: list of Slaves on this bus - * @assigned: Bitmap for Slave device numbers. - * Bit set implies used number, bit clear implies unused number. + * @bus_lock_key: bus lock key associated to @bus_lock * @bus_lock: bus lock + * @slaves: list of Slaves on this bus + * @msg_lock_key: message lock key associated to @msg_lock * @msg_lock: message lock - * @compute_params: points to Bus resource management implementation - * @ops: Master callback ops - * @port_ops: Master port callback ops - * @params: Current bus parameters - * @prop: Master properties - * @vendor_specific_prop: pointer to non-standard properties * @m_rt_list: List of Master instance of all stream(s) running on Bus. This * is used to compute and program bus bandwidth, clock, frame shape, * transport and port parameters - * @debugfs: Bus debugfs - * @domain: IRQ domain * @defer_msg: Defer message - * @clk_stop_timeout: Clock stop timeout computed - * @bank_switch_timeout: Bank switch timeout computed - * @multi_link: Store bus property that indicates if multi links - * are supported. This flag is populated by drivers after reading - * appropriate firmware (ACPI/DT). + * @params: Current bus parameters + * @stream_refcount: number of streams currently using this bus + * @ops: Master callback ops + * @port_ops: Master port callback ops + * @prop: Master properties + * @vendor_specific_prop: pointer to non-standard properties * @hw_sync_min_links: Number of links used by a stream above which * hardware-based synchronization is required. This value is only * meaningful if multi_link is set. If set to 1, hardware-based * synchronization will be used even if a stream only uses a single * SoundWire segment. 
- * @stream_refcount: number of streams currently using this bus + * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. + * @link_id: Link id number, can be 0 to N, unique for each Controller + * @id: bus system-wide unique id + * @compute_params: points to Bus resource management implementation + * @assigned: Bitmap for Slave device numbers. + * Bit set implies used number, bit clear implies unused number. + * @clk_stop_timeout: Clock stop timeout computed + * @bank_switch_timeout: Bank switch timeout computed + * @domain: IRQ domain + * @irq_chip: IRQ chip + * @debugfs: Bus debugfs (optional) + * @multi_link: Store bus property that indicates if multi links + * are supported. This flag is populated by drivers after reading + * appropriate firmware (ACPI/DT). */ struct sdw_bus { struct device *dev; struct sdw_master_device *md; - int controller_id; - unsigned int link_id; - int id; - struct list_head slaves; - DECLARE_BITMAP(assigned, SDW_MAX_DEVICES); - struct mutex bus_lock; struct lock_class_key bus_lock_key; - struct mutex msg_lock; + struct mutex bus_lock; + struct list_head slaves; struct lock_class_key msg_lock_key; - int (*compute_params)(struct sdw_bus *bus); + struct mutex msg_lock; + struct list_head m_rt_list; + struct sdw_defer defer_msg; + struct sdw_bus_params params; + int stream_refcount; const struct sdw_master_ops *ops; const struct sdw_master_port_ops *port_ops; - struct sdw_bus_params params; struct sdw_master_prop prop; void *vendor_specific_prop; - struct list_head m_rt_list; + int hw_sync_min_links; + int controller_id; + unsigned int link_id; + int id; + int (*compute_params)(struct sdw_bus *bus); + DECLARE_BITMAP(assigned, SDW_MAX_DEVICES); + unsigned int clk_stop_timeout; + u32 bank_switch_timeout; + struct irq_chip irq_chip; + struct irq_domain *domain; #ifdef CONFIG_DEBUG_FS struct dentry *debugfs; #endif - struct irq_chip irq_chip; - struct irq_domain *domain; - struct sdw_defer defer_msg; - unsigned int clk_stop_timeout; - u32 bank_switch_timeout; bool multi_link; - int hw_sync_min_links; - int stream_refcount; }; int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, -- cgit v1.2.3 From 1c758df5a83ea0c9b5055536336d8a586b5010b0 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:41 +0800 Subject: soundwire: optimize sdw_slave_prop move pointers first, and move booleans together. 
before: struct sdw_slave_prop { u32 mipi_revision; /* 0 4 */ bool wake_capable; /* 4 1 */ bool test_mode_capable; /* 5 1 */ bool clk_stop_mode1; /* 6 1 */ bool simple_clk_stop_capable; /* 7 1 */ u32 clk_stop_timeout; /* 8 4 */ u32 ch_prep_timeout; /* 12 4 */ enum sdw_clk_stop_reset_behave reset_behave; /* 16 4 */ bool high_PHY_capable; /* 20 1 */ bool paging_support; /* 21 1 */ bool bank_delay_support; /* 22 1 */ /* XXX 1 byte hole, try to pack */ enum sdw_p15_behave p15_behave; /* 24 4 */ bool lane_control_support; /* 28 1 */ /* XXX 3 bytes hole, try to pack */ u32 master_count; /* 32 4 */ u32 source_ports; /* 36 4 */ u32 sink_ports; /* 40 4 */ /* XXX 4 bytes hole, try to pack */ struct sdw_dp0_prop * dp0_prop; /* 48 8 */ struct sdw_dpn_prop * src_dpn_prop; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct sdw_dpn_prop * sink_dpn_prop; /* 64 8 */ u8 scp_int1_mask; /* 72 1 */ /* XXX 3 bytes hole, try to pack */ u32 quirks; /* 76 4 */ bool clock_reg_supported; /* 80 1 */ bool use_domain_irq; /* 81 1 */ /* size: 88, cachelines: 2, members: 23 */ /* sum members: 71, holes: 4, sum holes: 11 */ /* padding: 6 */ /* last cacheline: 24 bytes */ }; after: truct sdw_slave_prop { struct sdw_dp0_prop * dp0_prop; /* 0 8 */ struct sdw_dpn_prop * src_dpn_prop; /* 8 8 */ struct sdw_dpn_prop * sink_dpn_prop; /* 16 8 */ u32 mipi_revision; /* 24 4 */ bool wake_capable; /* 28 1 */ bool test_mode_capable; /* 29 1 */ bool clk_stop_mode1; /* 30 1 */ bool simple_clk_stop_capable; /* 31 1 */ u32 clk_stop_timeout; /* 32 4 */ u32 ch_prep_timeout; /* 36 4 */ enum sdw_clk_stop_reset_behave reset_behave; /* 40 4 */ bool high_PHY_capable; /* 44 1 */ bool paging_support; /* 45 1 */ bool bank_delay_support; /* 46 1 */ bool lane_control_support; /* 47 1 */ enum sdw_p15_behave p15_behave; /* 48 4 */ u32 master_count; /* 52 4 */ u32 source_ports; /* 56 4 */ u32 sink_ports; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ u32 quirks; /* 64 4 */ u8 scp_int1_mask; /* 68 1 */ bool clock_reg_supported; /* 69 1 */ bool use_domain_irq; /* 70 1 */ /* size: 72, cachelines: 2, members: 23 */ /* padding: 1 */ /* last cacheline: 8 bytes */ }; Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-6-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 6fcf122c1831..38db81f5bdb9 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -344,6 +344,9 @@ struct sdw_dpn_prop { /** * struct sdw_slave_prop - SoundWire Slave properties + * @dp0_prop: Data Port 0 properties + * @src_dpn_prop: Source Data Port N properties + * @sink_dpn_prop: Sink Data Port N properties * @mipi_revision: Spec version of the implementation * @wake_capable: Wake-up events are supported * @test_mode_capable: If test mode is supported @@ -360,15 +363,12 @@ struct sdw_dpn_prop { * SCP_AddrPage2 * @bank_delay_support: Slave implements bank delay/bridge support registers * SCP_BankDelay and SCP_NextFrame + * @lane_control_support: Slave supports lane control * @p15_behave: Slave behavior when the Master attempts a read to the Port15 * alias - * @lane_control_support: Slave supports lane control * @master_count: Number of Masters present on this Slave * @source_ports: Bitmap identifying source ports * @sink_ports: Bitmap identifying sink ports - * 
@dp0_prop: Data Port 0 properties - * @src_dpn_prop: Source Data Port N properties - * @sink_dpn_prop: Sink Data Port N properties * @scp_int1_mask: SCP_INT1_MASK desired settings * @quirks: bitmask identifying deltas from the MIPI specification * @clock_reg_supported: the Peripheral implements the clock base and scale @@ -377,6 +377,9 @@ struct sdw_dpn_prop { * @use_domain_irq: call actual IRQ handler on slave, as well as callback */ struct sdw_slave_prop { + struct sdw_dp0_prop *dp0_prop; + struct sdw_dpn_prop *src_dpn_prop; + struct sdw_dpn_prop *sink_dpn_prop; u32 mipi_revision; bool wake_capable; bool test_mode_capable; @@ -388,16 +391,13 @@ struct sdw_slave_prop { bool high_PHY_capable; bool paging_support; bool bank_delay_support; - enum sdw_p15_behave p15_behave; bool lane_control_support; + enum sdw_p15_behave p15_behave; u32 master_count; u32 source_ports; u32 sink_ports; - struct sdw_dp0_prop *dp0_prop; - struct sdw_dpn_prop *src_dpn_prop; - struct sdw_dpn_prop *sink_dpn_prop; - u8 scp_int1_mask; u32 quirks; + u8 scp_int1_mask; bool clock_reg_supported; bool use_domain_irq; }; -- cgit v1.2.3 From 557e28f8b53243097162cf4d3e59bcee9fb9713b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:42 +0800 Subject: soundwire: optimize sdw_dp0_prop Move pointers and booleans. Before: struct sdw_dp0_prop { u32 max_word; /* 0 4 */ u32 min_word; /* 4 4 */ u32 num_words; /* 8 4 */ /* XXX 4 bytes hole, try to pack */ u32 * words; /* 16 8 */ bool BRA_flow_controlled; /* 24 1 */ bool simple_ch_prep_sm; /* 25 1 */ /* XXX 2 bytes hole, try to pack */ u32 ch_prep_timeout; /* 28 4 */ bool imp_def_interrupts; /* 32 1 */ /* size: 40, cachelines: 1, members: 8 */ /* sum members: 27, holes: 2, sum holes: 6 */ /* padding: 7 */ /* last cacheline: 40 bytes */ }; after: struct sdw_dp0_prop { u32 * words; /* 0 8 */ u32 max_word; /* 8 4 */ u32 min_word; /* 12 4 */ u32 num_words; /* 16 4 */ u32 ch_prep_timeout; /* 20 4 */ bool BRA_flow_controlled; /* 24 1 */ bool simple_ch_prep_sm; /* 25 1 */ bool imp_def_interrupts; /* 26 1 */ /* size: 32, cachelines: 1, members: 8 */ /* padding: 5 */ /* last cacheline: 32 bytes */ }; Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-7-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 38db81f5bdb9..c72095137a35 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -226,16 +226,16 @@ enum sdw_clk_stop_mode { /** * struct sdw_dp0_prop - DP0 properties + * @words: wordlengths supported * @max_word: Maximum number of bits in a Payload Channel Sample, 1 to 64 * (inclusive) * @min_word: Minimum number of bits in a Payload Channel Sample, 1 to 64 * (inclusive) * @num_words: number of wordlengths supported - * @words: wordlengths supported + * @ch_prep_timeout: Port-specific timeout value, in milliseconds * @BRA_flow_controlled: Slave implementation results in an OK_NotReady * response * @simple_ch_prep_sm: If channel prepare sequence is required - * @ch_prep_timeout: Port-specific timeout value, in milliseconds * @imp_def_interrupts: If set, each bit corresponds to support for * implementation-defined interrupts * @@ -244,13 +244,13 @@ enum sdw_clk_stop_mode { * support */ struct sdw_dp0_prop { + u32 *words; u32 max_word; u32 min_word; u32 num_words; - u32 
*words; + u32 ch_prep_timeout; bool BRA_flow_controlled; bool simple_ch_prep_sm; - u32 ch_prep_timeout; bool imp_def_interrupts; }; -- cgit v1.2.3 From 9942f90bdcc035eb5f01d7343dac99bd805ef3ec Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:43 +0800 Subject: soundwire: optimize sdw_dpn_prop before: struct sdw_dpn_prop { u32 num; /* 0 4 */ u32 max_word; /* 4 4 */ u32 min_word; /* 8 4 */ u32 num_words; /* 12 4 */ u32 * words; /* 16 8 */ enum sdw_dpn_type type; /* 24 4 */ u32 max_grouping; /* 28 4 */ bool simple_ch_prep_sm; /* 32 1 */ /* XXX 3 bytes hole, try to pack */ u32 ch_prep_timeout; /* 36 4 */ u32 imp_def_interrupts; /* 40 4 */ u32 max_ch; /* 44 4 */ u32 min_ch; /* 48 4 */ u32 num_channels; /* 52 4 */ u32 * channels; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ u32 num_ch_combinations; /* 64 4 */ /* XXX 4 bytes hole, try to pack */ u32 * ch_combinations; /* 72 8 */ u32 modes; /* 80 4 */ u32 max_async_buffer; /* 84 4 */ bool block_pack_mode; /* 88 1 */ bool read_only_wordlength; /* 89 1 */ /* XXX 2 bytes hole, try to pack */ u32 port_encoding; /* 92 4 */ struct sdw_dpn_audio_mode * audio_modes; /* 96 8 */ /* size: 104, cachelines: 2, members: 22 */ /* sum members: 95, holes: 3, sum holes: 9 */ /* last cacheline: 40 bytes */ }; after: struct sdw_dpn_prop { struct sdw_dpn_audio_mode * audio_modes; /* 0 8 */ u32 num; /* 8 4 */ u32 max_word; /* 12 4 */ u32 min_word; /* 16 4 */ u32 num_words; /* 20 4 */ u32 * words; /* 24 8 */ enum sdw_dpn_type type; /* 32 4 */ u32 max_grouping; /* 36 4 */ u32 ch_prep_timeout; /* 40 4 */ u32 imp_def_interrupts; /* 44 4 */ u32 max_ch; /* 48 4 */ u32 min_ch; /* 52 4 */ u32 num_channels; /* 56 4 */ u32 num_ch_combinations; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ u32 * channels; /* 64 8 */ u32 * ch_combinations; /* 72 8 */ u32 modes; /* 80 4 */ u32 max_async_buffer; /* 84 4 */ u32 port_encoding; /* 88 4 */ bool block_pack_mode; /* 92 1 */ bool read_only_wordlength; /* 93 1 */ bool simple_ch_prep_sm; /* 94 1 */ /* size: 96, cachelines: 2, members: 22 */ /* padding: 1 */ /* last cacheline: 32 bytes */ }; Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-8-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index c72095137a35..cc0afb8af333 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -288,6 +288,7 @@ struct sdw_dpn_audio_mode { /** * struct sdw_dpn_prop - Data Port DPn properties + * @audio_modes: Audio modes supported * @num: port number * @max_word: Maximum number of bits in a Payload Channel Sample, 1 to 64 * (inclusive) @@ -298,26 +299,26 @@ struct sdw_dpn_audio_mode { * @type: Data port type. 
Full, Simplified or Reduced * @max_grouping: Maximum number of samples that can be grouped together for * a full data port - * @simple_ch_prep_sm: If the port supports simplified channel prepare state - * machine * @ch_prep_timeout: Port-specific timeout value, in milliseconds * @imp_def_interrupts: If set, each bit corresponds to support for * implementation-defined interrupts * @max_ch: Maximum channels supported * @min_ch: Minimum channels supported * @num_channels: Number of discrete channels supported - * @channels: Discrete channels supported * @num_ch_combinations: Number of channel combinations supported + * @channels: Discrete channels supported * @ch_combinations: Channel combinations supported * @modes: SDW mode supported * @max_async_buffer: Number of samples that this port can buffer in * asynchronous modes + * @port_encoding: Payload Channel Sample encoding schemes supported * @block_pack_mode: Type of block port mode supported * @read_only_wordlength: Read Only wordlength field in DPN_BlockCtrl1 register - * @port_encoding: Payload Channel Sample encoding schemes supported - * @audio_modes: Audio modes supported + * @simple_ch_prep_sm: If the port supports simplified channel prepare state + * machine */ struct sdw_dpn_prop { + struct sdw_dpn_audio_mode *audio_modes; u32 num; u32 max_word; u32 min_word; @@ -325,21 +326,20 @@ struct sdw_dpn_prop { u32 *words; enum sdw_dpn_type type; u32 max_grouping; - bool simple_ch_prep_sm; u32 ch_prep_timeout; u32 imp_def_interrupts; u32 max_ch; u32 min_ch; u32 num_channels; - u32 *channels; u32 num_ch_combinations; + u32 *channels; u32 *ch_combinations; u32 modes; u32 max_async_buffer; + u32 port_encoding; bool block_pack_mode; bool read_only_wordlength; - u32 port_encoding; - struct sdw_dpn_audio_mode *audio_modes; + bool simple_ch_prep_sm; }; /** -- cgit v1.2.3 From 1ae4aa59d79399be0591c8d78c44e280406e2c34 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:44 +0800 Subject: soundwire: mipi-disco: remove DPn audio-modes The concept of DPn audio-modes was never used by anyone, and was removed from the DisCo for SoundWire 2.0 specification. Remove the definitions and TODO. 
Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-9-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/mipi_disco.c | 2 -- include/linux/soundwire/sdw.h | 34 ---------------------------------- 2 files changed, 36 deletions(-) (limited to 'include') diff --git a/drivers/soundwire/mipi_disco.c b/drivers/soundwire/mipi_disco.c index fdab3d4a1379..79cf8212f97a 100644 --- a/drivers/soundwire/mipi_disco.c +++ b/drivers/soundwire/mipi_disco.c @@ -304,8 +304,6 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave, fwnode_property_read_u32(node, "mipi-sdw-port-encoding-type", &dpn[i].port_encoding); - /* TODO: Read audio mode */ - fwnode_handle_put(node); i++; diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index cc0afb8af333..66feaa79ecfc 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -254,41 +254,8 @@ struct sdw_dp0_prop { bool imp_def_interrupts; }; -/** - * struct sdw_dpn_audio_mode - Audio mode properties for DPn - * @bus_min_freq: Minimum bus frequency, in Hz - * @bus_max_freq: Maximum bus frequency, in Hz - * @bus_num_freq: Number of discrete frequencies supported - * @bus_freq: Discrete bus frequencies, in Hz - * @min_freq: Minimum sampling frequency, in Hz - * @max_freq: Maximum sampling bus frequency, in Hz - * @num_freq: Number of discrete sampling frequency supported - * @freq: Discrete sampling frequencies, in Hz - * @prep_ch_behave: Specifies the dependencies between Channel Prepare - * sequence and bus clock configuration - * If 0, Channel Prepare can happen at any Bus clock rate - * If 1, Channel Prepare sequence shall happen only after Bus clock is - * changed to a frequency supported by this mode or compatible modes - * described by the next field - * @glitchless: Bitmap describing possible glitchless transitions from this - * Audio Mode to other Audio Modes - */ -struct sdw_dpn_audio_mode { - u32 bus_min_freq; - u32 bus_max_freq; - u32 bus_num_freq; - u32 *bus_freq; - u32 max_freq; - u32 min_freq; - u32 num_freq; - u32 *freq; - u32 prep_ch_behave; - u32 glitchless; -}; - /** * struct sdw_dpn_prop - Data Port DPn properties - * @audio_modes: Audio modes supported * @num: port number * @max_word: Maximum number of bits in a Payload Channel Sample, 1 to 64 * (inclusive) @@ -318,7 +285,6 @@ struct sdw_dpn_audio_mode { * machine */ struct sdw_dpn_prop { - struct sdw_dpn_audio_mode *audio_modes; u32 num; u32 max_word; u32 min_word; -- cgit v1.2.3 From 543bd28a3bfeff31f748ba83348b63313dd37ff9 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:49 +0800 Subject: soundwire: mipi-disco: add new properties from 2.0 spec The DisCo for SoundWire 2.0 spec adds support for new 'mipi-sdw-sdca-interrupt-register-list' and 'mipi-sdw-commit-register-supported'. This patch only adds the definitions and property reads, but the use of these properties will come at some point in the future when needed. Note a slight conceptual disconnect between the MIPI DisCo definition of a boolean property and the Linux implementation. The latter only checks the presence of the property to set its value to 'true', whereas the MIPI definitions allow for a property with a 'false' value. This patch uses the new introduced mipi_device_property_read_bool() to handle it. 
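Conceptually, such a value-carrying boolean read looks roughly like the
sketch below (an illustration of the semantics only, not the actual
mipi_disco.c helper):

	static bool example_read_disco_bool(struct device *dev,
					    const char *propname)
	{
		u8 val;

		/* absent property -> false */
		if (!device_property_present(dev, propname))
			return false;

		/* present but without a readable value -> treat as true */
		if (device_property_read_u8(dev, propname, &val))
			return true;

		/* present with an explicit value -> honour 0 as false */
		return val != 0;
	}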
Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-14-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/mipi_disco.c | 6 ++++++ include/linux/soundwire/sdw.h | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/soundwire/mipi_disco.c b/drivers/soundwire/mipi_disco.c index d6eb63bf1252..36e734751225 100644 --- a/drivers/soundwire/mipi_disco.c +++ b/drivers/soundwire/mipi_disco.c @@ -398,6 +398,12 @@ int sdw_slave_read_prop(struct sdw_slave *slave) device_property_read_u32(dev, "mipi-sdw-sink-port-list", &prop->sink_ports); + device_property_read_u32(dev, "mipi-sdw-sdca-interrupt-register-list", + &prop->sdca_interrupt_register_list); + + prop->commit_register_supported = mipi_device_property_read_bool(dev, + "mipi-sdw-commit-register-supported"); + /* * Read dp0 properties - we don't rely on the 'mipi-sdw-dp-0-supported' * property since the 'mipi-sdw-dp0-subproperties' property is logically diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 66feaa79ecfc..952514f044f0 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -335,8 +335,11 @@ struct sdw_dpn_prop { * @master_count: Number of Masters present on this Slave * @source_ports: Bitmap identifying source ports * @sink_ports: Bitmap identifying sink ports - * @scp_int1_mask: SCP_INT1_MASK desired settings * @quirks: bitmask identifying deltas from the MIPI specification + * @sdca_interrupt_register_list: indicates which sets of SDCA interrupt status + * and masks are supported + * @commit_register_supported: is PCP_Commit register supported + * @scp_int1_mask: SCP_INT1_MASK desired settings * @clock_reg_supported: the Peripheral implements the clock base and scale * registers introduced with the SoundWire 1.2 specification. SDCA devices * do not need to set this boolean property as the registers are required. @@ -363,6 +366,8 @@ struct sdw_slave_prop { u32 source_ports; u32 sink_ports; u32 quirks; + u32 sdca_interrupt_register_list; + u8 commit_register_supported; u8 scp_int1_mask; bool clock_reg_supported; bool use_domain_irq; -- cgit v1.2.3 From 71b405b184449fffcb76ea0814104b71dfdb2aee Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 3 Oct 2024 15:06:50 +0800 Subject: soundwire: mipi-disco: add support for DP0/DPn 'lane-list' property The SoundWire specification did not clearly require that ports could use all Lanes. Some SoundWire/SDCA peripheral adopters added restrictions on which lanes can be used by what port, and the DisCo for SoundWire 2.1 specification added a 'lane-list' property to model this hardware limitation. When not specified, the ports can use all Lanes. Otherwise, the 'lane-list' indicates which Lanes can be used, sorted by order of preference (most-preferred-first). This patch only reads the properties, the use of this property will come at a later time with multi-lane support. 
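To show how the new fields might eventually be consumed once multi-lane support lands, a hypothetical lane-selection helper is sketched below (see the diff that follows for the property parsing itself). Only the num_lanes and lane_list members come from this patch; the function and the manager lane mask are made up for illustration.

static int example_pick_lane(const struct sdw_dpn_prop *dpn, u32 mgr_lane_mask)
{
	int i;

	/* An absent 'mipi-sdw-lane-list' means the port may use any lane. */
	if (!dpn->num_lanes)
		return 0;

	/* lane_list is sorted most-preferred-first, so take the first match. */
	for (i = 0; i < dpn->num_lanes; i++)
		if (mgr_lane_mask & BIT(dpn->lane_list[i]))
			return dpn->lane_list[i];

	return -EINVAL;
}
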
Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20241003070650.62787-15-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/mipi_disco.c | 32 ++++++++++++++++++++++++++++++++ include/linux/soundwire/sdw.h | 8 ++++++++ 2 files changed, 40 insertions(+) (limited to 'include') diff --git a/drivers/soundwire/mipi_disco.c b/drivers/soundwire/mipi_disco.c index 36e734751225..9d59f486edbe 100644 --- a/drivers/soundwire/mipi_disco.c +++ b/drivers/soundwire/mipi_disco.c @@ -197,6 +197,22 @@ static int sdw_slave_read_dp0(struct sdw_slave *slave, dp0->imp_def_interrupts = mipi_fwnode_property_read_bool(port, "mipi-sdw-imp-def-dp0-interrupts-supported"); + nval = fwnode_property_count_u32(port, "mipi-sdw-lane-list"); + if (nval > 0) { + dp0->num_lanes = nval; + dp0->lane_list = devm_kcalloc(&slave->dev, + dp0->num_lanes, sizeof(*dp0->lane_list), + GFP_KERNEL); + if (!dp0->lane_list) + return -ENOMEM; + + ret = fwnode_property_read_u32_array(port, + "mipi-sdw-lane-list", + dp0->lane_list, dp0->num_lanes); + if (ret < 0) + return ret; + } + return 0; } @@ -326,6 +342,22 @@ static int sdw_slave_read_dpn(struct sdw_slave *slave, fwnode_property_read_u32(node, "mipi-sdw-port-encoding-type", &dpn[i].port_encoding); + nval = fwnode_property_count_u32(node, "mipi-sdw-lane-list"); + if (nval > 0) { + dpn[i].num_lanes = nval; + dpn[i].lane_list = devm_kcalloc(&slave->dev, + dpn[i].num_lanes, sizeof(*dpn[i].lane_list), + GFP_KERNEL); + if (!dpn[i].lane_list) + return -ENOMEM; + + ret = fwnode_property_read_u32_array(node, + "mipi-sdw-lane-list", + dpn[i].lane_list, dpn[i].num_lanes); + if (ret < 0) + return ret; + } + fwnode_handle_put(node); i++; diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 952514f044f0..73f655334fe9 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -238,6 +238,8 @@ enum sdw_clk_stop_mode { * @simple_ch_prep_sm: If channel prepare sequence is required * @imp_def_interrupts: If set, each bit corresponds to support for * implementation-defined interrupts + * @num_lanes: array size of @lane_list + * @lane_list: indicates which Lanes can be used by DP0 * * The wordlengths are specified by Spec as max, min AND number of * discrete values, implementation can define based on the wordlengths they @@ -252,6 +254,8 @@ struct sdw_dp0_prop { bool BRA_flow_controlled; bool simple_ch_prep_sm; bool imp_def_interrupts; + int num_lanes; + u32 *lane_list; }; /** @@ -275,6 +279,8 @@ struct sdw_dp0_prop { * @num_ch_combinations: Number of channel combinations supported * @channels: Discrete channels supported * @ch_combinations: Channel combinations supported + * @lane_list: indicates which Lanes can be used by DPn + * @num_lanes: array size of @lane_list * @modes: SDW mode supported * @max_async_buffer: Number of samples that this port can buffer in * asynchronous modes @@ -300,6 +306,8 @@ struct sdw_dpn_prop { u32 num_ch_combinations; u32 *channels; u32 *ch_combinations; + u32 *lane_list; + int num_lanes; u32 modes; u32 max_async_buffer; u32 port_encoding; -- cgit v1.2.3 From e26a0c5d828b225b88f534e2fcf10bf617f85f23 Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Sun, 29 Sep 2024 20:44:35 -0700 Subject: net: mana: Increase the DEF_RX_BUFFERS_PER_QUEUE to 1024 Through some experiments, we found out that increasing the default RX buffers count from 512 to 1024, gives slightly better throughput and significantly reduces the no_wqe_rx errs on the receiver side. 
Along with these, other parameters like cpu usage, retrans seg etc also show some improvement with 1024 value. Following are some snippets from the experiments ntttcp tests with 512 Rx buffers --------------------------------------- connections| throughput| no_wqe errs| --------------------------------------- 1 | 40.93Gbps | 123,211 | 16 | 180.15Gbps | 190,120 | 128 | 180.20Gbps | 173,508 | 256 | 180.27Gbps | 189,884 | ntttcp tests with 1024 Rx buffers --------------------------------------- connections| throughput| no_wqe errs| --------------------------------------- 1 | 44.22Gbps | 19,864 | 16 | 180.19Gbps | 4,430 | 128 | 180.21Gbps | 2,560 | 256 | 180.29Gbps | 1,529 | So, increasing the default RX buffers per queue count to 1024 Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Reviewed-by: Pavan Chebbi Link: https://patch.msgid.link/1727667875-29908-1-git-send-email-shradhagupta@linux.microsoft.com Signed-off-by: Paolo Abeni --- include/net/mana/mana.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index f2a5200d8a0f..9b0faa24b758 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -43,7 +43,7 @@ enum TRI_STATE { * size beyond this value gets rejected by __alloc_page() call. */ #define MAX_RX_BUFFERS_PER_QUEUE 8192 -#define DEF_RX_BUFFERS_PER_QUEUE 512 +#define DEF_RX_BUFFERS_PER_QUEUE 1024 #define MIN_RX_BUFFERS_PER_QUEUE 128 /* This max value for TX buffers is derived as the maximum allocatable -- cgit v1.2.3 From aa3ab3336e6052dba95dac9f5c268eb7d1823002 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 30 Sep 2024 14:33:19 +0200 Subject: scsi: libfcoe: Include instead of Substitute the inclusion of header with to allow the removal of legacy inclusion of from . Signed-off-by: Uros Bizjak Reviewed-by: Bart Van Assche Cc: Hannes Reinecke Cc: James E.J. Bottomley Cc: Martin K. Petersen Signed-off-by: Jason A. Donenfeld --- include/scsi/libfcoe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index 3c5899290aed..6616348e59b9 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 5b3fdc9f2ff10d3cee106ddaa0ee6636c7de381e Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 30 Sep 2024 14:33:29 +0200 Subject: random: Do not include in Files that use prandom infrastructure are now converted to use header instead of . Remove the legacy inclusion of from . This is the "nice cleanup" part, wished in c0842fbc1b18. Signed-off-by: Uros Bizjak Fixes: c0842fbc1b18 ("random32: move the pseudo-random 32-bit definitions to prandom.h") Cc: Theodore Ts'o Cc: Jason A. Donenfeld Cc: Linus Torvalds Signed-off-by: Jason A. Donenfeld --- include/linux/random.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index b0a940af4fff..333cecfca93f 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -145,13 +145,6 @@ declare_get_random_var_wait(u64, u32) declare_get_random_var_wait(long, unsigned long) #undef declare_get_random_var -/* - * This is designed to be standalone for just prandom - * users, but for now we include it from - * for legacy reasons. 
- */ -#include - #ifdef CONFIG_SMP int random_prepare_cpu(unsigned int cpu); int random_online_cpu(unsigned int cpu); -- cgit v1.2.3 From d18c13697b4dcbf6a8f06c3d8e564c4f5ad1477c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 30 Sep 2024 14:33:30 +0200 Subject: prandom: Include in include was removed from in d9f29deb7fe8 ("prandom: Remove unused include") because this inclusion broke arm64 due to a circular dependency on include files. __percpu tag is defined in include/linux/compiler_types.h, so there is currently no direct need for the inclusion of . However, in [1] we would like to repurpose __percpu tag as a named address space qualifier, where __percpu macro uses defines from . The circular dependency was removed in ddd8e37ebaa1 ("random: Do not include in ") and it cleared the path for the inclusion of in . This patch is basically a revert of d9f29deb7fe8 ("prandom: Remove unused include"). [1] https://lore.kernel.org/lkml/20240812115945.484051-4-ubizjak@gmail.com/ Signed-off-by: Uros Bizjak Cc: Theodore Ts'o Cc: Jason A. Donenfeld Cc: Kent Overstreet Signed-off-by: Jason A. Donenfeld --- include/linux/prandom.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/prandom.h b/include/linux/prandom.h index f7f1e5251c67..f2ed5b72b3d6 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -10,6 +10,7 @@ #include #include +#include #include struct rnd_state { -- cgit v1.2.3 From 7788d320ba5ecbfa88d0be8c32ef8f018f2f020f Mon Sep 17 00:00:00 2001 From: Antonino Maniscalco Date: Thu, 3 Oct 2024 18:12:59 +0200 Subject: drm/msm/a6xx: Add a flag to allow preemption to submitqueue_create Some userspace changes are necessary so add a flag for userspace to advertise support for preemption when creating the submitqueue. When this flag is not set preemption will not be allowed in the middle of the submitted IBs therefore mantaining compatibility with older userspace. The flag is rejected if preemption is not supported on the target, this allows userspace to know whether preemption is supported. 
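From the userspace side, opting in to preemption and degrading gracefully on older kernels or single-ring targets could look like the sketch below. drmIoctl(), DRM_IOCTL_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue and the new flag are the existing msm UAPI; the wrapper function and its fallback policy are illustrative only.

#include <stdint.h>
#include <xf86drm.h>
#include <drm/msm_drm.h>	/* header path depends on the libdrm installation */

static int create_queue(int fd, uint32_t prio, uint32_t *queue_id)
{
	struct drm_msm_submitqueue req = {
		.flags = MSM_SUBMITQUEUE_ALLOW_PREEMPT,
		.prio = prio,
	};

	if (drmIoctl(fd, DRM_IOCTL_MSM_SUBMITQUEUE_NEW, &req)) {
		/* Flag rejected (no preemption on this target/kernel): retry without it. */
		req.flags = 0;
		if (drmIoctl(fd, DRM_IOCTL_MSM_SUBMITQUEUE_NEW, &req))
			return -1;
	}

	*queue_id = req.id;
	return 0;
}
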
Tested-by: Rob Clark Tested-by: Neil Armstrong # on SM8650-QRD Tested-by: Neil Armstrong # on SM8550-QRD Tested-by: Neil Armstrong # on SM8450-HDK Signed-off-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/618028/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 12 ++++++++---- drivers/gpu/drm/msm/msm_submitqueue.c | 3 +++ include/uapi/drm/msm_drm.h | 5 ++++- 3 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index be306ae33c08..a0ef86987c5f 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -352,8 +352,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, 0x101); /* IFPC disable */ - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, 0x00d); /* IB1LIST start */ + if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) { + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, 0x00d); /* IB1LIST start */ + } /* Submit the commands */ for (i = 0; i < submit->nr_cmds; i++) { @@ -384,8 +386,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) update_shadow_rptr(gpu, ring); } - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, 0x00e); /* IB1LIST end */ + if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) { + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, 0x00e); /* IB1LIST end */ + } get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), rbmemptr_stats(ring, index, cpcycles_end)); diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c index 0e803125a325..9b3ffca3f3b4 100644 --- a/drivers/gpu/drm/msm/msm_submitqueue.c +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -170,6 +170,9 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, if (!priv->gpu) return -ENODEV; + if (flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT && priv->gpu->nr_rings == 1) + return -EINVAL; + ret = msm_gpu_convert_priority(priv->gpu, prio, &ring_nr, &sched_prio); if (ret) return ret; diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 2377147b6af0..b916aab80dde 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -347,7 +347,10 @@ struct drm_msm_gem_madvise { * backwards compatibility as a "default" submitqueue */ -#define MSM_SUBMITQUEUE_FLAGS (0) +#define MSM_SUBMITQUEUE_ALLOW_PREEMPT 0x00000001 +#define MSM_SUBMITQUEUE_FLAGS ( \ + MSM_SUBMITQUEUE_ALLOW_PREEMPT | \ + 0) /* * The submitqueue priority should be between 0 and MSM_PARAM_PRIORITIES-1, -- cgit v1.2.3 From 816ad8f1e498fe5e9e992da137316302219f2137 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 2 Oct 2024 14:51:52 -0700 Subject: lib: packing: remove kernel-doc from header file It is not necessary to have the kernel-doc duplicated both in the header and in the implementation. It is better to have it near the implementation of the function, since in C, a function can have N declarations, but only one definition. 
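In miniature, the C rule being relied on: the prototype may be repeated any number of times (which is all the header needs), while the kernel-doc now stays with the single definition.

/* packing.h, or any caller: a bare declaration, repeatable freely. */
int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen,
	    enum packing_op op, u8 quirks);

/* lib/packing.c: exactly one definition, and the one place the kernel-doc lives. */
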
Signed-off-by: Vladimir Oltean Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241002-packing-kunit-tests-and-split-pack-unpack-v2-3-8373e551eae3@intel.com Signed-off-by: Jakub Kicinski --- include/linux/packing.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include') diff --git a/include/linux/packing.h b/include/linux/packing.h index 8d6571feb95d..69baefebcd02 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -17,32 +17,6 @@ enum packing_op { UNPACK, }; -/** - * packing - Convert numbers (currently u64) between a packed and an unpacked - * format. Unpacked means laid out in memory in the CPU's native - * understanding of integers, while packed means anything else that - * requires translation. - * - * @pbuf: Pointer to a buffer holding the packed value. - * @uval: Pointer to an u64 holding the unpacked value. - * @startbit: The index (in logical notation, compensated for quirks) where - * the packed value starts within pbuf. Must be larger than, or - * equal to, endbit. - * @endbit: The index (in logical notation, compensated for quirks) where - * the packed value ends within pbuf. Must be smaller than, or equal - * to, startbit. - * @op: If PACK, then uval will be treated as const pointer and copied (packed) - * into pbuf, between startbit and endbit. - * If UNPACK, then pbuf will be treated as const pointer and the logical - * value between startbit and endbit will be copied (unpacked) to uval. - * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and - * QUIRK_MSB_ON_THE_RIGHT. - * - * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming - * correct usage, return code may be discarded. - * If op is PACK, pbuf is modified. - * If op is UNPACK, uval is modified. - */ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, enum packing_op op, u8 quirks); -- cgit v1.2.3 From 7263f64e16d90ded44ce211381b2def83db32fd9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 2 Oct 2024 14:51:53 -0700 Subject: lib: packing: add pack() and unpack() wrappers over packing() Geert Uytterhoeven described packing() as "really bad API" because of not being able to enforce const correctness. The same function is used both when "pbuf" is input and "uval" is output, as in the other way around. Create 2 wrapper functions where const correctness can be ensured. Do ugly type casts inside, to be able to reuse packing() as currently implemented - which will _not_ modify the input argument. Also, take the opportunity to change the type of startbit and endbit to size_t - an unsigned type - in these new function prototypes. When int, an extra check for negative values is necessary. Hopefully, when packing() goes away completely, that check can be dropped. My concern is that code which does rely on the conditional directionality of packing() is harder to refactor without blowing up in size. So it may take a while to completely eliminate packing(). But let's make alternatives available for those who do not need that. 
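A caller-side sketch of the new wrappers, using the prototypes added here (note that a later patch in this series changes pack() to take uval by value). The field offsets, buffer size and quirk below are made up for illustration.

#include <linux/packing.h>

/* Pack a value into bits 47:32 of a 6-byte buffer, LSW32-first layout. */
static int example_pack_field(u8 *buf, u64 val)
{
	return pack(buf, &val, 47, 32, 6, QUIRK_LSW32_IS_FIRST);
}

/* Unpack the same field; pbuf can now be const-qualified by the caller. */
static int example_unpack_field(const u8 *buf, u64 *val)
{
	return unpack(buf, val, 47, 32, 6, QUIRK_LSW32_IS_FIRST);
}
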
Link: https://lore.kernel.org/netdev/20210223112003.2223332-1-geert+renesas@glider.be/ Signed-off-by: Vladimir Oltean Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241002-packing-kunit-tests-and-split-pack-unpack-v2-4-8373e551eae3@intel.com Signed-off-by: Jakub Kicinski --- include/linux/packing.h | 6 ++++++ lib/packing.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) (limited to 'include') diff --git a/include/linux/packing.h b/include/linux/packing.h index 69baefebcd02..ea25cb93cc70 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -20,4 +20,10 @@ enum packing_op { int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, enum packing_op op, u8 quirks); +int pack(void *pbuf, const u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks); + +int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks); + #endif diff --git a/lib/packing.c b/lib/packing.c index 435236a914fe..2922db8a528c 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -90,6 +90,8 @@ static int calculate_box_addr(int box, size_t len, u8 quirks) * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and * QUIRK_MSB_ON_THE_RIGHT. * + * Note: this is deprecated, prefer to use pack() or unpack() in new code. + * * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming * correct usage, return code may be discarded. * If op is PACK, pbuf is modified. @@ -216,4 +218,56 @@ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, } EXPORT_SYMBOL(packing); +/** + * pack - Pack u64 number into bitfield of buffer. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. + * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. The @pbuf memory will + * be modified on success. + */ +int pack(void *pbuf, const u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) +{ + return packing(pbuf, (u64 *)uval, startbit, endbit, pbuflen, PACK, quirks); +} +EXPORT_SYMBOL(pack); + +/** + * unpack - Unpack u64 number from packed buffer. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. + * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. 
Assuming + * correct usage, return code may be discarded. The @uval will be + * modified on success. + */ +int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) +{ + return packing((void *)pbuf, uval, startbit, endbit, pbuflen, UNPACK, quirks); +} +EXPORT_SYMBOL(unpack); + MODULE_DESCRIPTION("Generic bitfield packing and unpacking"); -- cgit v1.2.3 From 28aec9ca29f05dabb835293acc6e202bf51b8092 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 2 Oct 2024 14:51:54 -0700 Subject: lib: packing: duplicate pack() and unpack() implementations packing() is now used in some hot paths, and it would be good to get rid of some ifs and buts that depend on "op", to speed things up a little bit. With the main implementations now taking size_t endbit, we no longer have to check for negative values. Update the local integer variables to also be size_t to match. Signed-off-by: Vladimir Oltean Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241002-packing-kunit-tests-and-split-pack-unpack-v2-5-8373e551eae3@intel.com Signed-off-by: Jakub Kicinski --- include/linux/packing.h | 4 +- lib/packing.c | 221 ++++++++++++++++++++++++++++++------------------ 2 files changed, 143 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/include/linux/packing.h b/include/linux/packing.h index ea25cb93cc70..5d36dcd06f60 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -20,8 +20,8 @@ enum packing_op { int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, enum packing_op op, u8 quirks); -int pack(void *pbuf, const u64 *uval, size_t startbit, size_t endbit, - size_t pbuflen, u8 quirks); +int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, + u8 quirks); int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, size_t pbuflen, u8 quirks); diff --git a/lib/packing.c b/lib/packing.c index 2922db8a528c..500184530d07 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -9,11 +9,11 @@ #include #include -static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, - int *box_end_bit, u8 *box_mask) +static void adjust_for_msb_right_quirk(u64 *to_write, size_t *box_start_bit, + size_t *box_end_bit, u8 *box_mask) { - int box_bit_width = *box_start_bit - *box_end_bit + 1; - int new_box_start_bit, new_box_end_bit; + size_t box_bit_width = *box_start_bit - *box_end_bit + 1; + size_t new_box_start_bit, new_box_end_bit; *to_write >>= *box_end_bit; *to_write = bitrev8(*to_write) >> (8 - box_bit_width); @@ -48,7 +48,7 @@ static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, * * Return: the physical offset into the buffer corresponding to the logical box. */ -static int calculate_box_addr(int box, size_t len, u8 quirks) +static size_t calculate_box_addr(size_t box, size_t len, u8 quirks) { size_t offset_of_group, offset_in_group, this_group = box / 4; size_t group_size; @@ -69,13 +69,10 @@ static int calculate_box_addr(int box, size_t len, u8 quirks) } /** - * packing - Convert numbers (currently u64) between a packed and an unpacked - * format. Unpacked means laid out in memory in the CPU's native - * understanding of integers, while packed means anything else that - * requires translation. + * pack - Pack u64 number into bitfield of buffer. * * @pbuf: Pointer to a buffer holding the packed value. - * @uval: Pointer to an u64 holding the unpacked value. 
+ * @uval: CPU-readable unpacked value to pack. * @startbit: The index (in logical notation, compensated for quirks) where * the packed value starts within pbuf. Must be larger than, or * equal to, endbit. @@ -83,58 +80,45 @@ static int calculate_box_addr(int box, size_t len, u8 quirks) * the packed value ends within pbuf. Must be smaller than, or equal * to, startbit. * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. - * @op: If PACK, then uval will be treated as const pointer and copied (packed) - * into pbuf, between startbit and endbit. - * If UNPACK, then pbuf will be treated as const pointer and the logical - * value between startbit and endbit will be copied (unpacked) to uval. * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and * QUIRK_MSB_ON_THE_RIGHT. * - * Note: this is deprecated, prefer to use pack() or unpack() in new code. - * * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming - * correct usage, return code may be discarded. - * If op is PACK, pbuf is modified. - * If op is UNPACK, uval is modified. + * correct usage, return code may be discarded. The @pbuf memory will + * be modified on success. */ -int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, - enum packing_op op, u8 quirks) +int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, + u8 quirks) { - /* Number of bits for storing "uval" - * also width of the field to access in the pbuf - */ - u64 value_width; /* Logical byte indices corresponding to the * start and end of the field. */ int plogical_first_u8, plogical_last_u8, box; + /* width of the field to access in the pbuf */ + u64 value_width; /* startbit is expected to be larger than endbit, and both are * expected to be within the logically addressable range of the buffer. */ - if (unlikely(startbit < endbit || startbit >= 8 * pbuflen || endbit < 0)) + if (unlikely(startbit < endbit || startbit >= 8 * pbuflen)) /* Invalid function call */ return -EINVAL; value_width = startbit - endbit + 1; - if (value_width > 64) + if (unlikely(value_width > 64)) return -ERANGE; /* Check if "uval" fits in "value_width" bits. * If value_width is 64, the check will fail, but any * 64-bit uval will surely fit. */ - if (op == PACK && value_width < 64 && (*uval >= (1ull << value_width))) + if (unlikely(value_width < 64 && uval >= (1ull << value_width))) /* Cannot store "uval" inside "value_width" bits. * Truncating "uval" is most certainly not desirable, * so simply erroring out is appropriate. */ return -ERANGE; - /* Initialize parameter */ - if (op == UNPACK) - *uval = 0; - /* Iterate through an idealistic view of the pbuf as an u64 with * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low * logical bit significance. "box" denotes the current logical u8. 
@@ -144,11 +128,12 @@ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, for (box = plogical_first_u8; box >= plogical_last_u8; box--) { /* Bit indices into the currently accessed 8-bit box */ - int box_start_bit, box_end_bit, box_addr; + size_t box_start_bit, box_end_bit, box_addr; u8 box_mask; /* Corresponding bits from the unpacked u64 parameter */ - int proj_start_bit, proj_end_bit; + size_t proj_start_bit, proj_end_bit; u64 proj_mask; + u64 pval; /* This u8 may need to be accessed in its entirety * (from bit 7 to bit 0), or not, depending on the @@ -182,44 +167,25 @@ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, */ box_addr = calculate_box_addr(box, pbuflen, quirks); - if (op == UNPACK) { - u64 pval; - - /* Read from pbuf, write to uval */ - pval = ((u8 *)pbuf)[box_addr] & box_mask; - if (quirks & QUIRK_MSB_ON_THE_RIGHT) - adjust_for_msb_right_quirk(&pval, - &box_start_bit, - &box_end_bit, - &box_mask); - - pval >>= box_end_bit; - pval <<= proj_end_bit; - *uval &= ~proj_mask; - *uval |= pval; - } else { - u64 pval; - - /* Write to pbuf, read from uval */ - pval = (*uval) & proj_mask; - pval >>= proj_end_bit; - if (quirks & QUIRK_MSB_ON_THE_RIGHT) - adjust_for_msb_right_quirk(&pval, - &box_start_bit, - &box_end_bit, - &box_mask); - - pval <<= box_end_bit; - ((u8 *)pbuf)[box_addr] &= ~box_mask; - ((u8 *)pbuf)[box_addr] |= pval; - } + /* Write to pbuf, read from uval */ + pval = uval & proj_mask; + pval >>= proj_end_bit; + if (quirks & QUIRK_MSB_ON_THE_RIGHT) + adjust_for_msb_right_quirk(&pval, + &box_start_bit, + &box_end_bit, + &box_mask); + + pval <<= box_end_bit; + ((u8 *)pbuf)[box_addr] &= ~box_mask; + ((u8 *)pbuf)[box_addr] |= pval; } return 0; } -EXPORT_SYMBOL(packing); +EXPORT_SYMBOL(pack); /** - * pack - Pack u64 number into bitfield of buffer. + * unpack - Unpack u64 number from packed buffer. * * @pbuf: Pointer to a buffer holding the packed value. * @uval: Pointer to an u64 holding the unpacked value. @@ -234,18 +200,103 @@ EXPORT_SYMBOL(packing); * QUIRK_MSB_ON_THE_RIGHT. * * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming - * correct usage, return code may be discarded. The @pbuf memory will - * be modified on success. + * correct usage, return code may be discarded. The @uval will be + * modified on success. */ -int pack(void *pbuf, const u64 *uval, size_t startbit, size_t endbit, - size_t pbuflen, u8 quirks) +int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) { - return packing(pbuf, (u64 *)uval, startbit, endbit, pbuflen, PACK, quirks); + /* Logical byte indices corresponding to the + * start and end of the field. + */ + int plogical_first_u8, plogical_last_u8, box; + /* width of the field to access in the pbuf */ + u64 value_width; + + /* startbit is expected to be larger than endbit, and both are + * expected to be within the logically addressable range of the buffer. + */ + if (unlikely(startbit < endbit || startbit >= 8 * pbuflen)) + /* Invalid function call */ + return -EINVAL; + + value_width = startbit - endbit + 1; + if (unlikely(value_width > 64)) + return -ERANGE; + + /* Initialize parameter */ + *uval = 0; + + /* Iterate through an idealistic view of the pbuf as an u64 with + * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low + * logical bit significance. "box" denotes the current logical u8. 
+ */ + plogical_first_u8 = startbit / 8; + plogical_last_u8 = endbit / 8; + + for (box = plogical_first_u8; box >= plogical_last_u8; box--) { + /* Bit indices into the currently accessed 8-bit box */ + size_t box_start_bit, box_end_bit, box_addr; + u8 box_mask; + /* Corresponding bits from the unpacked u64 parameter */ + size_t proj_start_bit, proj_end_bit; + u64 proj_mask; + u64 pval; + + /* This u8 may need to be accessed in its entirety + * (from bit 7 to bit 0), or not, depending on the + * input arguments startbit and endbit. + */ + if (box == plogical_first_u8) + box_start_bit = startbit % 8; + else + box_start_bit = 7; + if (box == plogical_last_u8) + box_end_bit = endbit % 8; + else + box_end_bit = 0; + + /* We have determined the box bit start and end. + * Now we calculate where this (masked) u8 box would fit + * in the unpacked (CPU-readable) u64 - the u8 box's + * projection onto the unpacked u64. Though the + * box is u8, the projection is u64 because it may fall + * anywhere within the unpacked u64. + */ + proj_start_bit = ((box * 8) + box_start_bit) - endbit; + proj_end_bit = ((box * 8) + box_end_bit) - endbit; + proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit); + box_mask = GENMASK_ULL(box_start_bit, box_end_bit); + + /* Determine the offset of the u8 box inside the pbuf, + * adjusted for quirks. The adjusted box_addr will be used for + * effective addressing inside the pbuf (so it's not + * logical any longer). + */ + box_addr = calculate_box_addr(box, pbuflen, quirks); + + /* Read from pbuf, write to uval */ + pval = ((u8 *)pbuf)[box_addr] & box_mask; + if (quirks & QUIRK_MSB_ON_THE_RIGHT) + adjust_for_msb_right_quirk(&pval, + &box_start_bit, + &box_end_bit, + &box_mask); + + pval >>= box_end_bit; + pval <<= proj_end_bit; + *uval &= ~proj_mask; + *uval |= pval; + } + return 0; } -EXPORT_SYMBOL(pack); +EXPORT_SYMBOL(unpack); /** - * unpack - Unpack u64 number from packed buffer. + * packing - Convert numbers (currently u64) between a packed and an unpacked + * format. Unpacked means laid out in memory in the CPU's native + * understanding of integers, while packed means anything else that + * requires translation. * * @pbuf: Pointer to a buffer holding the packed value. * @uval: Pointer to an u64 holding the unpacked value. @@ -256,18 +307,28 @@ EXPORT_SYMBOL(pack); * the packed value ends within pbuf. Must be smaller than, or equal * to, startbit. * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @op: If PACK, then uval will be treated as const pointer and copied (packed) + * into pbuf, between startbit and endbit. + * If UNPACK, then pbuf will be treated as const pointer and the logical + * value between startbit and endbit will be copied (unpacked) to uval. * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and * QUIRK_MSB_ON_THE_RIGHT. * + * Note: this is deprecated, prefer to use pack() or unpack() in new code. + * * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming - * correct usage, return code may be discarded. The @uval will be - * modified on success. + * correct usage, return code may be discarded. + * If op is PACK, pbuf is modified. + * If op is UNPACK, uval is modified. 
*/ -int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, - size_t pbuflen, u8 quirks) +int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, + enum packing_op op, u8 quirks) { - return packing((void *)pbuf, uval, startbit, endbit, pbuflen, UNPACK, quirks); + if (op == PACK) + return pack(pbuf, *uval, startbit, endbit, pbuflen, quirks); + + return unpack(pbuf, uval, startbit, endbit, pbuflen, quirks); } -EXPORT_SYMBOL(unpack); +EXPORT_SYMBOL(packing); MODULE_DESCRIPTION("Generic bitfield packing and unpacking"); -- cgit v1.2.3 From 7e863e5db6185b1add0df4cb01b31a4ed1c4b738 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 1 Oct 2024 21:28:43 +0200 Subject: ipv4: Convert ip_route_input() to dscp_t. Pass a dscp_t variable to ip_route_input(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. Callers of ip_route_input() to consider are: * input_action_end_dx4_finish() and input_action_end_dt4() in net/ipv6/seg6_local.c. These functions set the tos parameter to 0, which is already a valid dscp_t value, so they don't need to be adjusted for the new prototype. * icmp_route_lookup(), which already has a dscp_t variable to pass as parameter. We just need to remove the inet_dscp_to_dsfield() conversion. * br_nf_pre_routing_finish(), ip_options_rcv_srr() and ip4ip6_err(), which get the DSCP directly from IPv4 headers. Define a helper to read the .tos field of struct iphdr as dscp_t, so that these function don't have to do the conversion manually. While there, declare *iph as const in br_nf_pre_routing_finish(), declare its local variables in reverse-christmas-tree order and move the "err = ip_route_input()" assignment out of the conditional to avoid checkpatch warning. Signed-off-by: Guillaume Nault Reviewed-by: David Ahern Link: https://patch.msgid.link/e9d40781d64d3d69f4c79ac8a008b8d67a033e8d.1727807926.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 5 +++++ include/net/route.h | 5 +++-- net/bridge/br_netfilter_hooks.c | 8 +++++--- net/ipv4/icmp.c | 2 +- net/ipv4/ip_options.c | 3 ++- net/ipv6/ip6_tunnel.c | 4 ++-- 6 files changed, 18 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index d92d3bc3ec0e..bab084df1567 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -424,6 +424,11 @@ int ip_decrease_ttl(struct iphdr *iph) return --iph->ttl; } +static inline dscp_t ip4h_dscp(const struct iphdr *ip4h) +{ + return inet_dsfield_to_dscp(ip4h->tos); +} + static inline int ip_mtu_locked(const struct dst_entry *dst) { const struct rtable *rt = dst_rtable(dst); diff --git a/include/net/route.h b/include/net/route.h index 1789f1e6640b..03dd28cf4bc4 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -208,12 +208,13 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src, const struct sk_buff *hint); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin) + dscp_t dscp, struct net_device *devin) { int err; rcu_read_lock(); - err = ip_route_input_noref(skb, dst, src, tos, devin); + err = ip_route_input_noref(skb, dst, src, inet_dscp_to_dsfield(dscp), + devin); if (!err) { skb_dst_force(skb); if (!skb_dst(skb)) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 0e8bc0ea6175..c6bab2b5e834 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -369,9 +369,9 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff 
*skb, */ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb->dev, *br_indev; - struct iphdr *iph = ip_hdr(skb); struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct net_device *dev = skb->dev, *br_indev; + const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; int err; @@ -389,7 +389,9 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ } nf_bridge->in_prerouting = 0; if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { - if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { + err = ip_route_input(skb, iph->daddr, iph->saddr, + ip4h_dscp(iph), dev); + if (err) { struct in_device *in_dev = __in_dev_get_rcu(dev); /* If err equals -EHOSTUNREACH the error is due to a diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 7d7b25ed8d21..23664434922e 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -545,7 +545,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, orefdst = skb_in->_skb_refdst; /* save old refdst */ skb_dst_set(skb_in, NULL); err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr, - inet_dscp_to_dsfield(dscp), rt2->dst.dev); + dscp, rt2->dst.dev); dst_release(&rt2->dst); rt2 = skb_rtable(skb_in); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 68aedb8877b9..81e86e5defee 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -617,7 +617,8 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev) orefdst = skb->_skb_refdst; skb_dst_set(skb, NULL); - err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev); + err = ip_route_input(skb, nexthop, iph->saddr, ip4h_dscp(iph), + dev); rt2 = skb_rtable(skb); if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { skb_dst_drop(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b60e13c42bca..48fd53b98972 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -630,8 +630,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } skb_dst_set(skb2, &rt->dst); } else { - if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, - skb2->dev) || + if (ip_route_input(skb2, eiph->daddr, eiph->saddr, + ip4h_dscp(eiph), skb2->dev) || skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) goto out; } -- cgit v1.2.3 From 66fb6386d358a04edd5c640e38b4a02b323b89d8 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 1 Oct 2024 21:28:49 +0200 Subject: ipv4: Convert ip_route_input_noref() to dscp_t. Pass a dscp_t variable to ip_route_input_noref(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. Callers of ip_route_input_noref() to consider are: * arp_process() in net/ipv4/arp.c. This function sets the tos parameter to 0, which is already a valid dscp_t value, so it doesn't need to be adjusted for the new prototype. * ip_route_input(), which already has a dscp_t variable to pass as parameter. We just need to remove the inet_dscp_to_dsfield() conversion. * ipvlan_l3_rcv(), bpf_lwt_input_reroute(), ip_expire(), ip_rcv_finish_core(), xfrm4_rcv_encap_finish() and xfrm4_rcv_encap(), which get the DSCP directly from IPv4 headers and can simply use the ip4h_dscp() helper. While there, declare the IPv4 header pointers as const in ipvlan_l3_rcv() and bpf_lwt_input_reroute(). Also, modify the declaration of ip_route_input_noref() in include/net/route.h so that it matches the prototype of its implementation in net/ipv4/route.c. 
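The conversion pattern used throughout both patches, shown in isolation below. The surrounding function is hypothetical; ip_hdr(), ip4h_dscp() and the new ip_route_input() prototype are the ones shown above.

static int example_input_route(struct sk_buff *skb, struct net_device *dev)
{
	const struct iphdr *iph = ip_hdr(skb);

	/*
	 * Was: ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev);
	 * ip4h_dscp() masks off the ECN bits, so they can no longer leak
	 * into the flowi4_tos routing key.
	 */
	return ip_route_input(skb, iph->daddr, iph->saddr, ip4h_dscp(iph), dev);
}
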
Signed-off-by: Guillaume Nault Reviewed-by: David Ahern Link: https://patch.msgid.link/a8a747bed452519c4d0cc06af32c7e7795d7b627.1727807926.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ipvlan/ipvlan_l3s.c | 6 ++++-- include/net/route.h | 7 +++---- net/core/lwt_bpf.c | 5 +++-- net/ipv4/ip_fragment.c | 4 ++-- net/ipv4/ip_input.c | 2 +- net/ipv4/route.c | 6 +++--- net/ipv4/xfrm4_input.c | 2 +- net/ipv4/xfrm4_protocol.c | 2 +- 8 files changed, 18 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c index d5b05e803219..b4ef386bdb1b 100644 --- a/drivers/net/ipvlan/ipvlan_l3s.c +++ b/drivers/net/ipvlan/ipvlan_l3s.c @@ -2,6 +2,8 @@ /* Copyright (c) 2014 Mahesh Bandewar */ +#include + #include "ipvlan.h" static unsigned int ipvlan_netid __read_mostly; @@ -48,11 +50,11 @@ static struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, switch (proto) { case AF_INET: { - struct iphdr *ip4h = ip_hdr(skb); + const struct iphdr *ip4h = ip_hdr(skb); int err; err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, - ip4h->tos, sdev); + ip4h_dscp(ip4h), sdev); if (unlikely(err)) goto out; break; diff --git a/include/net/route.h b/include/net/route.h index 03dd28cf4bc4..5e4374d66927 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -201,8 +201,8 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, struct in_device *in_dev, u32 *itag); -int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin); +int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev); int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin, const struct sk_buff *hint); @@ -213,8 +213,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, int err; rcu_read_lock(); - err = ip_route_input_noref(skb, dst, src, inet_dscp_to_dsfield(dscp), - devin); + err = ip_route_input_noref(skb, dst, src, dscp, devin); if (!err) { skb_dst_force(skb); if (!skb_dst(skb)) diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 1a14f915b7a4..e0ca24a58810 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -91,12 +92,12 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) { struct net_device *dev = skb_dst(skb)->dev; - struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); dev_hold(dev); skb_dst_drop(skb); err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, dev); + ip4h_dscp(iph), dev); dev_put(dev); } else if (skb->protocol == htons(ETH_P_IPV6)) { skb_dst_drop(skb); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a92664a5ef2e..48e2810f1f27 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -175,8 +175,8 @@ static void ip_expire(struct timer_list *t) /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); - err = ip_route_input_noref(head, iph->daddr, iph->saddr, - iph->tos, head->dev); + err = ip_route_input_noref(head, iph->daddr, iph->saddr, ip4h_dscp(iph), + head->dev); if (err) goto out; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index b6e7d4921309..c0a2490eb7c1 100644 --- a/net/ipv4/ip_input.c +++ 
b/net/ipv4/ip_input.c @@ -363,7 +363,7 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, */ if (!skb_valid_dst(skb)) { err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, dev); + ip4h_dscp(iph), dev); if (unlikely(err)) goto drop_error; } else { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 723ac9181558..00bfc0a11f64 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2465,14 +2465,14 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, } int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev) + dscp_t dscp, struct net_device *dev) { struct fib_result res; int err; - tos &= INET_DSCP_MASK; rcu_read_lock(); - err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res); + err = ip_route_input_rcu(skb, daddr, saddr, inet_dscp_to_dsfield(dscp), + dev, &res); rcu_read_unlock(); return err; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index a620618cc568..b5b06323cfd9 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -33,7 +33,7 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, const struct iphdr *iph = ip_hdr(skb); if (ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) + ip4h_dscp(iph), skb->dev)) goto drop; } diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index b146ce88c5d0..4ee624d8e66f 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -76,7 +76,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, const struct iphdr *iph = ip_hdr(skb); if (ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) + ip4h_dscp(iph), skb->dev)) goto drop; } -- cgit v1.2.3 From 4f647a780f3606acbd2116248d51eadb4d865615 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Sep 2024 02:17:10 -0700 Subject: bpf: __bpf_fastcall for bpf_get_smp_processor_id in uapi Since [1] kernel supports __bpf_fastcall attribute for helper function bpf_get_smp_processor_id(). Update uapi definition for this helper in order to have this attribute in the generated bpf_helper_defs.h [1] commit 91b7fbf3936f ("bpf, x86, riscv, arm: no_caller_saved_registers for bpf_get_smp_processor_id()") Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240916091712.2929279-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 ++ tools/include/uapi/linux/bpf.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c6cd7c7aeeee..8ab4d8184b9d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1970,6 +1970,8 @@ union bpf_attr { * program. * Return * The SMP id of the processor running the program. + * Attributes + * __bpf_fastcall * * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1fb3cb2636e6..7610883c8191 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1970,6 +1970,8 @@ union bpf_attr { * program. * Return * The SMP id of the processor running the program. 
+ * Attributes + * __bpf_fastcall * * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description -- cgit v1.2.3 From da7d71bcb0637b7aa18934628fdb5a55f2db49a6 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Sep 2024 02:17:11 -0700 Subject: bpf: Use KF_FASTCALL to mark kfuncs supporting fastcall contract In order to allow pahole add btf_decl_tag("bpf_fastcall") for kfuncs supporting bpf_fastcall, mark such functions with KF_FASTCALL in id_set8 objects. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240916091712.2929279-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 1 + kernel/bpf/helpers.c | 4 ++-- kernel/bpf/verifier.c | 5 +---- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index b8a583194c4a..631060e3ad14 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -75,6 +75,7 @@ #define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */ #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ #define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */ +#define KF_FASTCALL (1 << 12) /* kfunc supports bpf_fastcall protocol */ /* * Tag marking a kernel function as a kfunc. This is meant to minimize the diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1a43d06eab28..4053f279ed4c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3052,8 +3052,8 @@ BTF_ID(func, bpf_cgroup_release_dtor) #endif BTF_KFUNCS_START(common_btf_ids) -BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) -BTF_ID_FLAGS(func, bpf_rdonly_cast) +BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx, KF_FASTCALL) +BTF_ID_FLAGS(func, bpf_rdonly_cast, KF_FASTCALL) BTF_ID_FLAGS(func, bpf_rcu_read_lock) BTF_ID_FLAGS(func, bpf_rcu_read_unlock) BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9a7ed527e47e..7d9b38ffd220 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -16176,10 +16176,7 @@ static u32 kfunc_fastcall_clobber_mask(struct bpf_kfunc_call_arg_meta *meta) /* Same as verifier_inlines_helper_call() but for kfuncs, see comment above */ static bool is_fastcall_kfunc_call(struct bpf_kfunc_call_arg_meta *meta) { - if (meta->btf == btf_vmlinux) - return meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] || - meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]; - return false; + return meta->kfunc_flags & KF_FASTCALL; } /* LLVM define a bpf_fastcall function attribute. -- cgit v1.2.3 From 22fbabe82cea7af4127f089b7f3553cd75571d9a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 12 Sep 2024 15:30:02 -0700 Subject: scsi: ufs: core: Improve the struct ufs_hba documentation Make the role of the structure members related to UIC command processing more clear. Reviewed-by: Peter Wang Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240912223019.3510966-2-bvanassche@acm.org Reviewed-by: Bao D. Nguyen Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 3f68ae3e4330..a95282b9f743 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -877,9 +877,10 @@ enum ufshcd_mcq_opr { * @tmf_tag_set: TMF tag set. * @tmf_queue: Used to allocate TMF tags. 
* @tmf_rqs: array with pointers to TMF requests while these are in progress. - * @active_uic_cmd: handle of active UIC command - * @uic_cmd_mutex: mutex for UIC command - * @uic_async_done: completion used during UIC processing + * @active_uic_cmd: pointer to active UIC command. + * @uic_cmd_mutex: mutex used for serializing UIC command processing. + * @uic_async_done: completion used to wait for power mode or hibernation state + * changes. * @ufshcd_state: UFSHCD state * @eh_flags: Error handling flags * @intr_mask: Interrupt Mask Bits -- cgit v1.2.3 From a04d5f82fa3893aa06386db93883b151b93b11ed Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Oct 2024 05:01:05 +0100 Subject: mm: Remove PageMappedToDisk All callers have now been converted to the folio APIs, so remove the page API for this flag. Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20241002040111.1023018-4-willy@infradead.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1b3a76710487..35d08c30d4a6 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -554,7 +554,7 @@ FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE) */ TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL) TESTSCFLAG(Writeback, writeback, PF_NO_TAIL) -PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) +FOLIO_FLAG(mappedtodisk, FOLIO_HEAD_PAGE) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) -- cgit v1.2.3 From fd15ba4cb00a43fb4df42e1f95f94857ad122eea Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Oct 2024 05:01:07 +0100 Subject: ceph: Remove call to PagePrivate2() Use the folio that we already have to call folio_test_private_2() instead. This is the last call to PagePrivate2(), so replace its PAGEFLAG() definition with FOLIO_FLAG(). Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20241002040111.1023018-6-willy@infradead.org Signed-off-by: Christian Brauner --- fs/ceph/addr.c | 20 ++++++++++---------- include/linux/page-flags.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 53fef258c2bc..a8788e300dc7 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1051,7 +1051,9 @@ get_more_pages: if (!nr_folios && !locked_pages) break; for (i = 0; i < nr_folios && locked_pages < max_pages; i++) { - page = &fbatch.folios[i]->page; + struct folio *folio = fbatch.folios[i]; + + page = &folio->page; doutc(cl, "? 
%p idx %lu\n", page, page->index); if (locked_pages == 0) lock_page(page); /* first page */ @@ -1078,8 +1080,6 @@ get_more_pages: continue; } if (page_offset(page) >= ceph_wbc.i_size) { - struct folio *folio = page_folio(page); - doutc(cl, "folio at %lu beyond eof %llu\n", folio->index, ceph_wbc.i_size); if ((ceph_wbc.size_stable || @@ -1095,16 +1095,16 @@ get_more_pages: unlock_page(page); break; } - if (PageWriteback(page) || - PagePrivate2(page) /* [DEPRECATED] */) { + if (folio_test_writeback(folio) || + folio_test_private_2(folio) /* [DEPRECATED] */) { if (wbc->sync_mode == WB_SYNC_NONE) { - doutc(cl, "%p under writeback\n", page); - unlock_page(page); + doutc(cl, "%p under writeback\n", folio); + folio_unlock(folio); continue; } - doutc(cl, "waiting on writeback %p\n", page); - wait_on_page_writeback(page); - folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */ + doutc(cl, "waiting on writeback %p\n", folio); + folio_wait_writeback(folio); + folio_wait_private_2(folio); /* [DEPRECATED] */ } if (!clear_page_dirty_for_io(page)) { diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 35d08c30d4a6..4c2dfe289046 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -543,7 +543,7 @@ FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE) * - PG_private and PG_private_2 cause release_folio() and co to be invoked */ PAGEFLAG(Private, private, PF_ANY) -PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) +FOLIO_FLAG(private_2, FOLIO_HEAD_PAGE) /* owner_2 can be set on tail pages for anon memory */ FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE) -- cgit v1.2.3 From 0e45a09a1da0872786885c505467aab8fb29b5b4 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 4 Sep 2024 21:17:06 -0700 Subject: Input: serio - define serio_pause_rx guard to pause and resume serio ports serio_pause_rx() and serio_continue_rx() are usually used together to temporarily stop receiving interrupts/data for a given serio port. Define "serio_pause_rx" guard for this so that the port is always resumed once critical section is over. Example: scoped_guard(serio_pause_rx, elo->serio) { elo->expected_packet = toupper(packet[0]); init_completion(&elo->cmd_done); } Link: https://lore.kernel.org/r/20240905041732.2034348-2-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/serio.h b/include/linux/serio.h index bf2191f25350..69a47674af65 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -6,6 +6,7 @@ #define _SERIO_H +#include #include #include #include @@ -161,4 +162,6 @@ static inline void serio_continue_rx(struct serio *serio) spin_unlock_irq(&serio->lock); } +DEFINE_GUARD(serio_pause_rx, struct serio *, serio_pause_rx(_T), serio_continue_rx(_T)) + #endif -- cgit v1.2.3 From c653ffc283404a6c1c0e65143a833180c7ff799b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 3 Oct 2024 07:46:51 -0700 Subject: HID: stop exporting hid_snto32() The only user of hid_snto32() is Logitech HID++ driver, which always calls hid_snto32() with valid size (constant, either 12 or 8) and therefore can simply use sign_extend32(). Make the switch and remove hid_snto32(). Move snto32() and s32ton() to avoid introducing forward declaration. 
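The substitution made in hid-logitech-hidpp.c relies on the equivalence, for a constant field width n, of hid_snto32(v, n) and sign_extend32(v, n - 1): sign_extend32() takes the index of the sign bit rather than the width. A minimal worked example (the function name is illustrative):

#include <linux/bitops.h>

/* A 12-bit relative-axis field: 0xFFF decodes to -1, 0x7FF to 2047. */
static s32 example_decode_axis(u32 raw12)
{
	return sign_extend32(raw12, 11);	/* was hid_snto32(raw12, 12) */
}
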
Signed-off-by: Dmitry Torokhov Link: https://patch.msgid.link/20241003144656.3786064-2-dmitry.torokhov@gmail.com [bentiss: fix checkpatch warning] Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-core.c | 63 ++++++++++++++++++---------------------- drivers/hid/hid-logitech-hidpp.c | 6 ++-- include/linux/hid.h | 1 - 3 files changed, 32 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index a6fe04e1c726..87b2cdd7f0c4 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -45,6 +45,34 @@ static int hid_ignore_special_drivers = 0; module_param_named(ignore_special_drivers, hid_ignore_special_drivers, int, 0600); MODULE_PARM_DESC(ignore_special_drivers, "Ignore any special drivers and handle all devices by generic driver"); +/* + * Convert a signed n-bit integer to signed 32-bit integer. + */ + +static s32 snto32(__u32 value, unsigned int n) +{ + if (!value || !n) + return 0; + + if (n > 32) + n = 32; + + return sign_extend32(value, n - 1); +} + +/* + * Convert a signed 32-bit integer to a signed n-bit integer. + */ + +static u32 s32ton(__s32 value, unsigned int n) +{ + s32 a = value >> (n - 1); + + if (a && a != -1) + return value < 0 ? 1 << (n - 1) : (1 << (n - 1)) - 1; + return value & ((1 << n) - 1); +} + /* * Register a new report for a device. */ @@ -425,7 +453,7 @@ static int hid_parser_global(struct hid_parser *parser, struct hid_item *item) * both this and the standard encoding. */ raw_value = item_sdata(item); if (!(raw_value & 0xfffffff0)) - parser->global.unit_exponent = hid_snto32(raw_value, 4); + parser->global.unit_exponent = snto32(raw_value, 4); else parser->global.unit_exponent = raw_value; return 0; @@ -1312,39 +1340,6 @@ alloc_err: } EXPORT_SYMBOL_GPL(hid_open_report); -/* - * Convert a signed n-bit integer to signed 32-bit integer. - */ - -static s32 snto32(__u32 value, unsigned n) -{ - if (!value || !n) - return 0; - - if (n > 32) - n = 32; - - return sign_extend32(value, n - 1); -} - -s32 hid_snto32(__u32 value, unsigned n) -{ - return snto32(value, n); -} -EXPORT_SYMBOL_GPL(hid_snto32); - -/* - * Convert a signed 32-bit integer to a signed n-bit integer. - */ - -static u32 s32ton(__s32 value, unsigned n) -{ - s32 a = value >> (n - 1); - if (a && a != -1) - return value < 0 ? 1 << (n - 1) : (1 << (n - 1)) - 1; - return value & ((1 << n) - 1); -} - /* * Extract/implement a data field from/to a little endian report (bit array). 
* diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 0e33fa0eb8db..0d114268ade8 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -3296,13 +3296,13 @@ static int m560_raw_event(struct hid_device *hdev, u8 *data, int size) 120); } - v = hid_snto32(hid_field_extract(hdev, data+3, 0, 12), 12); + v = sign_extend32(hid_field_extract(hdev, data + 3, 0, 12), 11); input_report_rel(hidpp->input, REL_X, v); - v = hid_snto32(hid_field_extract(hdev, data+3, 12, 12), 12); + v = sign_extend32(hid_field_extract(hdev, data + 3, 12, 12), 11); input_report_rel(hidpp->input, REL_Y, v); - v = hid_snto32(data[6], 8); + v = sign_extend32(data[6], 7); if (v != 0) hidpp_scroll_counter_handle_scroll(hidpp->input, &hidpp->vertical_wheel_counter, v); diff --git a/include/linux/hid.h b/include/linux/hid.h index 121d5b8bc867..f7c3a66647fd 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -974,7 +974,6 @@ const struct hid_device_id *hid_match_device(struct hid_device *hdev, struct hid_driver *hdrv); bool hid_compare_device_paths(struct hid_device *hdev_a, struct hid_device *hdev_b, char separator); -s32 hid_snto32(__u32 value, unsigned n); __u32 hid_field_extract(const struct hid_device *hid, __u8 *report, unsigned offset, unsigned n); -- cgit v1.2.3 From 9ab515b18f8463fbb340fece47cd461809e42a9d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Oct 2024 23:58:41 +0100 Subject: mm: Define VM_HIGH_ARCH_6 The addition of protection keys means that on arm64 we now use all of the currently defined VM_HIGH_ARCH_x bits. In order to allow us to allocate a new flag for GCS pages define VM_HIGH_ARCH_6. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-2-222b78d87eee@kernel.org Signed-off-by: Catalin Marinas --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ecf63d2b0582..182bad0c55df 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -329,12 +329,14 @@ extern unsigned int kobjsize(const void *objp); #define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_6 38 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) #define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) #define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) #define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) #define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) +#define VM_HIGH_ARCH_6 BIT(VM_HIGH_ARCH_BIT_6) #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ #ifdef CONFIG_ARCH_HAS_PKEYS -- cgit v1.2.3 From 91e102e79740ae43ded050ccac71aa3371db4f33 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Oct 2024 23:58:43 +0100 Subject: prctl: arch-agnostic prctl for shadow stack Three architectures (x86, aarch64, riscv) have announced support for shadow stacks with fairly similar functionality. While x86 is using arch_prctl() to control the functionality neither arm64 nor riscv uses that interface so this patch adds arch-agnostic prctl() support to get and set status of shadow stacks and lock the current configuation to prevent further changes, with support for turning on and off individual subfeatures so applications can limit their exposure to features that they do not need. 
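As a rough illustration, a thread would drive the interface like this, using the features listed next (userspace sketch only; constants as added in the uapi hunk below, error handling trimmed):

	unsigned long status;

	/* allocate and enable a shadow stack for this thread */
	prctl(PR_SET_SHADOW_STACK_STATUS, PR_SHADOW_STACK_ENABLE, 0, 0, 0);

	/* read the active configuration back */
	prctl(PR_GET_SHADOW_STACK_STATUS, (unsigned long)&status, 0, 0, 0);

	/* prevent further changes to the enable bit */
	prctl(PR_LOCK_SHADOW_STACK_STATUS, PR_SHADOW_STACK_ENABLE, 0, 0, 0);
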
The features are: - PR_SHADOW_STACK_ENABLE: Tracking and enforcement of shadow stacks, including allocation of a shadow stack if one is not already allocated. - PR_SHADOW_STACK_WRITE: Writes to specific addresses in the shadow stack. - PR_SHADOW_STACK_PUSH: Push additional values onto the shadow stack. These features are expected to be inherited by new threads and cleared on exec(), unknown features should be rejected for enable but accepted for locking (in order to allow for future proofing). This is based on a patch originally written by Deepak Gupta but modified fairly heavily, support for indirect landing pads is removed, additional modes added and the locking interface reworked. The set status prctl() is also reworked to just set flags, if setting/reading the shadow stack pointer is required this could be a separate prctl. Reviewed-by: Thiago Jung Bauermann Reviewed-by: Catalin Marinas Acked-by: Yury Khrustalev Signed-off-by: Mark Brown Reviewed-by: Deepak Gupta Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-4-222b78d87eee@kernel.org Signed-off-by: Catalin Marinas --- include/linux/mm.h | 4 ++++ include/uapi/linux/prctl.h | 22 ++++++++++++++++++++++ kernel/sys.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 182bad0c55df..56654306a832 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4221,4 +4221,8 @@ static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) } #endif /* CONFIG_MEM_ALLOC_PROFILING */ +int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status); +int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status); +int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); + #endif /* _LINUX_MM_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 35791791a879..557a3d2ac1d4 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -328,4 +328,26 @@ struct prctl_mm_map { # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ # define PR_PPC_DEXCR_CTRL_MASK 0x1f +/* + * Get the current shadow stack configuration for the current thread, + * this will be the value configured via PR_SET_SHADOW_STACK_STATUS. + */ +#define PR_GET_SHADOW_STACK_STATUS 74 + +/* + * Set the current shadow stack configuration. Enabling the shadow + * stack will cause a shadow stack to be allocated for the thread. + */ +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +/* + * Prevent further changes to the specified shadow stack + * configuration. All bits may be locked via this call, including + * undefined bits. 
+ */ +#define PR_LOCK_SHADOW_STACK_STATUS 76 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 4da31f28fda8..3d38a9c7c5c9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2324,6 +2324,21 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, return -EINVAL; } +int __weak arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status) +{ + return -EINVAL; +} + +int __weak arch_set_shadow_stack_status(struct task_struct *t, unsigned long status) +{ + return -EINVAL; +} + +int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status) +{ + return -EINVAL; +} + #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE) #ifdef CONFIG_ANON_VMA_NAME @@ -2784,6 +2799,21 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_RISCV_SET_ICACHE_FLUSH_CTX: error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3); break; + case PR_GET_SHADOW_STACK_STATUS: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_get_shadow_stack_status(me, (unsigned long __user *) arg2); + break; + case PR_SET_SHADOW_STACK_STATUS: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_set_shadow_stack_status(me, arg2); + break; + case PR_LOCK_SHADOW_STACK_STATUS: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_lock_shadow_stack_status(me, arg2); + break; default: error = -EINVAL; break; -- cgit v1.2.3 From 3630e82ab6bd2642f0fc03b574783ccf2fb0c955 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Oct 2024 23:58:44 +0100 Subject: mman: Add map_shadow_stack() flags In preparation for adding arm64 GCS support make the map_shadow_stack() SHADOW_STACK_SET_TOKEN flag generic and add _SET_MARKER. The existing flag indicates that a token usable for stack switch should be added to the top of the newly mapped GCS region while the new flag indicates that a top of stack marker suitable for use by unwinders should be added above that. For arm64 the top of stack marker is all bits 0. 
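A caller asking for both a token and a top of stack marker would look roughly like this (sketch only; there is no libc wrapper so the raw syscall is used, and "size" stands for the desired shadow stack size):

	#include <unistd.h>
	#include <sys/syscall.h>

	void *ss = (void *)syscall(__NR_map_shadow_stack, 0, size,
				   SHADOW_STACK_SET_TOKEN | SHADOW_STACK_SET_MARKER);
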
Reviewed-by: Thiago Jung Bauermann Reviewed-by: Catalin Marinas Acked-by: Yury Khrustalev Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-5-222b78d87eee@kernel.org Signed-off-by: Catalin Marinas --- arch/x86/include/uapi/asm/mman.h | 3 --- include/uapi/asm-generic/mman.h | 4 ++++ 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h index 46cdc941f958..ac1e6277212b 100644 --- a/arch/x86/include/uapi/asm/mman.h +++ b/arch/x86/include/uapi/asm/mman.h @@ -5,9 +5,6 @@ #define MAP_32BIT 0x40 /* only give out 32bit addresses */ #define MAP_ABOVE4G 0x80 /* only map above 4GB */ -/* Flags for map_shadow_stack(2) */ -#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ - #include #endif /* _ASM_X86_MMAN_H */ diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index 57e8195d0b53..5e3d61ddbd8c 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h @@ -19,4 +19,8 @@ #define MCL_FUTURE 2 /* lock all future mappings */ #define MCL_ONFAULT 4 /* lock all pages that are faulted in */ +#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ +#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack marker in the shadow stack */ + + #endif /* __ASM_GENERIC_MMAN_H */ -- cgit v1.2.3 From ae80e1629aeaf5be726a9ea94eb7345b1a44b00d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Oct 2024 23:58:53 +0100 Subject: mm: Define VM_SHADOW_STACK for arm64 when we support GCS Use VM_HIGH_ARCH_5 for guarded control stack pages. Reviewed-by: Thiago Jung Bauermann Reviewed-by: Catalin Marinas Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-14-222b78d87eee@kernel.org Signed-off-by: Catalin Marinas --- Documentation/filesystems/proc.rst | 2 +- include/linux/mm.h | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index e834779d9611..6a882c57a7e7 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -579,7 +579,7 @@ encoded manner. The codes are the following: mt arm64 MTE allocation tags are enabled um userfaultfd missing tracking uw userfaultfd wr-protect tracking - ss shadow stack page + ss shadow/guarded control stack page sl sealed == ======================================= diff --git a/include/linux/mm.h b/include/linux/mm.h index 56654306a832..8852c39c7695 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -367,7 +367,17 @@ extern unsigned int kobjsize(const void *objp); * for more details on the guard size. */ # define VM_SHADOW_STACK VM_HIGH_ARCH_5 -#else +#endif + +#if defined(CONFIG_ARM64_GCS) +/* + * arm64's Guarded Control Stack implements similar functionality and + * has similar constraints to shadow stacks. + */ +# define VM_SHADOW_STACK VM_HIGH_ARCH_6 +#endif + +#ifndef VM_SHADOW_STACK # define VM_SHADOW_STACK VM_NONE #endif -- cgit v1.2.3 From 7ec3b57cb29f8371bf12a725b6e8f75831a03f27 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Oct 2024 23:59:06 +0100 Subject: arm64/ptrace: Expose GCS via ptrace and core files Provide a new register type NT_ARM_GCS reporting the current GCS mode and pointer for EL0. 
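A debugger would read the new regset like any other (sketch; assumes headers carrying the NT_ARM_GCS and struct user_gcs definitions added below):

	struct user_gcs gcs;
	struct iovec iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) };

	if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &iov) == 0)
		printf("mode %#llx gcspr_el0 %#llx\n",
		       (unsigned long long)gcs.features_enabled,
		       (unsigned long long)gcs.gcspr_el0);
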
Due to the interactions with allocation and deallocation of Guarded Control Stacks we do not permit any changes to the GCS mode via ptrace, only GCSPR_EL0 may be changed. Reviewed-by: Thiago Jung Bauermann Reviewed-by: Catalin Marinas Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-27-222b78d87eee@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/uapi/asm/ptrace.h | 8 +++++ arch/arm64/kernel/ptrace.c | 62 +++++++++++++++++++++++++++++++++++- include/uapi/linux/elf.h | 1 + 3 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 7fa2f7036aa7..0f39ba4f3efd 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -324,6 +324,14 @@ struct user_za_header { #define ZA_PT_SIZE(vq) \ (ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq)) +/* GCS state (NT_ARM_GCS) */ + +struct user_gcs { + __u64 features_enabled; + __u64 features_locked; + __u64 gcspr_el0; +}; + #endif /* __ASSEMBLY__ */ #endif /* _UAPI__ASM_PTRACE_H */ diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index b756578aeaee..6c1dcfe6d25a 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -1473,6 +1474,52 @@ static int poe_set(struct task_struct *target, const struct } #endif +#ifdef CONFIG_ARM64_GCS +static int gcs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct user_gcs user_gcs; + + if (!system_supports_gcs()) + return -EINVAL; + + if (target == current) + gcs_preserve_current_state(); + + user_gcs.features_enabled = target->thread.gcs_el0_mode; + user_gcs.features_locked = target->thread.gcs_el0_locked; + user_gcs.gcspr_el0 = target->thread.gcspr_el0; + + return membuf_write(&to, &user_gcs, sizeof(user_gcs)); +} + +static int gcs_set(struct task_struct *target, const struct + user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, const + void __user *ubuf) +{ + int ret; + struct user_gcs user_gcs; + + if (!system_supports_gcs()) + return -EINVAL; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_gcs, 0, -1); + if (ret) + return ret; + + if (user_gcs.features_enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) + return -EINVAL; + + target->thread.gcs_el0_mode = user_gcs.features_enabled; + target->thread.gcs_el0_locked = user_gcs.features_locked; + target->thread.gcspr_el0 = user_gcs.gcspr_el0; + + return 0; +} +#endif + enum aarch64_regset { REGSET_GPR, REGSET_FPR, @@ -1503,7 +1550,10 @@ enum aarch64_regset { REGSET_TAGGED_ADDR_CTRL, #endif #ifdef CONFIG_ARM64_POE - REGSET_POE + REGSET_POE, +#endif +#ifdef CONFIG_ARM64_GCS + REGSET_GCS, #endif }; @@ -1674,6 +1724,16 @@ static const struct user_regset aarch64_regsets[] = { .set = poe_set, }, #endif +#ifdef CONFIG_ARM64_GCS + [REGSET_GCS] = { + .core_note_type = NT_ARM_GCS, + .n = sizeof(struct user_gcs) / sizeof(u64), + .size = sizeof(u64), + .align = sizeof(u64), + .regset_get = gcs_get, + .set = gcs_set, + }, +#endif }; static const struct user_regset_view user_aarch64_view = { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b9935988da5c..9adc218fb6df 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -443,6 +443,7 @@ typedef struct elf64_shdr { #define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ #define NT_ARM_FPMR 0x40e /* ARM floating point mode 
register */ #define NT_ARM_POE 0x40f /* ARM POE registers */ +#define NT_ARM_GCS 0x410 /* ARM GCS state */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From b63c755cb65d43c8aba987c4f6b57c77c6f123f2 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 30 Sep 2024 14:29:53 +0100 Subject: appletalk: Remove deadcode alloc_ltalkdev in net/appletalk/dev.c is dead since commit 00f3696f7555 ("net: appletalk: remove cops support") Removing it (and it's helper) leaves dev.c and if_ltalk.h empty; remove them and the Makefile entry. tun.c was including that if_ltalk.h but actually wanted the uapi version for LTALK_ALEN, fix up the path. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Willem de Bruijn Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 +- include/linux/if_ltalk.h | 8 -------- net/appletalk/Makefile | 2 +- net/appletalk/dev.c | 46 ---------------------------------------------- 4 files changed, 2 insertions(+), 56 deletions(-) delete mode 100644 include/linux/if_ltalk.h delete mode 100644 net/appletalk/dev.c (limited to 'include') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9a0f6eb32016..d7a865ef370b 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -71,7 +71,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/if_ltalk.h b/include/linux/if_ltalk.h deleted file mode 100644 index 4cc1c0b77870..000000000000 --- a/include/linux/if_ltalk.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_LTALK_H -#define __LINUX_LTALK_H - -#include - -extern struct net_device *alloc_ltalkdev(int sizeof_priv); -#endif diff --git a/net/appletalk/Makefile b/net/appletalk/Makefile index 33164d972d37..152312a15180 100644 --- a/net/appletalk/Makefile +++ b/net/appletalk/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_ATALK) += appletalk.o -appletalk-y := aarp.o ddp.o dev.o +appletalk-y := aarp.o ddp.o appletalk-$(CONFIG_PROC_FS) += atalk_proc.o appletalk-$(CONFIG_SYSCTL) += sysctl_net_atalk.o diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c deleted file mode 100644 index 284c8e585533..000000000000 --- a/net/appletalk/dev.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Moved here from drivers/net/net_init.c, which is: - * Written 1993,1994,1995 by Donald Becker. - */ - -#include -#include -#include -#include -#include - -static void ltalk_setup(struct net_device *dev) -{ - /* Fill in the fields of the device structure with localtalk-generic values. */ - - dev->type = ARPHRD_LOCALTLK; - dev->hard_header_len = LTALK_HLEN; - dev->mtu = LTALK_MTU; - dev->addr_len = LTALK_ALEN; - dev->tx_queue_len = 10; - - dev->broadcast[0] = 0xFF; - - dev->flags = IFF_BROADCAST|IFF_MULTICAST|IFF_NOARP; -} - -/** - * alloc_ltalkdev - Allocates and sets up an localtalk device - * @sizeof_priv: Size of additional driver-private structure to be allocated - * for this localtalk device - * - * Fill in the fields of the device structure with localtalk-generic - * values. Basically does everything except registering the device. - * - * Constructs a new net device, complete with a private data area of - * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for - * this private data area. 
- */ - -struct net_device *alloc_ltalkdev(int sizeof_priv) -{ - return alloc_netdev(sizeof_priv, "lt%d", NET_NAME_UNKNOWN, - ltalk_setup); -} -EXPORT_SYMBOL(alloc_ltalkdev); -- cgit v1.2.3 From ec841b8d73cff37f8960e209017efe1eb2fb21f2 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 23 Sep 2024 16:12:01 +0800 Subject: usb: chipidea: add CI_HDRC_HAS_SHORT_PKT_LIMIT flag Currently, the imx deivice controller has below limitations: 1. can't generate short packet interrupt if IOC not set in dTD. So if one request span more than one dTDs and only the last dTD set IOC, the usb request will pending there if no more data comes. 2. the controller can't accurately deliver data to differtent usb requests in some cases due to short packet. For example: one usb request span 3 dTDs, then if the controller received a short packet the next packet will go to 2nd dTD of current request rather than the first dTD of next request. 3. can't build a bus packet use multiple dTDs. For example: controller needs to send one packet of 512 bytes use dTD1 (200 bytes) + dTD2 (312 bytes), actually the host side will see 200 bytes short packet. Based on these limits, add CI_HDRC_HAS_SHORT_PKT_LIMIT flag and use it on imx platforms. Signed-off-by: Xu Yang Acked-by: Peter Chen Link: https://lore.kernel.org/r/20240923081203.2851768-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci.h | 1 + drivers/usb/chipidea/ci_hdrc_imx.c | 1 + drivers/usb/chipidea/core.c | 2 ++ include/linux/usb/chipidea.h | 1 + 4 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h index 2a38e1eb6546..e4b003d060c2 100644 --- a/drivers/usb/chipidea/ci.h +++ b/drivers/usb/chipidea/ci.h @@ -260,6 +260,7 @@ struct ci_hdrc { bool b_sess_valid_event; bool imx28_write_fix; bool has_portsc_pec_bug; + bool has_short_pkt_limit; bool supports_runtime_pm; bool in_lpm; bool wakeup_int; diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index d655db733797..f2801700be8e 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -342,6 +342,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) struct ci_hdrc_platform_data pdata = { .name = dev_name(&pdev->dev), .capoffset = DEF_CAPOFFSET, + .flags = CI_HDRC_HAS_SHORT_PKT_LIMIT, .notify_event = ci_hdrc_imx_notify_event, }; int ret; diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 36143b2ae482..2d01af746ff8 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -1076,6 +1076,8 @@ static int ci_hdrc_probe(struct platform_device *pdev) CI_HDRC_SUPPORTS_RUNTIME_PM); ci->has_portsc_pec_bug = !!(ci->platdata->flags & CI_HDRC_HAS_PORTSC_PEC_MISSED); + ci->has_short_pkt_limit = !!(ci->platdata->flags & + CI_HDRC_HAS_SHORT_PKT_LIMIT); platform_set_drvdata(pdev, ci); ret = hw_device_init(ci, base); diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index 5a7f96684ea2..ebdfef124b2b 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -65,6 +65,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_PHY_VBUS_CONTROL BIT(16) #define CI_HDRC_HAS_PORTSC_PEC_MISSED BIT(17) #define CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS BIT(18) +#define CI_HDRC_HAS_SHORT_PKT_LIMIT BIT(19) enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 -- cgit v1.2.3 From 8b7fd6a15f8c32760c2026a62dcf55219b4da15b Mon Sep 17 00:00:00 2001 From: 
Benjamin Tissoires Date: Tue, 1 Oct 2024 16:30:05 +0200 Subject: HID: bpf: move HID-BPF report descriptor fixup earlier Currently, hid_bpf_rdesc_fixup() is called once the match between the HID device and the driver is done. This can be problematic in case the driver selected by the kernel would change the report descriptor after the fact. To give a chance for hid_bpf_rdesc_fixup() to provide hints on to how to select a dedicated driver or not, move the call to that BPF hook earlier in the .probe() process, when we get the first match. However, this means that we might get called more than once (typically once for hid-generic, and once for hid-vendor-specific). So we store the result of HID-BPF fixup in struct hid_device. Basically, this means that ->bpf_rdesc can replace ->dev_rdesc when it was used in the code. In order to not grow struct hid_device, some fields are re-ordered. This was the output of pahole for the first 128 bytes: struct hid_device { __u8 * dev_rdesc; /* 0 8 */ unsigned int dev_rsize; /* 8 4 */ /* XXX 4 bytes hole, try to pack */ __u8 * rdesc; /* 16 8 */ unsigned int rsize; /* 24 4 */ /* XXX 4 bytes hole, try to pack */ struct hid_collection * collection; /* 32 8 */ unsigned int collection_size; /* 40 4 */ unsigned int maxcollection; /* 44 4 */ unsigned int maxapplication; /* 48 4 */ __u16 bus; /* 52 2 */ __u16 group; /* 54 2 */ __u32 vendor; /* 56 4 */ __u32 product; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ __u32 version; /* 64 4 */ enum hid_type type; /* 68 4 */ unsigned int country; /* 72 4 */ /* XXX 4 bytes hole, try to pack */ struct hid_report_enum report_enum[3]; /* 80 6216 */ Basically, we got three holes of 4 bytes. We can reorder things a little and makes those 3 holes a continuous 12 bytes hole, which can be replaced by the new pointer and the new unsigned int we need. In terms of code allocation, when not using HID-BPF, we are back to kernel v6.2 in hid_open_report(). These multiple kmemdup() calls will be fixed in a later commit. 
Link: https://patch.msgid.link/20241001-hid-bpf-hid-generic-v3-1-2ef1019468df@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 9 ++++++--- drivers/hid/hid-core.c | 30 ++++++++++++++++++++++++++---- include/linux/hid.h | 18 ++++++++++-------- include/linux/hid_bpf.h | 11 +++-------- 4 files changed, 45 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 8420c227e21b..961b7f35aa67 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -148,7 +148,7 @@ out: } EXPORT_SYMBOL_GPL(dispatch_hid_bpf_output_report); -u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size) +const u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size) { int ret; struct hid_bpf_ctx_kern ctx_kern = { @@ -183,7 +183,7 @@ u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned ignore_bpf: kfree(ctx_kern.data); - return kmemdup(rdesc, *size, GFP_KERNEL); + return rdesc; } EXPORT_SYMBOL_GPL(call_hid_bpf_rdesc_fixup); @@ -260,8 +260,11 @@ int hid_bpf_allocate_event_data(struct hid_device *hdev) int hid_bpf_reconnect(struct hid_device *hdev) { - if (!test_and_set_bit(ffs(HID_STAT_REPROBED), &hdev->status)) + if (!test_and_set_bit(ffs(HID_STAT_REPROBED), &hdev->status)) { + /* trigger call to call_hid_bpf_rdesc_fixup() during the next probe */ + hdev->bpf_rsize = 0; return device_reprobe(&hdev->dev); + } return 0; } diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 30de92d0bf0f..d6bf933623e8 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -698,6 +698,14 @@ static void hid_close_report(struct hid_device *device) device->status &= ~HID_STAT_PARSED; } +static inline void hid_free_bpf_rdesc(struct hid_device *hdev) +{ + /* bpf_rdesc is either equal to dev_rdesc or allocated by call_hid_bpf_rdesc_fixup() */ + if (hdev->bpf_rdesc != hdev->dev_rdesc) + kfree(hdev->bpf_rdesc); + hdev->bpf_rdesc = NULL; +} + /* * Free a device structure, all reports, and all fields. 
*/ @@ -707,6 +715,7 @@ void hiddev_free(struct kref *ref) struct hid_device *hid = container_of(ref, struct hid_device, ref); hid_close_report(hid); + hid_free_bpf_rdesc(hid); kfree(hid->dev_rdesc); kfree(hid); } @@ -1221,13 +1230,12 @@ int hid_open_report(struct hid_device *device) if (WARN_ON(device->status & HID_STAT_PARSED)) return -EBUSY; - start = device->dev_rdesc; + start = device->bpf_rdesc; if (WARN_ON(!start)) return -ENODEV; - size = device->dev_rsize; + size = device->bpf_rsize; - /* call_hid_bpf_rdesc_fixup() ensures we work on a copy of rdesc */ - buf = call_hid_bpf_rdesc_fixup(device, start, &size); + buf = kmemdup(start, size, GFP_KERNEL); if (buf == NULL) return -ENOMEM; @@ -2684,6 +2692,18 @@ static int __hid_device_probe(struct hid_device *hdev, struct hid_driver *hdrv) const struct hid_device_id *id; int ret; + if (!hdev->bpf_rsize) { + /* in case a bpf program gets detached, we need to free the old one */ + hid_free_bpf_rdesc(hdev); + + /* keep this around so we know we called it once */ + hdev->bpf_rsize = hdev->dev_rsize; + + /* call_hid_bpf_rdesc_fixup will always return a valid pointer */ + hdev->bpf_rdesc = call_hid_bpf_rdesc_fixup(hdev, hdev->dev_rdesc, + &hdev->bpf_rsize); + } + if (!hid_check_device_match(hdev, hdrv, &id)) return -ENODEV; @@ -2940,9 +2960,11 @@ static void hid_remove_device(struct hid_device *hdev) hid_debug_unregister(hdev); hdev->status &= ~HID_STAT_ADDED; } + hid_free_bpf_rdesc(hdev); kfree(hdev->dev_rdesc); hdev->dev_rdesc = NULL; hdev->dev_rsize = 0; + hdev->bpf_rsize = 0; } /** diff --git a/include/linux/hid.h b/include/linux/hid.h index 121d5b8bc867..ff58b5ceb62e 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -599,15 +599,17 @@ enum hid_battery_status { struct hid_driver; struct hid_ll_driver; -struct hid_device { /* device report descriptor */ - const __u8 *dev_rdesc; - unsigned dev_rsize; - const __u8 *rdesc; - unsigned rsize; +struct hid_device { + const __u8 *dev_rdesc; /* device report descriptor */ + const __u8 *bpf_rdesc; /* bpf modified report descriptor, if any */ + const __u8 *rdesc; /* currently used report descriptor */ + unsigned int dev_rsize; + unsigned int bpf_rsize; + unsigned int rsize; + unsigned int collection_size; /* Number of allocated hid_collections */ struct hid_collection *collection; /* List of HID collections */ - unsigned collection_size; /* Number of allocated hid_collections */ - unsigned maxcollection; /* Number of parsed collections */ - unsigned maxapplication; /* Number of applications */ + unsigned int maxcollection; /* Number of parsed collections */ + unsigned int maxapplication; /* Number of applications */ __u16 bus; /* BUS ID */ __u16 group; /* Report group */ __u32 vendor; /* Vendor ID */ diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 6a47223e6460..a6876ab29004 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -212,7 +212,7 @@ int hid_bpf_connect_device(struct hid_device *hdev); void hid_bpf_disconnect_device(struct hid_device *hdev); void hid_bpf_destroy_device(struct hid_device *hid); int hid_bpf_device_init(struct hid_device *hid); -u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size); +const u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size); #else /* CONFIG_HID_BPF */ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 *size, int interrupt, @@ -228,13 +228,8 @@ static inline int 
hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} static inline int hid_bpf_device_init(struct hid_device *hid) { return 0; } -/* - * This specialized allocator has to be a macro for its allocations to be - * accounted separately (to have a separate alloc_tag). The typecast is - * intentional to enforce typesafety. - */ -#define call_hid_bpf_rdesc_fixup(_hdev, _rdesc, _size) \ - ((u8 *)kmemdup(_rdesc, *(_size), GFP_KERNEL)) +static inline const u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, + unsigned int *size) { return rdesc; } #endif /* CONFIG_HID_BPF */ -- cgit v1.2.3 From 645c224ac5f6e0013931c342ea707b398d24d410 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 1 Oct 2024 16:30:12 +0200 Subject: HID: add per device quirk to force bind to hid-generic We already have the possibility to force not binding to hid-generic and rely on a dedicated driver, but we couldn't do the other way around. This is useful for BPF programs where we are fixing the report descriptor and the events, but want to avoid a specialized driver to come after BPF which would unwind everything that is done there. Reviewed-by: Peter Hutterer Link: https://patch.msgid.link/20241001-hid-bpf-hid-generic-v3-8-2ef1019468df@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-core.c | 5 +++-- drivers/hid/hid-generic.c | 3 +++ include/linux/hid.h | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 8e879937e956..f1c23b21e96a 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2698,9 +2698,10 @@ static bool hid_check_device_match(struct hid_device *hdev, /* * hid-generic implements .match(), so we must be dealing with a * different HID driver here, and can simply check if - * hid_ignore_special_drivers is set or not. + * hid_ignore_special_drivers or HID_QUIRK_IGNORE_SPECIAL_DRIVER + * are set or not. 
*/ - return !hid_ignore_special_drivers; + return !hid_ignore_special_drivers && !(hdev->quirks & HID_QUIRK_IGNORE_SPECIAL_DRIVER); } static int __hid_device_probe(struct hid_device *hdev, struct hid_driver *hdrv) diff --git a/drivers/hid/hid-generic.c b/drivers/hid/hid-generic.c index f9db991d3c5a..88882c1bfffe 100644 --- a/drivers/hid/hid-generic.c +++ b/drivers/hid/hid-generic.c @@ -40,6 +40,9 @@ static bool hid_generic_match(struct hid_device *hdev, if (ignore_special_driver) return true; + if (hdev->quirks & HID_QUIRK_IGNORE_SPECIAL_DRIVER) + return true; + if (hdev->quirks & HID_QUIRK_HAVE_SPECIAL_DRIVER) return false; diff --git a/include/linux/hid.h b/include/linux/hid.h index ff58b5ceb62e..63330d623335 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -359,6 +359,7 @@ struct hid_item { * | @HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP: * | @HID_QUIRK_HAVE_SPECIAL_DRIVER: * | @HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE: + * | @HID_QUIRK_IGNORE_SPECIAL_DRIVER * | @HID_QUIRK_FULLSPEED_INTERVAL: * | @HID_QUIRK_NO_INIT_REPORTS: * | @HID_QUIRK_NO_IGNORE: @@ -384,6 +385,7 @@ struct hid_item { #define HID_QUIRK_HAVE_SPECIAL_DRIVER BIT(19) #define HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE BIT(20) #define HID_QUIRK_NOINVERT BIT(21) +#define HID_QUIRK_IGNORE_SPECIAL_DRIVER BIT(22) #define HID_QUIRK_FULLSPEED_INTERVAL BIT(28) #define HID_QUIRK_NO_INIT_REPORTS BIT(29) #define HID_QUIRK_NO_IGNORE BIT(30) -- cgit v1.2.3 From 5acd957a986c167d357e617a887630203be29ea4 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 1 Oct 2024 13:37:04 +0300 Subject: net/mlx5: hw counters: Make fc_stats & fc_pool private The mlx5_fc_stats and mlx5_fc_pool structs are only used from fs_counters.c. As such, make them private there. mlx5_fc_pool is not used or referenced at all outside fs_counters. mlx5_fc_stats is referenced from mlx5_core_dev, so instead of having it as a direct member (which requires exporting it from fs_counters), store a pointer to it, allocate it on init and clear it on destroy. One caveat is that a simple container_of to get from a 'work' struct to the outermost mlx5_core_dev struct directly no longer works, so an extra pointer had to be added to mlx5_fc_stats back to the parent dev. 
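The only non-mechanical part is the stats worker, which can no longer reach the device with a single container_of(). Condensed from the fs_counters.c hunk below:

	/* before: fc_stats was embedded in mlx5_core_dev */
	struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
						 priv.fc_stats.work.work);

	/* after: resolve the private struct first, then follow the stored
	 * device pointer back to the parent
	 */
	struct mlx5_fc_stats *fc_stats = container_of(work, struct mlx5_fc_stats,
						      work.work);
	struct mlx5_core_dev *dev = fc_stats->fc_pool.dev;
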
Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241001103709.58127-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/fs_counters.c | 79 ++++++++++++++++------ include/linux/mlx5/driver.h | 33 +-------- 2 files changed, 60 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 0c26d707eed2..7d6174d0f260 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -69,6 +69,36 @@ struct mlx5_fc { struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; }; +struct mlx5_fc_pool { + struct mlx5_core_dev *dev; + struct mutex pool_lock; /* protects pool lists */ + struct list_head fully_used; + struct list_head partially_used; + struct list_head unused; + int available_fcs; + int used_fcs; + int threshold; +}; + +struct mlx5_fc_stats { + spinlock_t counters_idr_lock; /* protects counters_idr */ + struct idr counters_idr; + struct list_head counters; + struct llist_head addlist; + struct llist_head dellist; + + struct workqueue_struct *wq; + struct delayed_work work; + unsigned long next_query; + unsigned long sampling_interval; /* jiffies */ + u32 *bulk_query_out; + int bulk_query_len; + size_t num_counters; + bool bulk_query_alloc_failed; + unsigned long next_bulk_query_alloc; + struct mlx5_fc_pool fc_pool; +}; + static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev); static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool); static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool); @@ -109,7 +139,7 @@ static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct ml static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev, u32 id) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; unsigned long next_id = (unsigned long)id + 1; struct mlx5_fc *counter; unsigned long tmp; @@ -137,7 +167,7 @@ static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev, static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev, struct mlx5_fc *counter) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; list_del(&counter->list); @@ -178,7 +208,7 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev, struct mlx5_fc *first, u32 last_id) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; bool query_more_counters = (first->id <= last_id); int cur_bulk_len = fc_stats->bulk_query_len; u32 *data = fc_stats->bulk_query_out; @@ -225,7 +255,7 @@ static void mlx5_fc_free(struct mlx5_core_dev *dev, struct mlx5_fc *counter) static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; if (counter->bulk) mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter); @@ -235,7 +265,7 @@ static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter) static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; int max_bulk_len = get_max_bulk_query_len(dev); unsigned long now = jiffies; u32 
*bulk_query_out_tmp; @@ -270,9 +300,9 @@ static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev) static void mlx5_fc_stats_work(struct work_struct *work) { - struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, - priv.fc_stats.work.work); - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats = container_of(work, struct mlx5_fc_stats, + work.work); + struct mlx5_core_dev *dev = fc_stats->fc_pool.dev; /* Take dellist first to ensure that counters cannot be deleted before * they are inserted. */ @@ -334,7 +364,7 @@ static struct mlx5_fc *mlx5_fc_single_alloc(struct mlx5_core_dev *dev) static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; struct mlx5_fc *counter; if (aging && MLX5_CAP_GEN(dev, flow_counter_bulk_alloc) != 0) { @@ -349,7 +379,7 @@ static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging) struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging) { struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging); - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; int err; if (IS_ERR(counter)) @@ -389,7 +419,7 @@ err_out_alloc: struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) { struct mlx5_fc *counter = mlx5_fc_create_ex(dev, aging); - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; if (aging) mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); @@ -405,7 +435,7 @@ EXPORT_SYMBOL(mlx5_fc_id); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; if (!counter) return; @@ -422,10 +452,14 @@ EXPORT_SYMBOL(mlx5_fc_destroy); int mlx5_init_fc_stats(struct mlx5_core_dev *dev) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats; int init_bulk_len; int init_out_len; + fc_stats = kzalloc(sizeof(*fc_stats), GFP_KERNEL); + if (!fc_stats) + return -ENOMEM; + spin_lock_init(&fc_stats->counters_idr_lock); idr_init(&fc_stats->counters_idr); INIT_LIST_HEAD(&fc_stats->counters); @@ -436,7 +470,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) init_out_len = mlx5_cmd_fc_get_bulk_query_out_len(init_bulk_len); fc_stats->bulk_query_out = kzalloc(init_out_len, GFP_KERNEL); if (!fc_stats->bulk_query_out) - return -ENOMEM; + goto err_bulk; fc_stats->bulk_query_len = init_bulk_len; fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); @@ -447,23 +481,27 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); mlx5_fc_pool_init(&fc_stats->fc_pool, dev); + dev->priv.fc_stats = fc_stats; + return 0; err_wq_create: kfree(fc_stats->bulk_query_out); +err_bulk: + kfree(fc_stats); return -ENOMEM; } void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; struct llist_node *tmplist; struct mlx5_fc *counter; struct mlx5_fc *tmp; - cancel_delayed_work_sync(&dev->priv.fc_stats.work); - destroy_workqueue(dev->priv.fc_stats.wq); - dev->priv.fc_stats.wq = NULL; + cancel_delayed_work_sync(&fc_stats->work); + destroy_workqueue(fc_stats->wq); + fc_stats->wq = NULL; tmplist = llist_del_all(&fc_stats->addlist); 
llist_for_each_entry_safe(counter, tmp, tmplist, addlist) @@ -475,6 +513,7 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) mlx5_fc_pool_cleanup(&fc_stats->fc_pool); idr_destroy(&fc_stats->counters_idr); kfree(fc_stats->bulk_query_out); + kfree(fc_stats); } int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, @@ -518,7 +557,7 @@ void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, struct delayed_work *dwork, unsigned long delay) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; queue_delayed_work(fc_stats->wq, dwork, delay); } @@ -526,7 +565,7 @@ void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, unsigned long interval) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; fc_stats->sampling_interval = min_t(unsigned long, interval, fc_stats->sampling_interval); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e23c692a34c7..fc7e6153b73d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -45,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -474,36 +473,6 @@ struct mlx5_core_sriov { u16 max_ec_vfs; }; -struct mlx5_fc_pool { - struct mlx5_core_dev *dev; - struct mutex pool_lock; /* protects pool lists */ - struct list_head fully_used; - struct list_head partially_used; - struct list_head unused; - int available_fcs; - int used_fcs; - int threshold; -}; - -struct mlx5_fc_stats { - spinlock_t counters_idr_lock; /* protects counters_idr */ - struct idr counters_idr; - struct list_head counters; - struct llist_head addlist; - struct llist_head dellist; - - struct workqueue_struct *wq; - struct delayed_work work; - unsigned long next_query; - unsigned long sampling_interval; /* jiffies */ - u32 *bulk_query_out; - int bulk_query_len; - size_t num_counters; - bool bulk_query_alloc_failed; - unsigned long next_bulk_query_alloc; - struct mlx5_fc_pool fc_pool; -}; - struct mlx5_events; struct mlx5_mpfs; struct mlx5_eswitch; @@ -630,7 +599,7 @@ struct mlx5_priv { struct mlx5_devcom_comp_dev *hca_devcom_comp; struct mlx5_fw_reset *fw_reset; struct mlx5_core_roce roce; - struct mlx5_fc_stats fc_stats; + struct mlx5_fc_stats *fc_stats; struct mlx5_rl_table rl_table; struct mlx5_ft_pool *ft_pool; -- cgit v1.2.3 From d1c9cffe4b01f4d8bc52169139a5fedd48908abc Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 1 Oct 2024 13:37:09 +0300 Subject: net/mlx5: hw counters: Remove mlx5_fc_create_ex It no longer serves any purpose and is identical to mlx5_fc_create upon which it was originally based of. 
Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241001103709.58127-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 7 +------ include/linux/mlx5/fs.h | 3 --- 3 files changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index dcfccaaa8d91..4877a9d86807 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -1026,7 +1026,7 @@ mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) return ERR_PTR(-ENOMEM); counter->is_shared = false; - counter->counter = mlx5_fc_create_ex(ct_priv->dev, true); + counter->counter = mlx5_fc_create(ct_priv->dev, true); if (IS_ERR(counter->counter)) { ct_dbg("Failed to create counter for ct entry"); ret = PTR_ERR(counter->counter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 0b80c33cba5f..62d0c689796b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -275,7 +275,7 @@ static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging) return mlx5_fc_single_alloc(dev); } -struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging) +struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) { struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging); struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; @@ -304,11 +304,6 @@ err_out_alloc: mlx5_fc_release(dev, counter); return ERR_PTR(err); } - -struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) -{ - return mlx5_fc_create_ex(dev, aging); -} EXPORT_SYMBOL(mlx5_fc_create); u32 mlx5_fc_id(struct mlx5_fc *counter) diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index b744e554f014..438db888bde0 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -298,9 +298,6 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); -/* As mlx5_fc_create() but doesn't queue stats refresh thread. */ -struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging); - void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, -- cgit v1.2.3 From 4aecca4c76808f3736056d18ff510df80424bc9f Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 1 Oct 2024 05:57:14 -0700 Subject: net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX timestamps and packets sent via socket. Unfortunately, there is no way to reliably predict socket timestamp ID value in case of error returned by sendmsg. For UDP sockets it's impossible because of lockless nature of UDP transmit, several threads may send packets in parallel. In case of RAW sockets MSG_MORE option makes things complicated. More details are in the conversation [1]. This patch adds new control message type to give user-space software an opportunity to control the mapping between packets and values by providing ID with each sendmsg for UDP sockets. The documentation is also added in this patch. 
[1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ Reviewed-by: Willem de Bruijn Reviewed-by: Jason Xing Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20241001125716.2832769-2-vadfed@meta.com Signed-off-by: Jakub Kicinski --- Documentation/networking/timestamping.rst | 14 ++++++++++++++ arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ include/net/inet_sock.h | 4 +++- include/net/sock.h | 7 +++++++ include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 13 +++++++++++++ net/ipv4/ip_output.c | 19 ++++++++++++++----- net/ipv6/ip6_output.c | 20 ++++++++++++++------ 11 files changed, 75 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index 8199e6917671..b37bfbfc7d79 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -194,6 +194,20 @@ SOF_TIMESTAMPING_OPT_ID: among all possibly concurrently outstanding timestamp requests for that socket. + The process can optionally override the default generated ID, by + passing a specific ID with control message SCM_TS_OPT_ID (not + supported for TCP sockets):: + + struct msghdr *msg; + ... + cmsg = CMSG_FIRSTHDR(msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_TS_OPT_ID; + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; + err = sendmsg(fd, msg, 0); + + SOF_TIMESTAMPING_OPT_ID_TCP: Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 251b73c5481e..302507bf9b5d 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -146,6 +146,8 @@ #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF #define SO_DEVMEM_DONTNEED 80 +#define SCM_TS_OPT_ID 81 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 8ab7582291ab..d118d4731580 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -157,6 +157,8 @@ #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF #define SO_DEVMEM_DONTNEED 80 +#define SCM_TS_OPT_ID 81 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 38fc0b188e08..d268d69bfcd2 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -138,6 +138,8 @@ #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF #define SO_DEVMEM_DONTNEED 80 +#define SCM_TS_OPT_ID 0x404C + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 57084ed2f3c4..113cd9f353e3 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -139,6 +139,8 @@ #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF #define SO_DEVMEM_DONTNEED 0x0059 +#define SCM_TS_OPT_ID 0x005a + #if !defined(__KERNEL__) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 394c3b66065e..f01dd273bea6 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -174,6 +174,7 @@ struct inet_cork { __s16 tos; char priority; __u16 gso_size; + u32 
ts_opt_id; u64 transmit_time; u32 mark; }; @@ -241,7 +242,8 @@ struct inet_sock { struct inet_cork_full cork; }; -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_TS_OPT_ID 2 /* ts_opt_id field is valid, overriding sk_tskey */ enum { INET_FLAGS_PKTINFO = 0, diff --git a/include/net/sock.h b/include/net/sock.h index c58ca8dd561b..ccf28c2b70b1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -954,6 +954,12 @@ enum sock_flags { }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) +/* + * The highest bit of sk_tsflags is reserved for kernel-internal + * SOCKCM_FLAG_TS_OPT_ID. There is a check in core/sock.c to control that + * SOF_TIMESTAMPING* values do not reach this reserved area + */ +#define SOCKCM_FLAG_TS_OPT_ID BIT(31) static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk) { @@ -1796,6 +1802,7 @@ struct sockcm_cookie { u64 transmit_time; u32 mark; u32 tsflags; + u32 ts_opt_id; }; static inline void sockcm_init(struct sockcm_cookie *sockc, diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 3b4e3e815602..deacfd6dd197 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -141,6 +141,8 @@ #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF #define SO_DEVMEM_DONTNEED 80 +#define SCM_TS_OPT_ID 81 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/net/core/sock.c b/net/core/sock.c index 039be95c40cf..846f494a17cf 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2899,6 +2899,8 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, { u32 tsflags; + BUILD_BUG_ON(SOF_TIMESTAMPING_LAST == (1 << 31)); + switch (cmsg->cmsg_type) { case SO_MARK: if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && @@ -2927,6 +2929,17 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, return -EINVAL; sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); break; + case SCM_TS_OPT_ID: + if (sk_is_tcp(sk)) + return -EINVAL; + tsflags = READ_ONCE(sk->sk_tsflags); + if (!(tsflags & SOF_TIMESTAMPING_OPT_ID)) + return -EINVAL; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg); + sockc->tsflags |= SOCKCM_FLAG_TS_OPT_ID; + break; /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. 
*/ case SCM_RIGHTS: case SCM_CREDENTIALS: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 49811c9281d4..0c7049f50369 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -973,7 +973,7 @@ static int __ip_append_data(struct sock *sk, unsigned int maxfraglen, fragheaderlen, maxnonfragsize; int csummode = CHECKSUM_NONE; struct rtable *rt = dst_rtable(cork->dst); - bool paged, hold_tskey, extra_uref = false; + bool paged, hold_tskey = false, extra_uref = false; unsigned int wmem_alloc_delta = 0; u32 tskey = 0; @@ -1049,10 +1049,15 @@ static int __ip_append_data(struct sock *sk, cork->length += length; - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; - if (hold_tskey) - tskey = atomic_inc_return(&sk->sk_tskey) - 1; + if (cork->tx_flags & SKBTX_ANY_TSTAMP && + READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { + if (cork->flags & IPCORK_TS_OPT_ID) { + tskey = cork->ts_opt_id; + } else { + tskey = atomic_inc_return(&sk->sk_tskey) - 1; + hold_tskey = true; + } + } /* So, what's going on in the loop below? * @@ -1327,6 +1332,10 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->transmit_time = ipc->sockc.transmit_time; cork->tx_flags = 0; sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags); + if (ipc->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) { + cork->flags |= IPCORK_TS_OPT_ID; + cork->ts_opt_id = ipc->sockc.ts_opt_id; + } return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f26841f1490f..ff6bd8d85e9a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1402,7 +1402,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, cork->base.tx_flags = 0; cork->base.mark = ipc6->sockc.mark; sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); - + if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) { + cork->base.flags |= IPCORK_TS_OPT_ID; + cork->base.ts_opt_id = ipc6->sockc.ts_opt_id; + } cork->base.length = 0; cork->base.transmit_time = ipc6->sockc.transmit_time; @@ -1433,7 +1436,7 @@ static int __ip6_append_data(struct sock *sk, bool zc = false; u32 tskey = 0; struct rt6_info *rt = dst_rt6_info(cork->dst); - bool paged, hold_tskey, extra_uref = false; + bool paged, hold_tskey = false, extra_uref = false; struct ipv6_txoptions *opt = v6_cork->opt; int csummode = CHECKSUM_NONE; unsigned int maxnonfragsize, headersize; @@ -1543,10 +1546,15 @@ emsgsize: flags &= ~MSG_SPLICE_PAGES; } - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; - if (hold_tskey) - tskey = atomic_inc_return(&sk->sk_tskey) - 1; + if (cork->tx_flags & SKBTX_ANY_TSTAMP && + READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { + if (cork->flags & IPCORK_TS_OPT_ID) { + tskey = cork->ts_opt_id; + } else { + tskey = atomic_inc_return(&sk->sk_tskey) - 1; + hold_tskey = true; + } + } /* * Let's try using as much space as possible. -- cgit v1.2.3 From 822b5bc6db55f1c3ea51659c423784ac6919ddd4 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 1 Oct 2024 05:57:15 -0700 Subject: net_tstamp: add SCM_TS_OPT_ID for RAW sockets The last type of sockets which supports SOF_TIMESTAMPING_OPT_ID is RAW sockets. To add new option this patch converts all callers (direct and indirect) of _sock_tx_timestamp to provide sockcm_cookie instead of tsflags. And while here fix __sock_tx_timestamp to receive tsflags as __u32 instead of __u16. 
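The caller-side conversion is mechanical, e.g. in net/can/raw.c (condensed from the hunk below):

	/* before: callers passed the raw tsflags value */
	skb_setup_tx_timestamp(skb, sockc.tsflags);

	/* after: callers pass the whole cookie so the helper can honour a
	 * user-supplied SCM_TS_OPT_ID key
	 */
	skb_setup_tx_timestamp(skb, &sockc);
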
Reviewed-by: Willem de Bruijn Reviewed-by: Jason Xing Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20241001125716.2832769-3-vadfed@meta.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 27 ++++++++++++++++++--------- net/can/raw.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/tcp.c | 7 ++++--- net/ipv6/ip6_output.c | 2 +- net/ipv6/raw.c | 2 +- net/packet/af_packet.c | 6 +++--- net/socket.c | 2 +- 9 files changed, 31 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index ccf28c2b70b1..e282127092ab 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2660,39 +2660,48 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, sock_write_timestamp(sk, 0); } -void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); +void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags); /** * _sock_tx_timestamp - checks whether the outgoing packet is to be time stamped * @sk: socket sending this packet - * @tsflags: timestamping flags to use + * @sockc: pointer to socket cmsg cookie to get timestamping info * @tx_flags: completed with instructions for time stamping * @tskey: filled in with next sk_tskey (not for TCP, which uses seqno) * * Note: callers should take care of initial ``*tx_flags`` value (usually 0) */ -static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags, +static inline void _sock_tx_timestamp(struct sock *sk, + const struct sockcm_cookie *sockc, __u8 *tx_flags, __u32 *tskey) { + __u32 tsflags = sockc->tsflags; + if (unlikely(tsflags)) { __sock_tx_timestamp(tsflags, tx_flags); if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey && - tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) - *tskey = atomic_inc_return(&sk->sk_tskey) - 1; + tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) { + if (tsflags & SOCKCM_FLAG_TS_OPT_ID) + *tskey = sockc->ts_opt_id; + else + *tskey = atomic_inc_return(&sk->sk_tskey) - 1; + } } if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) *tx_flags |= SKBTX_WIFI_STATUS; } -static inline void sock_tx_timestamp(struct sock *sk, __u16 tsflags, +static inline void sock_tx_timestamp(struct sock *sk, + const struct sockcm_cookie *sockc, __u8 *tx_flags) { - _sock_tx_timestamp(sk, tsflags, tx_flags, NULL); + _sock_tx_timestamp(sk, sockc, tx_flags, NULL); } -static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) +static inline void skb_setup_tx_timestamp(struct sk_buff *skb, + const struct sockcm_cookie *sockc) { - _sock_tx_timestamp(skb->sk, tsflags, &skb_shinfo(skb)->tx_flags, + _sock_tx_timestamp(skb->sk, sockc, &skb_shinfo(skb)->tx_flags, &skb_shinfo(skb)->tskey); } diff --git a/net/can/raw.c b/net/can/raw.c index 00533f64d69d..255c0a8f39d6 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -966,7 +966,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; - skb_setup_tx_timestamp(skb, sockc.tsflags); + skb_setup_tx_timestamp(skb, &sockc); err = can_send(skb, ro->loopback); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0c7049f50369..e5c55a95063d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1331,7 +1331,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->priority = ipc->priority; cork->transmit_time = ipc->sockc.transmit_time; cork->tx_flags = 0; - sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags); + sock_tx_timestamp(sk, &ipc->sockc, &cork->tx_flags); if (ipc->sockc.tsflags 
& SOCKCM_FLAG_TS_OPT_ID) { cork->flags |= IPCORK_TS_OPT_ID; cork->ts_opt_id = ipc->sockc.ts_opt_id; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 474dfd263c8b..0e9e01967ec9 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -370,7 +370,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->ip_summed = CHECKSUM_NONE; - skb_setup_tx_timestamp(skb, sockc->tsflags); + skb_setup_tx_timestamp(skb, sockc); if (flags & MSG_CONFIRM) skb_set_dst_pending_confirm(skb, 1); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4f77bd862e95..82cc4a5633ce 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -477,15 +477,16 @@ void tcp_init_sock(struct sock *sk) } EXPORT_SYMBOL(tcp_init_sock); -static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) +static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc) { struct sk_buff *skb = tcp_write_queue_tail(sk); + u32 tsflags = sockc->tsflags; if (tsflags && skb) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); - sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags); + sock_tx_timestamp(sk, sockc, &shinfo->tx_flags); if (tsflags & SOF_TIMESTAMPING_TX_ACK) tcb->txstamp_ack = 1; if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) @@ -1321,7 +1322,7 @@ wait_for_space: out: if (copied) { - tcp_tx_timestamp(sk, sockc.tsflags); + tcp_tx_timestamp(sk, &sockc); tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); } out_nopush: diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ff6bd8d85e9a..205673179b3c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1401,7 +1401,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; cork->base.mark = ipc6->sockc.mark; - sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); + sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags); if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) { cork->base.flags |= IPCORK_TS_OPT_ID; cork->base.ts_opt_id = ipc6->sockc.ts_opt_id; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 608fa9d05b55..8476a3944a88 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -629,7 +629,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->ip_summed = CHECKSUM_NONE; - skb_setup_tx_timestamp(skb, sockc->tsflags); + skb_setup_tx_timestamp(skb, sockc); if (flags & MSG_CONFIRM) skb_set_dst_pending_confirm(skb, 1); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a705ec214254..f8942062f776 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2118,7 +2118,7 @@ retry: skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); - skb_setup_tx_timestamp(skb, sockc.tsflags); + skb_setup_tx_timestamp(skb, &sockc); if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -2650,7 +2650,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->priority = READ_ONCE(po->sk.sk_priority); skb->mark = READ_ONCE(po->sk.sk_mark); skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid); - skb_setup_tx_timestamp(skb, sockc->tsflags); + skb_setup_tx_timestamp(skb, sockc); skb_zcopy_set_nouarg(skb, ph.raw); skb_reserve(skb, hlen); @@ -3115,7 +3115,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_free; } - skb_setup_tx_timestamp(skb, sockc.tsflags); + skb_setup_tx_timestamp(skb, &sockc); if 
(!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { diff --git a/net/socket.c b/net/socket.c index 601ad74930ef..3b1b65b9f471 100644 --- a/net/socket.c +++ b/net/socket.c @@ -687,7 +687,7 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) +void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags) { u8 flags = *tx_flags; -- cgit v1.2.3 From 5a9071a760a61b00260334ad576fe60debafaafc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Oct 2024 17:30:40 +0000 Subject: tcp: annotate data-races around icsk->icsk_pending icsk->icsk_pending can be read locklessly already. Following patch in the series will add another lockless read. Add smp_load_acquire() and smp_store_release() annotations because following patch will add a test in tcp_write_timer(), and READ_ONCE()/WRITE_ONCE() alone would possibly lead to races. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20241002173042.917928-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 4 ++-- net/ipv4/inet_connection_sock.c | 6 ++++-- net/ipv4/inet_diag.c | 10 ++++++---- net/ipv4/tcp_ipv4.c | 10 ++++++---- net/ipv4/tcp_output.c | 4 ++-- net/ipv4/tcp_timer.c | 4 ++-- net/ipv6/tcp_ipv6.c | 10 ++++++---- 7 files changed, 28 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c0deaafebfdc..914d19772704 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -197,7 +197,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) struct inet_connection_sock *icsk = inet_csk(sk); if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { - icsk->icsk_pending = 0; + smp_store_release(&icsk->icsk_pending, 0); #ifdef INET_CSK_CLEAR_TIMERS sk_stop_timer(sk, &icsk->icsk_retransmit_timer); #endif @@ -229,7 +229,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) { - icsk->icsk_pending = what; + smp_store_release(&icsk->icsk_pending, what); icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); } else if (what == ICSK_TIME_DACK) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 2c5632d4fddb..8c53385cc808 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -775,7 +775,8 @@ void inet_csk_clear_xmit_timers(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - icsk->icsk_pending = icsk->icsk_ack.pending = 0; + smp_store_release(&icsk->icsk_pending, 0); + icsk->icsk_ack.pending = 0; sk_stop_timer(sk, &icsk->icsk_retransmit_timer); sk_stop_timer(sk, &icsk->icsk_delack_timer); @@ -790,7 +791,8 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk) /* ongoing timer handlers need to acquire socket lock. 
*/ sock_not_owned_by_me(sk); - icsk->icsk_pending = icsk->icsk_ack.pending = 0; + smp_store_release(&icsk->icsk_pending, 0); + icsk->icsk_ack.pending = 0; sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 67639309163d..321acc8abf17 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -247,6 +247,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct nlmsghdr *nlh; struct nlattr *attr; void *info = NULL; + u8 icsk_pending; int protocol; cb_data = cb->data; @@ -307,14 +308,15 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto out; } - if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + icsk_pending = smp_load_acquire(&icsk->icsk_pending); + if (icsk_pending == ICSK_TIME_RETRANS || + icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); - } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + } else if (icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5afe5e57c89b..985028434f64 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2900,15 +2900,17 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) __be32 src = inet->inet_rcv_saddr; __u16 destp = ntohs(inet->inet_dport); __u16 srcp = ntohs(inet->inet_sport); + u8 icsk_pending; int rx_queue; int state; - if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + icsk_pending = smp_load_acquire(&icsk->icsk_pending); + if (icsk_pending == ICSK_TIME_RETRANS || + icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; - } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + } else if (icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; timer_expires = icsk->icsk_timeout; } else if (timer_pending(&sk->sk_timer)) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4fd746bd4d54..4d0407301603 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2960,7 +2960,7 @@ void tcp_send_loss_probe(struct sock *sk) WARN_ONCE(tp->packets_out, "invalid inflight: %u state %u cwnd %u mss %d\n", tp->packets_out, sk->sk_state, tcp_snd_cwnd(tp), mss); - inet_csk(sk)->icsk_pending = 0; + smp_store_release(&inet_csk(sk)->icsk_pending, 0); return; } @@ -2993,7 +2993,7 @@ probe_sent: NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); /* Reset s.t. 
tcp_rearm_rto will restart timer from now */ - inet_csk(sk)->icsk_pending = 0; + smp_store_release(&inet_csk(sk)->icsk_pending, 0); rearm_timer: tcp_rearm_rto(sk); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 79064580c8c0..56c597e763ac 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -701,11 +701,11 @@ void tcp_write_timer_handler(struct sock *sk) tcp_send_loss_probe(sk); break; case ICSK_TIME_RETRANS: - icsk->icsk_pending = 0; + smp_store_release(&icsk->icsk_pending, 0); tcp_retransmit_timer(sk); break; case ICSK_TIME_PROBE0: - icsk->icsk_pending = 0; + smp_store_release(&icsk->icsk_pending, 0); tcp_probe_timer(sk); break; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d71ab4e1efe1..7634c0be6acb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2177,6 +2177,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; + u8 icsk_pending; int rx_queue; int state; @@ -2185,12 +2186,13 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); - if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + icsk_pending = smp_load_acquire(&icsk->icsk_pending); + if (icsk_pending == ICSK_TIME_RETRANS || + icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; - } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + } else if (icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; timer_expires = icsk->icsk_timeout; } else if (timer_pending(&sp->sk_timer)) { -- cgit v1.2.3 From 81df4fa94ee8c0800ed42c47357435602ed105ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Oct 2024 17:30:42 +0000 Subject: tcp: add a fast path in tcp_delack_timer() delack timer is not stopped from inet_csk_clear_xmit_timer() because we do not define INET_CSK_CLEAR_TIMERS. This is a conscious choice : inet_csk_clear_xmit_timer() is often called from another cpu. Calling del_timer() would cause false sharing and lock contention. This means that very often, tcp_delack_timer() is called at the timer expiration, while there is no ACK to transmit. This can be detected very early, avoiding the socket spinlock. Notes: - test about tp->compressed_ack is racy, but in the unlikely case there is a race, the dedicated compressed_ack_timer hrtimer would close it. - Even if the fast path is not taken, reading icsk->icsk_ack.pending and tp->compressed_ack before acquiring the socket spinlock reduces acquisition time and chances of contention. 
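The release/acquire pairing relied on here can be summarized with a generic sketch (illustrative only, not code from the series): a writer publishes state and then sets a flag with release semantics, so a lockless reader that observes the flag with acquire semantics also observes that state, and can skip locking when the flag is clear:

  #include <linux/types.h>
  #include <linux/jiffies.h>
  #include <asm/barrier.h>

  struct example_timer_state {
          unsigned long   timeout;
          u8              pending;
  };

  static void example_arm(struct example_timer_state *st, unsigned long when)
  {
          st->timeout = jiffies + when;           /* published before the flag */
          smp_store_release(&st->pending, 1);
  }

  static bool example_needs_work(struct example_timer_state *st)
  {
          /* Pairs with the smp_store_release() in example_arm(). */
          if (!smp_load_acquire(&st->pending))
                  return false;                   /* nothing armed: skip locking */
          return time_after_eq(jiffies, st->timeout);
  }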
Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20241002173042.917928-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 5 +++-- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/tcp_output.c | 3 ++- net/ipv4/tcp_timer.c | 9 +++++++++ net/mptcp/protocol.c | 3 ++- 5 files changed, 18 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 914d19772704..3c82fad904d4 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -202,7 +202,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) sk_stop_timer(sk, &icsk->icsk_retransmit_timer); #endif } else if (what == ICSK_TIME_DACK) { - icsk->icsk_ack.pending = 0; + smp_store_release(&icsk->icsk_ack.pending, 0); icsk->icsk_ack.retry = 0; #ifdef INET_CSK_CLEAR_TIMERS sk_stop_timer(sk, &icsk->icsk_delack_timer); @@ -233,7 +233,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); } else if (what == ICSK_TIME_DACK) { - icsk->icsk_ack.pending |= ICSK_ACK_TIMER; + smp_store_release(&icsk->icsk_ack.pending, + icsk->icsk_ack.pending | ICSK_ACK_TIMER); icsk->icsk_ack.timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); } else { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8c53385cc808..12e975ed4910 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -776,7 +776,7 @@ void inet_csk_clear_xmit_timers(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); smp_store_release(&icsk->icsk_pending, 0); - icsk->icsk_ack.pending = 0; + smp_store_release(&icsk->icsk_ack.pending, 0); sk_stop_timer(sk, &icsk->icsk_retransmit_timer); sk_stop_timer(sk, &icsk->icsk_delack_timer); @@ -792,7 +792,7 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk) sock_not_owned_by_me(sk); smp_store_release(&icsk->icsk_pending, 0); - icsk->icsk_ack.pending = 0; + smp_store_release(&icsk->icsk_ack.pending, 0); sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4d0407301603..08772395690d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -4224,7 +4224,8 @@ void tcp_send_delayed_ack(struct sock *sk) if (!time_before(timeout, icsk->icsk_ack.timeout)) timeout = icsk->icsk_ack.timeout; } - icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; + smp_store_release(&icsk->icsk_ack.pending, + icsk->icsk_ack.pending | ICSK_ACK_SCHED | ICSK_ACK_TIMER); icsk->icsk_ack.timeout = timeout; sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b7266b9101ce..c3a7442332d4 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -361,6 +361,14 @@ static void tcp_delack_timer(struct timer_list *t) from_timer(icsk, t, icsk_delack_timer); struct sock *sk = &icsk->icsk_inet.sk; + /* Avoid taking socket spinlock if there is no ACK to send. + * The compressed_ack check is racy, but a separate hrtimer + * will take care of it eventually. 
+ */ + if (!(smp_load_acquire(&icsk->icsk_ack.pending) & ICSK_ACK_TIMER) && + !READ_ONCE(tcp_sk(sk)->compressed_ack)) + goto out; + bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { tcp_delack_timer_handler(sk); @@ -371,6 +379,7 @@ static void tcp_delack_timer(struct timer_list *t) sock_hold(sk); } bh_unlock_sock(sk); +out: sock_put(sk); } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c2317919fc14..e85862352084 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3504,7 +3504,8 @@ static void schedule_3rdack_retransmission(struct sock *ssk) timeout += jiffies; WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER); - icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; + smp_store_release(&icsk->icsk_ack.pending, + icsk->icsk_ack.pending | ICSK_ACK_SCHED | ICSK_ACK_TIMER); icsk->icsk_ack.timeout = timeout; sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout); } -- cgit v1.2.3 From f858cc9eed5b05cbe38d7ffd2787c21e3718eb7d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Oct 2024 12:12:18 +0000 Subject: net: add IFLA_MAX_PACING_OFFLOAD_HORIZON device attribute Some network devices have the ability to offload EDT (Earliest Departure Time) which is the model used for TCP pacing and FQ packet scheduler. Some of them implement the timing wheel mechanism described in https://saeed.github.io/files/carousel-sigcomm17.pdf with an associated 'timing wheel horizon'. This patch adds dev->max_pacing_offload_horizon expressing this timing wheel horizon in nsec units. This is a read-only attribute. Unless a driver sets it, dev->max_pacing_offload_horizon is zero. v2: addressed Jakub feedback ( https://lore.kernel.org/netdev/20240930152304.472767-2-edumazet@google.com/T/#mf6294d714c41cc459962154cc2580ce3c9693663 ) v3: added yaml doc (also per Jakub feedback) Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241003121219.2396589-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 4 ++++ Documentation/networking/net_cachelines/net_device.rst | 1 + include/linux/netdevice.h | 4 ++++ include/uapi/linux/if_link.h | 1 + net/core/rtnetlink.c | 4 ++++ tools/include/uapi/linux/if_link.h | 1 + 6 files changed, 15 insertions(+) (limited to 'include') diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 0c4d5d40cae9..d7131a1afadf 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -1137,6 +1137,10 @@ attribute-sets: name: dpll-pin type: nest nested-attributes: link-dpll-pin-attrs + - + name: max-pacing-offload-horizon + type: uint + doc: EDT offload horizon supported by the device (in nsec). - name: af-spec-attrs attributes: diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 22b07c814f4a..49f03cb78c6e 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -183,3 +183,4 @@ struct_devlink_port* devlink_port struct_dpll_pin* dpll_pin struct hlist_head page_pools struct dim_irq_moder* irq_moder +u64 max_pacing_offload_horizon diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4d20c776a4ff..49a7e7db0883 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2009,6 +2009,8 @@ enum netdev_reg_state { * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, * where the clock is recovered. 
* + * @max_pacing_offload_horizon: max EDT offload horizon in nsec. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2399,6 +2401,8 @@ struct net_device { /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */ struct dim_irq_moder *irq_moder; + u64 max_pacing_offload_horizon; + u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6dc258993b17..506ba9c80e83 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -377,6 +377,7 @@ enum { IFLA_GSO_IPV4_MAX_SIZE, IFLA_GRO_IPV4_MAX_SIZE, IFLA_DPLL_PIN, + IFLA_MAX_PACING_OFFLOAD_HORIZON, __IFLA_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f0a520987085..682d8d3127db 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1118,6 +1118,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */ + rtnl_devlink_port_size(dev) + rtnl_dpll_pin_size(dev) + + nla_total_size(8) /* IFLA_MAX_PACING_OFFLOAD_HORIZON */ + 0; } @@ -1867,6 +1868,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, READ_ONCE(dev->tso_max_size)) || nla_put_u32(skb, IFLA_TSO_MAX_SEGS, READ_ONCE(dev->tso_max_segs)) || + nla_put_uint(skb, IFLA_MAX_PACING_OFFLOAD_HORIZON, + READ_ONCE(dev->max_pacing_offload_horizon)) || #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, READ_ONCE(dev->num_rx_queues)) || @@ -1975,6 +1978,7 @@ nla_put_failure: } static const struct nla_policy ifla_policy[IFLA_MAX+1] = { + [IFLA_UNSPEC] = { .strict_start_type = IFLA_DPLL_PIN }, [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index f0d71b2a3f1e..96ec2b01e725 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -377,6 +377,7 @@ enum { IFLA_GSO_IPV4_MAX_SIZE, IFLA_GRO_IPV4_MAX_SIZE, IFLA_DPLL_PIN, + IFLA_MAX_PACING_OFFLOAD_HORIZON, __IFLA_MAX }; -- cgit v1.2.3 From f26080d47007df2ee90e65b7d390207ff3a588af Mon Sep 17 00:00:00 2001 From: Jeffrey Ji Date: Thu, 3 Oct 2024 12:12:19 +0000 Subject: net_sched: sch_fq: add the ability to offload pacing Some network devices have the ability to offload EDT (Earliest Departure Time) which is the model used for TCP pacing and FQ packet scheduler. Some of them implement the timing wheel mechanism described in https://saeed.github.io/files/carousel-sigcomm17.pdf with an associated 'timing wheel horizon'. This patch adds the TCA_FQ_OFFLOAD_HORIZON attribute to the FQ packet scheduler. Its value is capped by the device max_pacing_offload_horizon, added in the prior patch. It allows FQ to let packets within the pacing offload horizon be delivered to the device, which will handle the needed delay without host involvement.
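A driver-side sketch of how the cap could be advertised (hypothetical device and value, not part of this patch): a NIC whose timing wheel absorbs about 6 ms of future transmit times would set the field once at netdev setup, and FQ then accepts TCA_FQ_OFFLOAD_HORIZON values (in usec) up to that amount:

  #include <linux/netdevice.h>
  #include <linux/time64.h>

  static void example_nic_setup_pacing(struct net_device *dev)
  {
          /* Timing wheel horizon of this (made-up) device: 6 ms, in nsec. */
          dev->max_pacing_offload_horizon = 6 * NSEC_PER_MSEC;
  }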
Signed-off-by: Jeffrey Ji Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241003121219.2396589-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_sched.h | 2 ++ net/sched/sch_fq.c | 33 +++++++++++++++++++++++++++------ 2 files changed, 29 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index a3cd0c2dc995..25a9a47001cd 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -836,6 +836,8 @@ enum { TCA_FQ_WEIGHTS, /* Weights for each band */ + TCA_FQ_OFFLOAD_HORIZON, /* dequeue paced packets within this horizon immediately (us units) */ + __TCA_FQ_MAX }; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 19a49af5a9e5..aeabf45c9200 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -111,6 +111,7 @@ struct fq_perband_flows { struct fq_sched_data { /* Read mostly cache line */ + u64 offload_horizon; u32 quantum; u32 initial_quantum; u32 flow_refill_delay; @@ -299,7 +300,7 @@ static void fq_gc(struct fq_sched_data *q, } /* Fast path can be used if : - * 1) Packet tstamp is in the past. + * 1) Packet tstamp is in the past, or within the pacing offload horizon. * 2) FQ qlen == 0 OR * (no flow is currently eligible for transmit, * AND fast path queue has less than 8 packets) @@ -314,7 +315,7 @@ static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb, const struct fq_sched_data *q = qdisc_priv(sch); const struct sock *sk; - if (fq_skb_cb(skb)->time_to_send > now) + if (fq_skb_cb(skb)->time_to_send > now + q->offload_horizon) return false; if (sch->q.qlen != 0) { @@ -595,15 +596,18 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now) unsigned long sample; struct rb_node *p; - if (q->time_next_delayed_flow > now) + if (q->time_next_delayed_flow > now + q->offload_horizon) return; /* Update unthrottle latency EWMA. * This is cheap and can help diagnosing timer/latency problems. 
*/ sample = (unsigned long)(now - q->time_next_delayed_flow); - q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3; - q->unthrottle_latency_ns += sample >> 3; + if ((long)sample > 0) { + q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3; + q->unthrottle_latency_ns += sample >> 3; + } + now += q->offload_horizon; q->time_next_delayed_flow = ~0ULL; while ((p = rb_first(&q->delayed)) != NULL) { @@ -687,7 +691,7 @@ begin: u64 time_next_packet = max_t(u64, fq_skb_cb(skb)->time_to_send, f->time_next_packet); - if (now < time_next_packet) { + if (now + q->offload_horizon < time_next_packet) { head->first = f->next; f->time_next_packet = time_next_packet; fq_flow_set_throttled(q, f); @@ -925,6 +929,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 }, [TCA_FQ_PRIOMAP] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_prio_qopt)), [TCA_FQ_WEIGHTS] = NLA_POLICY_EXACT_LEN(FQ_BANDS * sizeof(s32)), + [TCA_FQ_OFFLOAD_HORIZON] = { .type = NLA_U32 }, }; /* compress a u8 array with all elems <= 3 to an array of 2-bit fields */ @@ -1100,6 +1105,17 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, WRITE_ONCE(q->horizon_drop, nla_get_u8(tb[TCA_FQ_HORIZON_DROP])); + if (tb[TCA_FQ_OFFLOAD_HORIZON]) { + u64 offload_horizon = (u64)NSEC_PER_USEC * + nla_get_u32(tb[TCA_FQ_OFFLOAD_HORIZON]); + + if (offload_horizon <= qdisc_dev(sch)->max_pacing_offload_horizon) { + WRITE_ONCE(q->offload_horizon, offload_horizon); + } else { + NL_SET_ERR_MSG_MOD(extack, "invalid offload_horizon"); + err = -EINVAL; + } + } if (!err) { sch_tree_unlock(sch); @@ -1183,6 +1199,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) .bands = FQ_BANDS, }; struct nlattr *opts; + u64 offload_horizon; u64 ce_threshold; s32 weights[3]; u64 horizon; @@ -1199,6 +1216,9 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) horizon = READ_ONCE(q->horizon); do_div(horizon, NSEC_PER_USEC); + offload_horizon = READ_ONCE(q->offload_horizon); + do_div(offload_horizon, NSEC_PER_USEC); + if (nla_put_u32(skb, TCA_FQ_PLIMIT, READ_ONCE(sch->limit)) || nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, @@ -1224,6 +1244,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_TIMER_SLACK, READ_ONCE(q->timer_slack)) || nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) || + nla_put_u32(skb, TCA_FQ_OFFLOAD_HORIZON, (u32)offload_horizon) || nla_put_u8(skb, TCA_FQ_HORIZON_DROP, READ_ONCE(q->horizon_drop))) goto nla_put_failure; -- cgit v1.2.3 From 65c4c93caaf1a9fca2855942e338530967162d25 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:12 +0200 Subject: crypto: sig - Introduce sig_alg backend Commit 6cb8815f41a9 ("crypto: sig - Add interface for sign/verify") began a transition of asymmetric sign/verify operations from crypto_akcipher to a new crypto_sig frontend. Internally, the crypto_sig frontend still uses akcipher_alg as backend, however: "The link between sig and akcipher is meant to be temporary. The plan is to create a new low-level API for sig and then migrate the signature code over to that from akcipher." https://lore.kernel.org/r/ZrG6w9wsb-iiLZIF@gondor.apana.org.au/ "having a separate alg for sig is definitely where we want to be since there is very little that the two types actually share." https://lore.kernel.org/r/ZrHlpz4qnre0zWJO@gondor.apana.org.au/ Take the next step of that migration and augment the crypto_sig frontend with a sig_alg backend to which all algorithms can be moved. 
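To make the shape of the new backend concrete, a minimal registration might look like the sketch below (names and callback bodies are placeholders; per crypto_register_sig(), only set_pub_key and max_size are mandatory and the remaining callbacks fall back to defaults):

  #include <linux/module.h>
  #include <crypto/sig.h>
  #include <crypto/internal/sig.h>

  static int example_sig_verify(struct crypto_sig *tfm,
                                const void *src, unsigned int slen,
                                const void *digest, unsigned int dlen)
  {
          return -EKEYREJECTED;           /* placeholder */
  }

  static int example_sig_set_pub_key(struct crypto_sig *tfm,
                                     const void *key, unsigned int keylen)
  {
          return 0;                       /* placeholder */
  }

  static unsigned int example_sig_max_size(struct crypto_sig *tfm)
  {
          return 64;                      /* e.g. a fixed-size signature */
  }

  static struct sig_alg example_sig_alg = {
          .verify         = example_sig_verify,
          .set_pub_key    = example_sig_set_pub_key,
          .max_size       = example_sig_max_size,
          .base           = {
                  .cra_name               = "example-sig",
                  .cra_driver_name        = "example-sig-generic",
                  .cra_priority           = 100,
                  .cra_module             = THIS_MODULE,
          },
  };

  /* registered with crypto_register_sig(&example_sig_alg),
   * removed with crypto_unregister_sig(&example_sig_alg) */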
During the migration, there will briefly be signature algorithms that are still based on crypto_akcipher, whilst others are already based on crypto_sig. Allow for that by building a fork into crypto_sig_*() API calls (i.e. crypto_sig_maxsize() and friends) such that one of the two backends is selected based on the transform's cra_type. Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- Documentation/crypto/api-sig.rst | 14 ++++ Documentation/crypto/api.rst | 1 + Documentation/crypto/architecture.rst | 2 + crypto/sig.c | 143 +++++++++++++++++++++++++++++++++- crypto/testmgr.c | 115 +++++++++++++++++++++++++++ crypto/testmgr.h | 13 ++++ include/crypto/internal/sig.h | 80 +++++++++++++++++++ include/crypto/sig.h | 61 +++++++++++++++ include/uapi/linux/cryptouser.h | 5 ++ 9 files changed, 432 insertions(+), 2 deletions(-) create mode 100644 Documentation/crypto/api-sig.rst (limited to 'include') diff --git a/Documentation/crypto/api-sig.rst b/Documentation/crypto/api-sig.rst new file mode 100644 index 000000000000..a96dba66296b --- /dev/null +++ b/Documentation/crypto/api-sig.rst @@ -0,0 +1,14 @@ +Asymmetric Signature Algorithm Definitions +------------------------------------------ + +.. kernel-doc:: include/crypto/sig.h + :functions: sig_alg + +Asymmetric Signature API +------------------------ + +.. kernel-doc:: include/crypto/sig.h + :doc: Generic Public Key Signature API + +.. kernel-doc:: include/crypto/sig.h + :functions: crypto_alloc_sig crypto_free_sig crypto_sig_set_pubkey crypto_sig_set_privkey crypto_sig_maxsize crypto_sig_sign crypto_sig_verify diff --git a/Documentation/crypto/api.rst b/Documentation/crypto/api.rst index ff31c30561d4..8b2a90521886 100644 --- a/Documentation/crypto/api.rst +++ b/Documentation/crypto/api.rst @@ -10,4 +10,5 @@ Programming Interface api-digest api-rng api-akcipher + api-sig api-kpp diff --git a/Documentation/crypto/architecture.rst b/Documentation/crypto/architecture.rst index 646c3380a7ed..15dcd62fd22f 100644 --- a/Documentation/crypto/architecture.rst +++ b/Documentation/crypto/architecture.rst @@ -214,6 +214,8 @@ the aforementioned cipher types: - CRYPTO_ALG_TYPE_AKCIPHER Asymmetric cipher +- CRYPTO_ALG_TYPE_SIG Asymmetric signature + - CRYPTO_ALG_TYPE_PCOMPRESS Enhanced version of CRYPTO_ALG_TYPE_COMPRESS allowing for segmented compression / decompression instead of performing the operation on one segment diff --git a/crypto/sig.c b/crypto/sig.c index 7645bedf3a1f..4f36ceb7a90b 100644 --- a/crypto/sig.c +++ b/crypto/sig.c @@ -21,14 +21,38 @@ static const struct crypto_type crypto_sig_type; +static void crypto_sig_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_sig *sig = __crypto_sig_tfm(tfm); + struct sig_alg *alg = crypto_sig_alg(sig); + + alg->exit(sig); +} + static int crypto_sig_init_tfm(struct crypto_tfm *tfm) { if (tfm->__crt_alg->cra_type != &crypto_sig_type) return crypto_init_akcipher_ops_sig(tfm); + struct crypto_sig *sig = __crypto_sig_tfm(tfm); + struct sig_alg *alg = crypto_sig_alg(sig); + + if (alg->exit) + sig->base.exit = crypto_sig_exit_tfm; + + if (alg->init) + return alg->init(sig); + return 0; } +static void crypto_sig_free_instance(struct crypto_instance *inst) +{ + struct sig_instance *sig = sig_instance(inst); + + sig->free(sig); +} + static void __maybe_unused crypto_sig_show(struct seq_file *m, struct crypto_alg *alg) { @@ -38,16 +62,17 @@ static void __maybe_unused crypto_sig_show(struct seq_file *m, static int __maybe_unused crypto_sig_report(struct sk_buff *skb, struct crypto_alg *alg) { - struct 
crypto_report_akcipher rsig = {}; + struct crypto_report_sig rsig = {}; strscpy(rsig.type, "sig", sizeof(rsig.type)); - return nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER, sizeof(rsig), &rsig); + return nla_put(skb, CRYPTOCFGA_REPORT_SIG, sizeof(rsig), &rsig); } static const struct crypto_type crypto_sig_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_sig_init_tfm, + .free = crypto_sig_free_instance, #ifdef CONFIG_PROC_FS .show = crypto_sig_show, #endif @@ -68,6 +93,14 @@ EXPORT_SYMBOL_GPL(crypto_alloc_sig); int crypto_sig_maxsize(struct crypto_sig *tfm) { + if (crypto_sig_tfm(tfm)->__crt_alg->cra_type != &crypto_sig_type) + goto akcipher; + + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->max_size(tfm); + +akcipher: struct crypto_akcipher **ctx = crypto_sig_ctx(tfm); return crypto_akcipher_maxsize(*ctx); @@ -78,6 +111,14 @@ int crypto_sig_sign(struct crypto_sig *tfm, const void *src, unsigned int slen, void *dst, unsigned int dlen) { + if (crypto_sig_tfm(tfm)->__crt_alg->cra_type != &crypto_sig_type) + goto akcipher; + + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->sign(tfm, src, slen, dst, dlen); + +akcipher: struct crypto_akcipher **ctx = crypto_sig_ctx(tfm); struct crypto_akcipher_sync_data data = { .tfm = *ctx, @@ -97,6 +138,14 @@ int crypto_sig_verify(struct crypto_sig *tfm, const void *src, unsigned int slen, const void *digest, unsigned int dlen) { + if (crypto_sig_tfm(tfm)->__crt_alg->cra_type != &crypto_sig_type) + goto akcipher; + + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->verify(tfm, src, slen, digest, dlen); + +akcipher: struct crypto_akcipher **ctx = crypto_sig_ctx(tfm); struct crypto_akcipher_sync_data data = { .tfm = *ctx, @@ -120,6 +169,14 @@ EXPORT_SYMBOL_GPL(crypto_sig_verify); int crypto_sig_set_pubkey(struct crypto_sig *tfm, const void *key, unsigned int keylen) { + if (crypto_sig_tfm(tfm)->__crt_alg->cra_type != &crypto_sig_type) + goto akcipher; + + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->set_pub_key(tfm, key, keylen); + +akcipher: struct crypto_akcipher **ctx = crypto_sig_ctx(tfm); return crypto_akcipher_set_pub_key(*ctx, key, keylen); @@ -129,11 +186,93 @@ EXPORT_SYMBOL_GPL(crypto_sig_set_pubkey); int crypto_sig_set_privkey(struct crypto_sig *tfm, const void *key, unsigned int keylen) { + if (crypto_sig_tfm(tfm)->__crt_alg->cra_type != &crypto_sig_type) + goto akcipher; + + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->set_priv_key(tfm, key, keylen); + +akcipher: struct crypto_akcipher **ctx = crypto_sig_ctx(tfm); return crypto_akcipher_set_priv_key(*ctx, key, keylen); } EXPORT_SYMBOL_GPL(crypto_sig_set_privkey); +static void sig_prepare_alg(struct sig_alg *alg) +{ + struct crypto_alg *base = &alg->base; + + base->cra_type = &crypto_sig_type; + base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; + base->cra_flags |= CRYPTO_ALG_TYPE_SIG; +} + +static int sig_default_sign(struct crypto_sig *tfm, + const void *src, unsigned int slen, + void *dst, unsigned int dlen) +{ + return -ENOSYS; +} + +static int sig_default_verify(struct crypto_sig *tfm, + const void *src, unsigned int slen, + const void *dst, unsigned int dlen) +{ + return -ENOSYS; +} + +static int sig_default_set_key(struct crypto_sig *tfm, + const void *key, unsigned int keylen) +{ + return -ENOSYS; +} + +int crypto_register_sig(struct sig_alg *alg) +{ + struct crypto_alg *base = &alg->base; + + if (!alg->sign) + alg->sign = sig_default_sign; + if (!alg->verify) + alg->verify = sig_default_verify; + if (!alg->set_priv_key) + 
alg->set_priv_key = sig_default_set_key; + if (!alg->set_pub_key) + return -EINVAL; + if (!alg->max_size) + return -EINVAL; + + sig_prepare_alg(alg); + return crypto_register_alg(base); +} +EXPORT_SYMBOL_GPL(crypto_register_sig); + +void crypto_unregister_sig(struct sig_alg *alg) +{ + crypto_unregister_alg(&alg->base); +} +EXPORT_SYMBOL_GPL(crypto_unregister_sig); + +int sig_register_instance(struct crypto_template *tmpl, + struct sig_instance *inst) +{ + if (WARN_ON(!inst->free)) + return -EINVAL; + sig_prepare_alg(&inst->alg); + return crypto_register_instance(tmpl, sig_crypto_instance(inst)); +} +EXPORT_SYMBOL_GPL(sig_register_instance); + +int crypto_grab_sig(struct crypto_sig_spawn *spawn, + struct crypto_instance *inst, + const char *name, u32 type, u32 mask) +{ + spawn->base.frontend = &crypto_sig_type; + return crypto_grab_spawn(&spawn->base, inst, name, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_grab_sig); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Public Key Signature Algorithms"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index ee8da628e9da..50c8d3e46e2b 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -131,6 +132,11 @@ struct akcipher_test_suite { unsigned int count; }; +struct sig_test_suite { + const struct sig_testvec *vecs; + unsigned int count; +}; + struct kpp_test_suite { const struct kpp_testvec *vecs; unsigned int count; @@ -151,6 +157,7 @@ struct alg_test_desc { struct cprng_test_suite cprng; struct drbg_test_suite drbg; struct akcipher_test_suite akcipher; + struct sig_test_suite sig; struct kpp_test_suite kpp; } suite; }; @@ -4338,6 +4345,114 @@ static int alg_test_akcipher(const struct alg_test_desc *desc, return err; } +static int test_sig_one(struct crypto_sig *tfm, const struct sig_testvec *vecs) +{ + u8 *ptr, *key __free(kfree); + int err, sig_size; + + key = kmalloc(vecs->key_len + 2 * sizeof(u32) + vecs->param_len, + GFP_KERNEL); + if (!key) + return -ENOMEM; + + /* ecrdsa expects additional parameters appended to the key */ + memcpy(key, vecs->key, vecs->key_len); + ptr = key + vecs->key_len; + ptr = test_pack_u32(ptr, vecs->algo); + ptr = test_pack_u32(ptr, vecs->param_len); + memcpy(ptr, vecs->params, vecs->param_len); + + if (vecs->public_key_vec) + err = crypto_sig_set_pubkey(tfm, key, vecs->key_len); + else + err = crypto_sig_set_privkey(tfm, key, vecs->key_len); + if (err) + return err; + + /* + * Run asymmetric signature verification first + * (which does not require a private key) + */ + err = crypto_sig_verify(tfm, vecs->c, vecs->c_size, + vecs->m, vecs->m_size); + if (err) { + pr_err("alg: sig: verify test failed: err %d\n", err); + return err; + } + + /* + * Don't invoke sign test (which requires a private key) + * for vectors with only a public key. 
+ */ + if (vecs->public_key_vec) + return 0; + + sig_size = crypto_sig_maxsize(tfm); + if (sig_size < vecs->c_size) { + pr_err("alg: sig: invalid maxsize %u\n", sig_size); + return -EINVAL; + } + + u8 *sig __free(kfree) = kzalloc(sig_size, GFP_KERNEL); + if (!sig) + return -ENOMEM; + + /* Run asymmetric signature generation */ + err = crypto_sig_sign(tfm, vecs->m, vecs->m_size, sig, sig_size); + if (err) { + pr_err("alg: sig: sign test failed: err %d\n", err); + return err; + } + + /* Verify that generated signature equals cooked signature */ + if (memcmp(sig, vecs->c, vecs->c_size) || + memchr_inv(sig + vecs->c_size, 0, sig_size - vecs->c_size)) { + pr_err("alg: sig: sign test failed: invalid output\n"); + hexdump(sig, sig_size); + return -EINVAL; + } + + return 0; +} + +static int test_sig(struct crypto_sig *tfm, const char *alg, + const struct sig_testvec *vecs, unsigned int tcount) +{ + const char *algo = crypto_tfm_alg_driver_name(crypto_sig_tfm(tfm)); + int ret, i; + + for (i = 0; i < tcount; i++) { + ret = test_sig_one(tfm, vecs++); + if (ret) { + pr_err("alg: sig: test %d failed for %s: err %d\n", + i + 1, algo, ret); + return ret; + } + } + return 0; +} + +__maybe_unused +static int alg_test_sig(const struct alg_test_desc *desc, const char *driver, + u32 type, u32 mask) +{ + struct crypto_sig *tfm; + int err = 0; + + tfm = crypto_alloc_sig(driver, type, mask); + if (IS_ERR(tfm)) { + pr_err("alg: sig: Failed to load tfm for %s: %ld\n", + driver, PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + if (desc->suite.sig.vecs) + err = test_sig(tfm, desc->alg, desc->suite.sig.vecs, + desc->suite.sig.count); + + crypto_free_sig(tfm); + return err; +} + static int alg_test_null(const struct alg_test_desc *desc, const char *driver, u32 type, u32 mask) { diff --git a/crypto/testmgr.h b/crypto/testmgr.h index ed1640f3e352..39dd1d558883 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -162,6 +162,19 @@ struct akcipher_testvec { enum OID algo; }; +struct sig_testvec { + const unsigned char *key; + const unsigned char *params; + const unsigned char *m; + const unsigned char *c; + unsigned int key_len; + unsigned int param_len; + unsigned int m_size; + unsigned int c_size; + bool public_key_vec; + enum OID algo; +}; + struct kpp_testvec { const unsigned char *secret; const unsigned char *b_secret; diff --git a/include/crypto/internal/sig.h b/include/crypto/internal/sig.h index 97cb26ef8115..b16648c1a986 100644 --- a/include/crypto/internal/sig.h +++ b/include/crypto/internal/sig.h @@ -10,8 +10,88 @@ #include #include +struct sig_instance { + void (*free)(struct sig_instance *inst); + union { + struct { + char head[offsetof(struct sig_alg, base)]; + struct crypto_instance base; + }; + struct sig_alg alg; + }; +}; + +struct crypto_sig_spawn { + struct crypto_spawn base; +}; + static inline void *crypto_sig_ctx(struct crypto_sig *tfm) { return crypto_tfm_ctx(&tfm->base); } + +/** + * crypto_register_sig() -- Register public key signature algorithm + * + * Function registers an implementation of a public key signature algorithm + * + * @alg: algorithm definition + * + * Return: zero on success; error code in case of error + */ +int crypto_register_sig(struct sig_alg *alg); + +/** + * crypto_unregister_sig() -- Unregister public key signature algorithm + * + * Function unregisters an implementation of a public key signature algorithm + * + * @alg: algorithm definition + */ +void crypto_unregister_sig(struct sig_alg *alg); + +int sig_register_instance(struct crypto_template *tmpl, + struct sig_instance 
*inst); + +static inline struct sig_instance *sig_instance(struct crypto_instance *inst) +{ + return container_of(&inst->alg, struct sig_instance, alg.base); +} + +static inline struct sig_instance *sig_alg_instance(struct crypto_sig *tfm) +{ + return sig_instance(crypto_tfm_alg_instance(&tfm->base)); +} + +static inline struct crypto_instance *sig_crypto_instance(struct sig_instance + *inst) +{ + return container_of(&inst->alg.base, struct crypto_instance, alg); +} + +static inline void *sig_instance_ctx(struct sig_instance *inst) +{ + return crypto_instance_ctx(sig_crypto_instance(inst)); +} + +int crypto_grab_sig(struct crypto_sig_spawn *spawn, + struct crypto_instance *inst, + const char *name, u32 type, u32 mask); + +static inline struct crypto_sig *crypto_spawn_sig(struct crypto_sig_spawn + *spawn) +{ + return crypto_spawn_tfm2(&spawn->base); +} + +static inline void crypto_drop_sig(struct crypto_sig_spawn *spawn) +{ + crypto_drop_spawn(&spawn->base); +} + +static inline struct sig_alg *crypto_spawn_sig_alg(struct crypto_sig_spawn + *spawn) +{ + return container_of(spawn->base.alg, struct sig_alg, base); +} #endif diff --git a/include/crypto/sig.h b/include/crypto/sig.h index d25186bb2be3..f0f52a7c5ae7 100644 --- a/include/crypto/sig.h +++ b/include/crypto/sig.h @@ -19,6 +19,52 @@ struct crypto_sig { struct crypto_tfm base; }; +/** + * struct sig_alg - generic public key signature algorithm + * + * @sign: Function performs a sign operation as defined by public key + * algorithm. Optional. + * @verify: Function performs a complete verify operation as defined by + * public key algorithm, returning verification status. Optional. + * @set_pub_key: Function invokes the algorithm specific set public key + * function, which knows how to decode and interpret + * the BER encoded public key and parameters. Mandatory. + * @set_priv_key: Function invokes the algorithm specific set private key + * function, which knows how to decode and interpret + * the BER encoded private key and parameters. Optional. + * @max_size: Function returns key size. Mandatory. + * @init: Initialize the cryptographic transformation object. + * This function is used to initialize the cryptographic + * transformation object. This function is called only once at + * the instantiation time, right after the transformation context + * was allocated. In case the cryptographic hardware has some + * special requirements which need to be handled by software, this + * function shall check for the precise requirement of the + * transformation and put any software fallbacks in place. + * @exit: Deinitialize the cryptographic transformation object. This is a + * counterpart to @init, used to remove various changes set in + * @init. 
+ * + * @base: Common crypto API algorithm data structure + */ +struct sig_alg { + int (*sign)(struct crypto_sig *tfm, + const void *src, unsigned int slen, + void *dst, unsigned int dlen); + int (*verify)(struct crypto_sig *tfm, + const void *src, unsigned int slen, + const void *digest, unsigned int dlen); + int (*set_pub_key)(struct crypto_sig *tfm, + const void *key, unsigned int keylen); + int (*set_priv_key)(struct crypto_sig *tfm, + const void *key, unsigned int keylen); + unsigned int (*max_size)(struct crypto_sig *tfm); + int (*init)(struct crypto_sig *tfm); + void (*exit)(struct crypto_sig *tfm); + + struct crypto_alg base; +}; + /** * DOC: Generic Public Key Signature API * @@ -47,6 +93,21 @@ static inline struct crypto_tfm *crypto_sig_tfm(struct crypto_sig *tfm) return &tfm->base; } +static inline struct crypto_sig *__crypto_sig_tfm(struct crypto_tfm *tfm) +{ + return container_of(tfm, struct crypto_sig, base); +} + +static inline struct sig_alg *__crypto_sig_alg(struct crypto_alg *alg) +{ + return container_of(alg, struct sig_alg, base); +} + +static inline struct sig_alg *crypto_sig_alg(struct crypto_sig *tfm) +{ + return __crypto_sig_alg(crypto_sig_tfm(tfm)->__crt_alg); +} + /** * crypto_free_sig() - free signature tfm handle * diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 20a6c0fc149e..db05e0419972 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -64,6 +64,7 @@ enum crypto_attr_type_t { CRYPTOCFGA_STAT_AKCIPHER, /* No longer supported, do not use. */ CRYPTOCFGA_STAT_KPP, /* No longer supported, do not use. */ CRYPTOCFGA_STAT_ACOMP, /* No longer supported, do not use. */ + CRYPTOCFGA_REPORT_SIG, /* struct crypto_report_sig */ __CRYPTOCFGA_MAX #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1) @@ -207,6 +208,10 @@ struct crypto_report_acomp { char type[CRYPTO_MAX_NAME]; }; +struct crypto_report_sig { + char type[CRYPTO_MAX_NAME]; +}; + #define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \ sizeof(struct crypto_report_blkcipher)) -- cgit v1.2.3 From 7964b0d4bd1271f82d6b455366a200d320f7dbf8 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:15 +0200 Subject: crypto: rsa-pkcs1pad - Deduplicate set_{pub,priv}_key callbacks pkcs1pad_set_pub_key() and pkcs1pad_set_priv_key() are almost identical. The upcoming migration of sign/verify operations from rsa-pkcs1pad.c into a separate crypto_template will require another copy of the exact same functions. When RSASSA-PSS and RSAES-OAEP are introduced, each will need yet another copy. Deduplicate the functions into a single one which lives in a common header file for reuse by RSASSA-PKCS1-v1_5, RSASSA-PSS and RSAES-OAEP. 
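A sketch of the intended reuse (the wrapper context and functions below are hypothetical, standing in for a future RSASSA-PSS or RSAES-OAEP template): any template that keeps a child "rsa" akcipher transform can now set keys through the shared helper instead of open-coding the maxsize/key_size bookkeeping:

  #include <crypto/internal/akcipher.h>
  #include <crypto/internal/rsa.h>

  struct example_pss_ctx {                        /* hypothetical wrapper context */
          struct crypto_akcipher  *child;         /* child "rsa" transform */
          unsigned int            key_size;
  };

  static int example_pss_set_pub_key(struct crypto_akcipher *tfm,
                                     const void *key, unsigned int keylen)
  {
          struct example_pss_ctx *ctx = akcipher_tfm_ctx(tfm);

          return rsa_set_key(ctx->child, &ctx->key_size, RSA_PUB, key, keylen);
  }

  static int example_pss_set_priv_key(struct crypto_akcipher *tfm,
                                      const void *key, unsigned int keylen)
  {
          struct example_pss_ctx *ctx = akcipher_tfm_ctx(tfm);

          return rsa_set_key(ctx->child, &ctx->key_size, RSA_PRIV, key, keylen);
  }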
Signed-off-by: Lukas Wunner Reviewed-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 30 ++---------------------------- include/crypto/internal/rsa.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index cd501195f34a..3c5fe8c93938 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -131,42 +131,16 @@ static int pkcs1pad_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); - int err; - - ctx->key_size = 0; - err = crypto_akcipher_set_pub_key(ctx->child, key, keylen); - if (err) - return err; - - /* Find out new modulus size from rsa implementation */ - err = crypto_akcipher_maxsize(ctx->child); - if (err > PAGE_SIZE) - return -ENOTSUPP; - - ctx->key_size = err; - return 0; + return rsa_set_key(ctx->child, &ctx->key_size, RSA_PUB, key, keylen); } static int pkcs1pad_set_priv_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); - int err; - - ctx->key_size = 0; - err = crypto_akcipher_set_priv_key(ctx->child, key, keylen); - if (err) - return err; - - /* Find out new modulus size from rsa implementation */ - err = crypto_akcipher_maxsize(ctx->child); - if (err > PAGE_SIZE) - return -ENOTSUPP; - - ctx->key_size = err; - return 0; + return rsa_set_key(ctx->child, &ctx->key_size, RSA_PRIV, key, keylen); } static unsigned int pkcs1pad_get_max_size(struct crypto_akcipher *tfm) diff --git a/include/crypto/internal/rsa.h b/include/crypto/internal/rsa.h index e870133f4b77..754f687134df 100644 --- a/include/crypto/internal/rsa.h +++ b/include/crypto/internal/rsa.h @@ -8,6 +8,7 @@ #ifndef _RSA_HELPER_ #define _RSA_HELPER_ #include +#include /** * rsa_key - RSA key structure @@ -53,5 +54,32 @@ int rsa_parse_pub_key(struct rsa_key *rsa_key, const void *key, int rsa_parse_priv_key(struct rsa_key *rsa_key, const void *key, unsigned int key_len); +#define RSA_PUB (true) +#define RSA_PRIV (false) + +static inline int rsa_set_key(struct crypto_akcipher *child, + unsigned int *key_size, bool is_pubkey, + const void *key, unsigned int keylen) +{ + int err; + + *key_size = 0; + + if (is_pubkey) + err = crypto_akcipher_set_pub_key(child, key, keylen); + else + err = crypto_akcipher_set_priv_key(child, key, keylen); + if (err) + return err; + + /* Find out new modulus size from rsa implementation */ + err = crypto_akcipher_maxsize(child); + if (err > PAGE_SIZE) + return -ENOTSUPP; + + *key_size = err; + return 0; +} + extern struct crypto_template rsa_pkcs1pad_tmpl; #endif -- cgit v1.2.3 From 1e562deacecca1f1bec7d23da526904a1e87525e Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:16 +0200 Subject: crypto: rsassa-pkcs1 - Migrate to sig_alg backend A sig_alg backend has just been introduced with the intent of moving all asymmetric sign/verify algorithms to it one by one. Migrate the sign/verify operations from rsa-pkcs1pad.c to a separate rsassa-pkcs1.c which uses the new backend. Consequently there are now two templates which build on the "rsa" akcipher_alg: * The existing "pkcs1pad" template, which is instantiated as an akcipher_instance and retains the encrypt/decrypt operations of RSAES-PKCS1-v1_5 (RFC 8017 sec 7.2). * The new "pkcs1" template, which is instantiated as a sig_instance and contains the sign/verify operations of RSASSA-PKCS1-v1_5 (RFC 8017 sec 8.2). 
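Put differently (illustrative only, error handling trimmed, sha256 chosen as an example hash): encrypt/decrypt users keep allocating the akcipher instantiation while sign/verify users allocate the new sig instantiation:

  #include <linux/err.h>
  #include <crypto/akcipher.h>
  #include <crypto/sig.h>

  static void example_alloc_both(void)
  {
          struct crypto_akcipher *enc;    /* RSAES-PKCS1-v1_5, encrypt/decrypt */
          struct crypto_sig *sig;         /* RSASSA-PKCS1-v1_5, sign/verify */

          enc = crypto_alloc_akcipher("pkcs1pad(rsa)", 0, 0);
          sig = crypto_alloc_sig("pkcs1(rsa,sha256)", 0, 0);

          if (!IS_ERR(enc))
                  crypto_free_akcipher(enc);
          if (!IS_ERR(sig))
                  crypto_free_sig(sig);
  }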
In a separate step, rsa-pkcs1pad.c could optionally be renamed to rsaes-pkcs1.c for clarity. Additional "oaep" and "pss" templates could be added for RSAES-OAEP and RSASSA-PSS. Note that it's currently allowed to allocate a "pkcs1pad(rsa)" transform without specifying a hash algorithm. That makes sense if the transform is only used for encrypt/decrypt and continues to be supported. But for sign/verify, such transforms previously did not insert the Full Hash Prefix into the padding. The resulting message encoding was incompliant with EMSA-PKCS1-v1_5 (RFC 8017 sec 9.2) and therefore nonsensical. From here on in, it is no longer allowed to allocate a transform without specifying a hash algorithm if the transform is used for sign/verify operations. This simplifies the code because the insertion of the Full Hash Prefix is no longer optional, so various "if (digest_info)" clauses can be removed. There has been a previous attempt to forbid transform allocation without specifying a hash algorithm, namely by commit c0d20d22e0ad ("crypto: rsa-pkcs1pad - Require hash to be present"). It had to be rolled back with commit b3a8c8a5ebb5 ("crypto: rsa-pkcs1pad: Allow hash to be optional [ver #2]"), presumably because it broke allocation of a transform which was solely used for encrypt/decrypt, not sign/verify. Avoid such breakage by allowing transform allocation for encrypt/decrypt with and without specifying a hash algorithm (and simply ignoring the hash algorithm in the former case). So again, specifying a hash algorithm is now mandatory for sign/verify, but optional and ignored for encrypt/decrypt. The new sig_alg API uses kernel buffers instead of sglists, which avoids the overhead of copying signature and digest from sglists back into kernel buffers. rsassa-pkcs1.c is thus simplified quite a bit. sig_alg is always synchronous, whereas the underlying "rsa" akcipher_alg may be asynchronous. So await the result of the akcipher_alg, similar to crypto_akcipher_sync_{en,de}crypt(). As part of the migration, rename "rsa_digest_info" to "hash_prefix" to adhere to the spec language in RFC 9580. Otherwise keep the code unmodified wherever possible to ease reviewing and bisecting. Leave several simplification and hardening opportunities to separate commits. rsassa-pkcs1.c uses modern __free() syntax for allocation of buffers which need to be freed by kfree_sensitive(), hence a DEFINE_FREE() clause for kfree_sensitive() is introduced herein as a byproduct. 
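The cleanup pattern enabled by that DEFINE_FREE() entry looks roughly like this (function and buffer size are made up for illustration); the buffer is zeroed and freed automatically when it goes out of scope, including on the early return:

  #include <linux/cleanup.h>
  #include <linux/errno.h>
  #include <linux/slab.h>
  #include <linux/types.h>

  static int example_with_scratch(unsigned int len)
  {
          u8 *scratch __free(kfree_sensitive) = kzalloc(len, GFP_KERNEL);

          if (!scratch)
                  return -ENOMEM;

          /* ... use scratch for key material ... */

          return 0;       /* kfree_sensitive(scratch) runs automatically here */
  }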
Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 + crypto/Makefile | 1 + crypto/asymmetric_keys/public_key.c | 10 +- crypto/rsa-pkcs1pad.c | 341 ++--------------------------- crypto/rsa.c | 17 +- crypto/rsassa-pkcs1.c | 422 ++++++++++++++++++++++++++++++++++++ crypto/testmgr.c | 22 +- crypto/testmgr.h | 3 +- include/crypto/internal/rsa.h | 1 + include/linux/slab.h | 1 + security/integrity/ima/ima_main.c | 6 +- 11 files changed, 480 insertions(+), 345 deletions(-) create mode 100644 crypto/rsassa-pkcs1.c (limited to 'include') diff --git a/crypto/Kconfig b/crypto/Kconfig index 989418fe75eb..b3fb3b2ae12f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -250,6 +250,7 @@ config CRYPTO_RSA tristate "RSA (Rivest-Shamir-Adleman)" select CRYPTO_AKCIPHER select CRYPTO_MANAGER + select CRYPTO_SIG select MPILIB select ASN1 help diff --git a/crypto/Makefile b/crypto/Makefile index 4c99e5d376f6..7de29bf843e9 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -48,6 +48,7 @@ rsa_generic-y += rsaprivkey.asn1.o rsa_generic-y += rsa.o rsa_generic-y += rsa_helper.o rsa_generic-y += rsa-pkcs1pad.o +rsa_generic-y += rsassa-pkcs1.o obj-$(CONFIG_CRYPTO_RSA) += rsa_generic.o $(obj)/ecdsasignature.asn1.o: $(obj)/ecdsasignature.asn1.c $(obj)/ecdsasignature.asn1.h diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index 422940a6706a..3fb27ecd65f6 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -83,13 +83,19 @@ software_key_determine_akcipher(const struct public_key *pkey, if (strcmp(encoding, "pkcs1") == 0) { *sig = op == kernel_pkey_sign || op == kernel_pkey_verify; - if (!hash_algo) { + if (!*sig) { + /* + * For encrypt/decrypt, hash_algo is not used + * but allowed to be set for historic reasons. + */ n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", pkey->pkey_algo); } else { + if (!hash_algo) + return -EINVAL; n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, - "pkcs1pad(%s,%s)", + "pkcs1(%s,%s)", pkey->pkey_algo, hash_algo); } return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0; diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 3c5fe8c93938..50bdb18e7b48 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -16,101 +16,6 @@ #include #include -/* - * Hash algorithm OIDs plus ASN.1 DER wrappings [RFC4880 sec 5.2.2]. 
- */ -static const u8 rsa_digest_info_md5[] = { - 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, - 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x02, 0x05, /* OID */ - 0x05, 0x00, 0x04, 0x10 -}; - -static const u8 rsa_digest_info_sha1[] = { - 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, - 0x2b, 0x0e, 0x03, 0x02, 0x1a, - 0x05, 0x00, 0x04, 0x14 -}; - -static const u8 rsa_digest_info_rmd160[] = { - 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, - 0x2b, 0x24, 0x03, 0x02, 0x01, - 0x05, 0x00, 0x04, 0x14 -}; - -static const u8 rsa_digest_info_sha224[] = { - 0x30, 0x2d, 0x30, 0x0d, 0x06, 0x09, - 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04, - 0x05, 0x00, 0x04, 0x1c -}; - -static const u8 rsa_digest_info_sha256[] = { - 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, - 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, - 0x05, 0x00, 0x04, 0x20 -}; - -static const u8 rsa_digest_info_sha384[] = { - 0x30, 0x41, 0x30, 0x0d, 0x06, 0x09, - 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02, - 0x05, 0x00, 0x04, 0x30 -}; - -static const u8 rsa_digest_info_sha512[] = { - 0x30, 0x51, 0x30, 0x0d, 0x06, 0x09, - 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, - 0x05, 0x00, 0x04, 0x40 -}; - -static const u8 rsa_digest_info_sha3_256[] = { - 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, - 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x08, - 0x05, 0x00, 0x04, 0x20 -}; - -static const u8 rsa_digest_info_sha3_384[] = { - 0x30, 0x41, 0x30, 0x0d, 0x06, 0x09, - 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x09, - 0x05, 0x00, 0x04, 0x30 -}; - -static const u8 rsa_digest_info_sha3_512[] = { - 0x30, 0x51, 0x30, 0x0d, 0x06, 0x09, - 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x0A, - 0x05, 0x00, 0x04, 0x40 -}; - -static const struct rsa_asn1_template { - const char *name; - const u8 *data; - size_t size; -} rsa_asn1_templates[] = { -#define _(X) { #X, rsa_digest_info_##X, sizeof(rsa_digest_info_##X) } - _(md5), - _(sha1), - _(rmd160), - _(sha256), - _(sha384), - _(sha512), - _(sha224), -#undef _ -#define _(X) { "sha3-" #X, rsa_digest_info_sha3_##X, sizeof(rsa_digest_info_sha3_##X) } - _(256), - _(384), - _(512), -#undef _ - { NULL } -}; - -static const struct rsa_asn1_template *rsa_lookup_asn1(const char *name) -{ - const struct rsa_asn1_template *p; - - for (p = rsa_asn1_templates; p->name; p++) - if (strcmp(name, p->name) == 0) - return p; - return NULL; -} - struct pkcs1pad_ctx { struct crypto_akcipher *child; unsigned int key_size; @@ -118,7 +23,6 @@ struct pkcs1pad_ctx { struct pkcs1pad_inst_ctx { struct crypto_akcipher_spawn spawn; - const struct rsa_asn1_template *digest_info; }; struct pkcs1pad_request { @@ -148,9 +52,9 @@ static unsigned int pkcs1pad_get_max_size(struct crypto_akcipher *tfm) struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); /* - * The maximum destination buffer size for the encrypt/sign operations + * The maximum destination buffer size for the encrypt operation * will be the same as for RSA, even though it's smaller for - * decrypt/verify. + * decrypt. 
*/ return ctx->key_size; @@ -168,7 +72,7 @@ static void pkcs1pad_sg_set_buf(struct scatterlist *sg, void *buf, size_t len, sg_chain(sg, nsegs, next); } -static int pkcs1pad_encrypt_sign_complete(struct akcipher_request *req, int err) +static int pkcs1pad_encrypt_complete(struct akcipher_request *req, int err) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); @@ -207,14 +111,14 @@ out: return err; } -static void pkcs1pad_encrypt_sign_complete_cb(void *data, int err) +static void pkcs1pad_encrypt_complete_cb(void *data, int err) { struct akcipher_request *req = data; if (err == -EINPROGRESS) goto out; - err = pkcs1pad_encrypt_sign_complete(req, err); + err = pkcs1pad_encrypt_complete(req, err); out: akcipher_request_complete(req, err); @@ -255,7 +159,7 @@ static int pkcs1pad_encrypt(struct akcipher_request *req) akcipher_request_set_tfm(&req_ctx->child_req, ctx->child); akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, - pkcs1pad_encrypt_sign_complete_cb, req); + pkcs1pad_encrypt_complete_cb, req); /* Reuse output buffer */ akcipher_request_set_crypt(&req_ctx->child_req, req_ctx->in_sg, @@ -263,7 +167,7 @@ static int pkcs1pad_encrypt(struct akcipher_request *req) err = crypto_akcipher_encrypt(&req_ctx->child_req); if (err != -EINPROGRESS && err != -EBUSY) - return pkcs1pad_encrypt_sign_complete(req, err); + return pkcs1pad_encrypt_complete(req, err); return err; } @@ -368,195 +272,6 @@ static int pkcs1pad_decrypt(struct akcipher_request *req) return err; } -static int pkcs1pad_sign(struct akcipher_request *req) -{ - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); - struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req); - struct akcipher_instance *inst = akcipher_alg_instance(tfm); - struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst); - const struct rsa_asn1_template *digest_info = ictx->digest_info; - int err; - unsigned int ps_end, digest_info_size = 0; - - if (!ctx->key_size) - return -EINVAL; - - if (digest_info) - digest_info_size = digest_info->size; - - if (req->src_len + digest_info_size > ctx->key_size - 11) - return -EOVERFLOW; - - if (req->dst_len < ctx->key_size) { - req->dst_len = ctx->key_size; - return -EOVERFLOW; - } - - req_ctx->in_buf = kmalloc(ctx->key_size - 1 - req->src_len, - GFP_KERNEL); - if (!req_ctx->in_buf) - return -ENOMEM; - - ps_end = ctx->key_size - digest_info_size - req->src_len - 2; - req_ctx->in_buf[0] = 0x01; - memset(req_ctx->in_buf + 1, 0xff, ps_end - 1); - req_ctx->in_buf[ps_end] = 0x00; - - if (digest_info) - memcpy(req_ctx->in_buf + ps_end + 1, digest_info->data, - digest_info->size); - - pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf, - ctx->key_size - 1 - req->src_len, req->src); - - akcipher_request_set_tfm(&req_ctx->child_req, ctx->child); - akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, - pkcs1pad_encrypt_sign_complete_cb, req); - - /* Reuse output buffer */ - akcipher_request_set_crypt(&req_ctx->child_req, req_ctx->in_sg, - req->dst, ctx->key_size - 1, req->dst_len); - - err = crypto_akcipher_decrypt(&req_ctx->child_req); - if (err != -EINPROGRESS && err != -EBUSY) - return pkcs1pad_encrypt_sign_complete(req, err); - - return err; -} - -static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) -{ - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); - struct pkcs1pad_request *req_ctx = 
akcipher_request_ctx(req); - struct akcipher_instance *inst = akcipher_alg_instance(tfm); - struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst); - const struct rsa_asn1_template *digest_info = ictx->digest_info; - const unsigned int sig_size = req->src_len; - const unsigned int digest_size = req->dst_len; - unsigned int dst_len; - unsigned int pos; - u8 *out_buf; - - if (err) - goto done; - - err = -EINVAL; - dst_len = req_ctx->child_req.dst_len; - if (dst_len < ctx->key_size - 1) - goto done; - - out_buf = req_ctx->out_buf; - if (dst_len == ctx->key_size) { - if (out_buf[0] != 0x00) - /* Decrypted value had no leading 0 byte */ - goto done; - - dst_len--; - out_buf++; - } - - err = -EBADMSG; - if (out_buf[0] != 0x01) - goto done; - - for (pos = 1; pos < dst_len; pos++) - if (out_buf[pos] != 0xff) - break; - - if (pos < 9 || pos == dst_len || out_buf[pos] != 0x00) - goto done; - pos++; - - if (digest_info) { - if (digest_info->size > dst_len - pos) - goto done; - if (crypto_memneq(out_buf + pos, digest_info->data, - digest_info->size)) - goto done; - - pos += digest_info->size; - } - - err = 0; - - if (digest_size != dst_len - pos) { - err = -EKEYREJECTED; - req->dst_len = dst_len - pos; - goto done; - } - /* Extract appended digest. */ - sg_pcopy_to_buffer(req->src, - sg_nents_for_len(req->src, sig_size + digest_size), - req_ctx->out_buf + ctx->key_size, - digest_size, sig_size); - /* Do the actual verification step. */ - if (memcmp(req_ctx->out_buf + ctx->key_size, out_buf + pos, - digest_size) != 0) - err = -EKEYREJECTED; -done: - kfree_sensitive(req_ctx->out_buf); - - return err; -} - -static void pkcs1pad_verify_complete_cb(void *data, int err) -{ - struct akcipher_request *req = data; - - if (err == -EINPROGRESS) - goto out; - - err = pkcs1pad_verify_complete(req, err); - -out: - akcipher_request_complete(req, err); -} - -/* - * The verify operation is here for completeness similar to the verification - * defined in RFC2313 section 10.2 except that block type 0 is not accepted, - * as in RFC2437. RFC2437 section 9.2 doesn't define any operation to - * retrieve the DigestInfo from a signature, instead the user is expected - * to call the sign operation to generate the expected signature and compare - * signatures instead of the message-digests. 
- */ -static int pkcs1pad_verify(struct akcipher_request *req) -{ - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); - struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req); - const unsigned int sig_size = req->src_len; - const unsigned int digest_size = req->dst_len; - int err; - - if (WARN_ON(req->dst) || WARN_ON(!digest_size) || - !ctx->key_size || sig_size != ctx->key_size) - return -EINVAL; - - req_ctx->out_buf = kmalloc(ctx->key_size + digest_size, GFP_KERNEL); - if (!req_ctx->out_buf) - return -ENOMEM; - - pkcs1pad_sg_set_buf(req_ctx->out_sg, req_ctx->out_buf, - ctx->key_size, NULL); - - akcipher_request_set_tfm(&req_ctx->child_req, ctx->child); - akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, - pkcs1pad_verify_complete_cb, req); - - /* Reuse input buffer, output to a new buffer */ - akcipher_request_set_crypt(&req_ctx->child_req, req->src, - req_ctx->out_sg, sig_size, ctx->key_size); - - err = crypto_akcipher_encrypt(&req_ctx->child_req); - if (err != -EINPROGRESS && err != -EBUSY) - return pkcs1pad_verify_complete(req, err); - - return err; -} - static int pkcs1pad_init_tfm(struct crypto_akcipher *tfm) { struct akcipher_instance *inst = akcipher_alg_instance(tfm); @@ -598,7 +313,6 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) struct akcipher_instance *inst; struct pkcs1pad_inst_ctx *ctx; struct akcipher_alg *rsa_alg; - const char *hash_name; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AKCIPHER, &mask); @@ -624,36 +338,15 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) } err = -ENAMETOOLONG; - hash_name = crypto_attr_alg_name(tb[2]); - if (IS_ERR(hash_name)) { - if (snprintf(inst->alg.base.cra_name, - CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", - rsa_alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME) - goto err_free_inst; - - if (snprintf(inst->alg.base.cra_driver_name, - CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", - rsa_alg->base.cra_driver_name) >= - CRYPTO_MAX_ALG_NAME) - goto err_free_inst; - } else { - ctx->digest_info = rsa_lookup_asn1(hash_name); - if (!ctx->digest_info) { - err = -EINVAL; - goto err_free_inst; - } - - if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, - "pkcs1pad(%s,%s)", rsa_alg->base.cra_name, - hash_name) >= CRYPTO_MAX_ALG_NAME) - goto err_free_inst; - - if (snprintf(inst->alg.base.cra_driver_name, - CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)", - rsa_alg->base.cra_driver_name, - hash_name) >= CRYPTO_MAX_ALG_NAME) - goto err_free_inst; - } + if (snprintf(inst->alg.base.cra_name, + CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", + rsa_alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME) + goto err_free_inst; + + if (snprintf(inst->alg.base.cra_driver_name, + CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", + rsa_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + goto err_free_inst; inst->alg.base.cra_priority = rsa_alg->base.cra_priority; inst->alg.base.cra_ctxsize = sizeof(struct pkcs1pad_ctx); @@ -663,8 +356,6 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.encrypt = pkcs1pad_encrypt; inst->alg.decrypt = pkcs1pad_decrypt; - inst->alg.sign = pkcs1pad_sign; - inst->alg.verify = pkcs1pad_verify; inst->alg.set_pub_key = pkcs1pad_set_pub_key; inst->alg.set_priv_key = pkcs1pad_set_priv_key; inst->alg.max_size = pkcs1pad_get_max_size; diff --git a/crypto/rsa.c b/crypto/rsa.c index 78b28d14ced3..b7d21529c552 100644 --- a/crypto/rsa.c +++ b/crypto/rsa.c @@ -407,16 +407,25 @@ static int __init 
rsa_init(void) return err; err = crypto_register_template(&rsa_pkcs1pad_tmpl); - if (err) { - crypto_unregister_akcipher(&rsa); - return err; - } + if (err) + goto err_unregister_rsa; + + err = crypto_register_template(&rsassa_pkcs1_tmpl); + if (err) + goto err_unregister_rsa_pkcs1pad; return 0; + +err_unregister_rsa_pkcs1pad: + crypto_unregister_template(&rsa_pkcs1pad_tmpl); +err_unregister_rsa: + crypto_unregister_akcipher(&rsa); + return err; } static void __exit rsa_exit(void) { + crypto_unregister_template(&rsassa_pkcs1_tmpl); crypto_unregister_template(&rsa_pkcs1pad_tmpl); crypto_unregister_akcipher(&rsa); } diff --git a/crypto/rsassa-pkcs1.c b/crypto/rsassa-pkcs1.c new file mode 100644 index 000000000000..779c080fc013 --- /dev/null +++ b/crypto/rsassa-pkcs1.c @@ -0,0 +1,422 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RSA Signature Scheme with Appendix - PKCS #1 v1.5 (RFC 8017 sec 8.2) + * + * https://www.rfc-editor.org/rfc/rfc8017#section-8.2 + * + * Copyright (c) 2015 - 2024 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Full Hash Prefix for EMSA-PKCS1-v1_5 encoding method (RFC 9580 table 24) + * + * RSA keys are usually much larger than the hash of the message to be signed. + * The hash is therefore prepended by the Full Hash Prefix and a 0xff padding. + * The Full Hash Prefix is an ASN.1 SEQUENCE containing the hash algorithm OID. + * + * https://www.rfc-editor.org/rfc/rfc9580#table-24 + */ + +static const u8 hash_prefix_md5[] = { + 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, /* SEQUENCE (SEQUENCE (OID */ + 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x02, 0x05, /* , */ + 0x05, 0x00, 0x04, 0x10 /* NULL), OCTET STRING ) */ +}; + +static const u8 hash_prefix_sha1[] = { + 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, + 0x2b, 0x0e, 0x03, 0x02, 0x1a, + 0x05, 0x00, 0x04, 0x14 +}; + +static const u8 hash_prefix_rmd160[] = { + 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, + 0x2b, 0x24, 0x03, 0x02, 0x01, + 0x05, 0x00, 0x04, 0x14 +}; + +static const u8 hash_prefix_sha224[] = { + 0x30, 0x2d, 0x30, 0x0d, 0x06, 0x09, + 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04, + 0x05, 0x00, 0x04, 0x1c +}; + +static const u8 hash_prefix_sha256[] = { + 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, + 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, + 0x05, 0x00, 0x04, 0x20 +}; + +static const u8 hash_prefix_sha384[] = { + 0x30, 0x41, 0x30, 0x0d, 0x06, 0x09, + 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02, + 0x05, 0x00, 0x04, 0x30 +}; + +static const u8 hash_prefix_sha512[] = { + 0x30, 0x51, 0x30, 0x0d, 0x06, 0x09, + 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, + 0x05, 0x00, 0x04, 0x40 +}; + +static const u8 hash_prefix_sha3_256[] = { + 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, + 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x08, + 0x05, 0x00, 0x04, 0x20 +}; + +static const u8 hash_prefix_sha3_384[] = { + 0x30, 0x41, 0x30, 0x0d, 0x06, 0x09, + 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x09, + 0x05, 0x00, 0x04, 0x30 +}; + +static const u8 hash_prefix_sha3_512[] = { + 0x30, 0x51, 0x30, 0x0d, 0x06, 0x09, + 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x0a, + 0x05, 0x00, 0x04, 0x40 +}; + +static const struct hash_prefix { + const char *name; + const u8 *data; + size_t size; +} hash_prefixes[] = { +#define _(X) { #X, hash_prefix_##X, sizeof(hash_prefix_##X) } + _(md5), + _(sha1), + _(rmd160), + _(sha256), + _(sha384), + _(sha512), + _(sha224), +#undef _ +#define _(X) { "sha3-" #X, hash_prefix_sha3_##X, sizeof(hash_prefix_sha3_##X) } + _(256), + 
_(384), + _(512), +#undef _ + { NULL } +}; + +static const struct hash_prefix *rsassa_pkcs1_find_hash_prefix(const char *name) +{ + const struct hash_prefix *p; + + for (p = hash_prefixes; p->name; p++) + if (strcmp(name, p->name) == 0) + return p; + return NULL; +} + +struct rsassa_pkcs1_ctx { + struct crypto_akcipher *child; + unsigned int key_size; +}; + +struct rsassa_pkcs1_inst_ctx { + struct crypto_akcipher_spawn spawn; + const struct hash_prefix *hash_prefix; +}; + +static int rsassa_pkcs1_sign(struct crypto_sig *tfm, + const void *src, unsigned int slen, + void *dst, unsigned int dlen) +{ + struct sig_instance *inst = sig_alg_instance(tfm); + struct rsassa_pkcs1_inst_ctx *ictx = sig_instance_ctx(inst); + const struct hash_prefix *hash_prefix = ictx->hash_prefix; + struct rsassa_pkcs1_ctx *ctx = crypto_sig_ctx(tfm); + unsigned int child_reqsize = crypto_akcipher_reqsize(ctx->child); + struct akcipher_request *child_req __free(kfree_sensitive) = NULL; + struct scatterlist in_sg[2], out_sg; + struct crypto_wait cwait; + unsigned int pad_len; + unsigned int ps_end; + unsigned int len; + u8 *in_buf; + int err; + + if (!ctx->key_size) + return -EINVAL; + + if (dlen < ctx->key_size) + return -EOVERFLOW; + + if (slen + hash_prefix->size > ctx->key_size - 11) + return -EOVERFLOW; + + child_req = kmalloc(sizeof(*child_req) + child_reqsize + + ctx->key_size - 1 - slen, GFP_KERNEL); + if (!child_req) + return -ENOMEM; + + /* RFC 8017 sec 8.2.1 step 1 - EMSA-PKCS1-v1_5 encoding generation */ + in_buf = (u8 *)(child_req + 1) + child_reqsize; + ps_end = ctx->key_size - hash_prefix->size - slen - 2; + in_buf[0] = 0x01; + memset(in_buf + 1, 0xff, ps_end - 1); + in_buf[ps_end] = 0x00; + memcpy(in_buf + ps_end + 1, hash_prefix->data, hash_prefix->size); + + /* RFC 8017 sec 8.2.1 step 2 - RSA signature */ + crypto_init_wait(&cwait); + sg_init_table(in_sg, 2); + sg_set_buf(&in_sg[0], in_buf, ctx->key_size - 1 - slen); + sg_set_buf(&in_sg[1], src, slen); + sg_init_one(&out_sg, dst, dlen); + akcipher_request_set_tfm(child_req, ctx->child); + akcipher_request_set_crypt(child_req, in_sg, &out_sg, + ctx->key_size - 1, dlen); + akcipher_request_set_callback(child_req, CRYPTO_TFM_REQ_MAY_SLEEP, + crypto_req_done, &cwait); + + err = crypto_akcipher_decrypt(child_req); + err = crypto_wait_req(err, &cwait); + if (err) + return err; + + len = child_req->dst_len; + pad_len = ctx->key_size - len; + + /* Four billion to one */ + if (unlikely(pad_len)) { + memmove(dst + pad_len, dst, len); + memset(dst, 0, pad_len); + } + + return 0; +} + +static int rsassa_pkcs1_verify(struct crypto_sig *tfm, + const void *src, unsigned int slen, + const void *digest, unsigned int dlen) +{ + struct sig_instance *inst = sig_alg_instance(tfm); + struct rsassa_pkcs1_inst_ctx *ictx = sig_instance_ctx(inst); + const struct hash_prefix *hash_prefix = ictx->hash_prefix; + struct rsassa_pkcs1_ctx *ctx = crypto_sig_ctx(tfm); + unsigned int child_reqsize = crypto_akcipher_reqsize(ctx->child); + struct akcipher_request *child_req __free(kfree_sensitive) = NULL; + struct scatterlist in_sg, out_sg; + struct crypto_wait cwait; + unsigned int dst_len; + unsigned int pos; + u8 *out_buf; + int err; + + /* RFC 8017 sec 8.2.2 step 1 - length checking */ + if (!ctx->key_size || + slen != ctx->key_size || + !dlen) + return -EINVAL; + + /* RFC 8017 sec 8.2.2 step 2 - RSA verification */ + child_req = kmalloc(sizeof(*child_req) + child_reqsize + ctx->key_size, + GFP_KERNEL); + if (!child_req) + return -ENOMEM; + + out_buf = (u8 *)(child_req + 1) + 
child_reqsize; + + crypto_init_wait(&cwait); + sg_init_one(&in_sg, src, slen); + sg_init_one(&out_sg, out_buf, ctx->key_size); + akcipher_request_set_tfm(child_req, ctx->child); + akcipher_request_set_crypt(child_req, &in_sg, &out_sg, + slen, ctx->key_size); + akcipher_request_set_callback(child_req, CRYPTO_TFM_REQ_MAY_SLEEP, + crypto_req_done, &cwait); + + err = crypto_akcipher_encrypt(child_req); + err = crypto_wait_req(err, &cwait); + if (err) + return err; + + /* RFC 8017 sec 8.2.2 step 3 - EMSA-PKCS1-v1_5 encoding verification */ + dst_len = child_req->dst_len; + if (dst_len < ctx->key_size - 1) + return -EINVAL; + + if (dst_len == ctx->key_size) { + if (out_buf[0] != 0x00) + /* Encrypted value had no leading 0 byte */ + return -EINVAL; + + dst_len--; + out_buf++; + } + + if (out_buf[0] != 0x01) + return -EBADMSG; + + for (pos = 1; pos < dst_len; pos++) + if (out_buf[pos] != 0xff) + break; + + if (pos < 9 || pos == dst_len || out_buf[pos] != 0x00) + return -EBADMSG; + pos++; + + if (hash_prefix->size > dst_len - pos) + return -EBADMSG; + if (crypto_memneq(out_buf + pos, hash_prefix->data, hash_prefix->size)) + return -EBADMSG; + pos += hash_prefix->size; + + /* RFC 8017 sec 8.2.2 step 4 - comparison of digest with out_buf */ + if (dlen != dst_len - pos) + return -EKEYREJECTED; + if (memcmp(digest, out_buf + pos, dlen) != 0) + return -EKEYREJECTED; + + return 0; +} + +static unsigned int rsassa_pkcs1_max_size(struct crypto_sig *tfm) +{ + struct rsassa_pkcs1_ctx *ctx = crypto_sig_ctx(tfm); + + return ctx->key_size; +} + +static int rsassa_pkcs1_set_pub_key(struct crypto_sig *tfm, + const void *key, unsigned int keylen) +{ + struct rsassa_pkcs1_ctx *ctx = crypto_sig_ctx(tfm); + + return rsa_set_key(ctx->child, &ctx->key_size, RSA_PUB, key, keylen); +} + +static int rsassa_pkcs1_set_priv_key(struct crypto_sig *tfm, + const void *key, unsigned int keylen) +{ + struct rsassa_pkcs1_ctx *ctx = crypto_sig_ctx(tfm); + + return rsa_set_key(ctx->child, &ctx->key_size, RSA_PRIV, key, keylen); +} + +static int rsassa_pkcs1_init_tfm(struct crypto_sig *tfm) +{ + struct sig_instance *inst = sig_alg_instance(tfm); + struct rsassa_pkcs1_inst_ctx *ictx = sig_instance_ctx(inst); + struct rsassa_pkcs1_ctx *ctx = crypto_sig_ctx(tfm); + struct crypto_akcipher *child_tfm; + + child_tfm = crypto_spawn_akcipher(&ictx->spawn); + if (IS_ERR(child_tfm)) + return PTR_ERR(child_tfm); + + ctx->child = child_tfm; + + return 0; +} + +static void rsassa_pkcs1_exit_tfm(struct crypto_sig *tfm) +{ + struct rsassa_pkcs1_ctx *ctx = crypto_sig_ctx(tfm); + + crypto_free_akcipher(ctx->child); +} + +static void rsassa_pkcs1_free(struct sig_instance *inst) +{ + struct rsassa_pkcs1_inst_ctx *ctx = sig_instance_ctx(inst); + struct crypto_akcipher_spawn *spawn = &ctx->spawn; + + crypto_drop_akcipher(spawn); + kfree(inst); +} + +static int rsassa_pkcs1_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + struct rsassa_pkcs1_inst_ctx *ctx; + struct akcipher_alg *rsa_alg; + struct sig_instance *inst; + const char *hash_name; + u32 mask; + int err; + + err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SIG, &mask); + if (err) + return err; + + inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + ctx = sig_instance_ctx(inst); + + err = crypto_grab_akcipher(&ctx->spawn, sig_crypto_instance(inst), + crypto_attr_alg_name(tb[1]), 0, mask); + if (err) + goto err_free_inst; + + rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn); + + if (strcmp(rsa_alg->base.cra_name, "rsa") != 0) { + err = -EINVAL; 
+ goto err_free_inst; + } + + hash_name = crypto_attr_alg_name(tb[2]); + if (IS_ERR(hash_name)) { + err = PTR_ERR(hash_name); + goto err_free_inst; + } + + ctx->hash_prefix = rsassa_pkcs1_find_hash_prefix(hash_name); + if (!ctx->hash_prefix) { + err = -EINVAL; + goto err_free_inst; + } + + err = -ENAMETOOLONG; + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "pkcs1(%s,%s)", rsa_alg->base.cra_name, + hash_name) >= CRYPTO_MAX_ALG_NAME) + goto err_free_inst; + + if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "pkcs1(%s,%s)", rsa_alg->base.cra_driver_name, + hash_name) >= CRYPTO_MAX_ALG_NAME) + goto err_free_inst; + + inst->alg.base.cra_priority = rsa_alg->base.cra_priority; + inst->alg.base.cra_ctxsize = sizeof(struct rsassa_pkcs1_ctx); + + inst->alg.init = rsassa_pkcs1_init_tfm; + inst->alg.exit = rsassa_pkcs1_exit_tfm; + + inst->alg.sign = rsassa_pkcs1_sign; + inst->alg.verify = rsassa_pkcs1_verify; + inst->alg.max_size = rsassa_pkcs1_max_size; + inst->alg.set_pub_key = rsassa_pkcs1_set_pub_key; + inst->alg.set_priv_key = rsassa_pkcs1_set_priv_key; + + inst->free = rsassa_pkcs1_free; + + err = sig_register_instance(tmpl, inst); + if (err) { +err_free_inst: + rsassa_pkcs1_free(inst); + } + return err; +} + +struct crypto_template rsassa_pkcs1_tmpl = { + .name = "pkcs1", + .create = rsassa_pkcs1_create, + .module = THIS_MODULE, +}; + +MODULE_ALIAS_CRYPTO("pkcs1"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index ed971d857057..76401b3b634f 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -5569,34 +5569,38 @@ static const struct alg_test_desc alg_test_descs[] = { .cipher = __VECS(fcrypt_pcbc_tv_template) } }, { - .alg = "pkcs1pad(rsa,sha224)", + .alg = "pkcs1(rsa,sha224)", .test = alg_test_null, .fips_allowed = 1, }, { - .alg = "pkcs1pad(rsa,sha256)", - .test = alg_test_akcipher, + .alg = "pkcs1(rsa,sha256)", + .test = alg_test_sig, .fips_allowed = 1, .suite = { - .akcipher = __VECS(pkcs1pad_rsa_tv_template) + .sig = __VECS(pkcs1_rsa_tv_template) } }, { - .alg = "pkcs1pad(rsa,sha3-256)", + .alg = "pkcs1(rsa,sha3-256)", + .test = alg_test_null, + .fips_allowed = 1, + }, { + .alg = "pkcs1(rsa,sha3-384)", .test = alg_test_null, .fips_allowed = 1, }, { - .alg = "pkcs1pad(rsa,sha3-384)", + .alg = "pkcs1(rsa,sha3-512)", .test = alg_test_null, .fips_allowed = 1, }, { - .alg = "pkcs1pad(rsa,sha3-512)", + .alg = "pkcs1(rsa,sha384)", .test = alg_test_null, .fips_allowed = 1, }, { - .alg = "pkcs1pad(rsa,sha384)", + .alg = "pkcs1(rsa,sha512)", .test = alg_test_null, .fips_allowed = 1, }, { - .alg = "pkcs1pad(rsa,sha512)", + .alg = "pkcs1pad(rsa)", .test = alg_test_null, .fips_allowed = 1, }, { diff --git a/crypto/testmgr.h b/crypto/testmgr.h index fd4823c26d93..d29d03fec852 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -1268,7 +1268,7 @@ static const struct sig_testvec ecrdsa_tv_template[] = { /* * PKCS#1 RSA test vectors. Obtained from CAVS testing. 
*/ -static const struct akcipher_testvec pkcs1pad_rsa_tv_template[] = { +static const struct sig_testvec pkcs1_rsa_tv_template[] = { { .key = "\x30\x82\x04\xa5\x02\x01\x00\x02\x82\x01\x01\x00\xd7\x1e\x77\x82" @@ -1380,7 +1380,6 @@ static const struct akcipher_testvec pkcs1pad_rsa_tv_template[] = { "\xda\x62\x8d\xe1\x2a\x71\x91\x43\x40\x61\x3c\x5a\xbe\x86\xfc\x5b" "\xe6\xf9\xa9\x16\x31\x1f\xaf\x25\x6d\xc2\x4a\x23\x6e\x63\x02\xa2", .c_size = 256, - .siggen_sigver_test = true, } }; diff --git a/include/crypto/internal/rsa.h b/include/crypto/internal/rsa.h index 754f687134df..071a1951b992 100644 --- a/include/crypto/internal/rsa.h +++ b/include/crypto/internal/rsa.h @@ -82,4 +82,5 @@ static inline int rsa_set_key(struct crypto_akcipher *child, } extern struct crypto_template rsa_pkcs1pad_tmpl; +extern struct crypto_template rsassa_pkcs1_tmpl; #endif diff --git a/include/linux/slab.h b/include/linux/slab.h index b35e2db7eb0e..0268ea7abf8b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -448,6 +448,7 @@ void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T)) +DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T)) /** * ksize - Report actual allocation size of associated object diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 06132cf47016..34ea02addb70 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -1114,7 +1114,7 @@ EXPORT_SYMBOL_GPL(ima_measure_critical_data); #ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS /** - * ima_kernel_module_request - Prevent crypto-pkcs1pad(rsa,*) requests + * ima_kernel_module_request - Prevent crypto-pkcs1(rsa,*) requests * @kmod_name: kernel module name * * Avoid a verification loop where verifying the signature of the modprobe @@ -1128,7 +1128,7 @@ EXPORT_SYMBOL_GPL(ima_measure_critical_data); * algorithm on the fly, but crypto_larval_lookup() will try to use alg_name * in order to load a kernel module with same name. * - * Since we don't have any real "crypto-pkcs1pad(rsa,*)" kernel modules, + * Since we don't have any real "crypto-pkcs1(rsa,*)" kernel modules, * we are safe to fail such module request from crypto_larval_lookup(), and * avoid the verification loop. * @@ -1136,7 +1136,7 @@ EXPORT_SYMBOL_GPL(ima_measure_critical_data); */ static int ima_kernel_module_request(char *kmod_name) { - if (strncmp(kmod_name, "crypto-pkcs1pad(rsa,", 20) == 0) + if (strncmp(kmod_name, "crypto-pkcs1(rsa,", 17) == 0) return -EINVAL; return 0; -- cgit v1.2.3 From 5b553e06b3215fa97d222ebddc2bc964f1824c5b Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:19 +0200 Subject: crypto: virtio - Drop sign/verify operations The virtio crypto driver exposes akcipher sign/verify operations in a user space ABI. This blocks removal of sign/verify from akcipher_alg. Herbert opines: "I would say that this is something that we can break. Breaking it is no different to running virtio on a host that does not support these algorithms. After all, a software implementation must always be present. I deliberately left akcipher out of crypto_user because the API is still in flux. We should not let virtio constrain ourselves." https://lore.kernel.org/all/ZtqoNAgcnXnrYhZZ@gondor.apana.org.au/ "I would remove virtio akcipher support in its entirety. This API was never meant to be exposed outside of the kernel." 
https://lore.kernel.org/all/Ztqql_gqgZiMW8zz@gondor.apana.org.au/ Drop sign/verify support from virtio crypto. There's no strong reason to also remove encrypt/decrypt support, so keep it. A key selling point of virtio crypto is to allow guest access to crypto accelerators on the host. So far the only akcipher algorithm supported by virtio crypto is RSA. Dropping sign/verify merely means that the PKCS#1 padding is now always generated or verified inside the guest, but the actual signature generation/verification (which is an RSA decrypt/encrypt operation) may still use an accelerator on the host. Generating or verifying the PKCS#1 padding is cheap, so a hardware accelerator won't be of much help there. Which begs the question whether virtio crypto support for sign/verify makes sense at all. It would make sense for the sign operation if the host has a security chip to store asymmetric private keys. But the kernel doesn't even have an asymmetric_key_subtype yet for hardware-based private keys. There's at least one rudimentary driver for such chips (atmel-ecc.c for ATECC508A), but it doesn't implement the sign operation. The kernel would first have to grow support for a hardware asymmetric_key_subtype and at least one driver implementing the sign operation before exposure to guests via virtio makes sense. Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- .../crypto/virtio/virtio_crypto_akcipher_algs.c | 65 +++++++--------------- include/uapi/linux/virtio_crypto.h | 1 + 2 files changed, 22 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index cb92b7fa99c6..48fee07b7e51 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -83,23 +83,16 @@ static void virtio_crypto_dataq_akcipher_callback(struct virtio_crypto_request * case VIRTIO_CRYPTO_BADMSG: error = -EBADMSG; break; - - case VIRTIO_CRYPTO_KEY_REJECTED: - error = -EKEYREJECTED; - break; - default: error = -EIO; break; } akcipher_req = vc_akcipher_req->akcipher_req; - if (vc_akcipher_req->opcode != VIRTIO_CRYPTO_AKCIPHER_VERIFY) { - /* actuall length maybe less than dst buffer */ - akcipher_req->dst_len = len - sizeof(vc_req->status); - sg_copy_from_buffer(akcipher_req->dst, sg_nents(akcipher_req->dst), - vc_akcipher_req->dst_buf, akcipher_req->dst_len); - } + /* actual length maybe less than dst buffer */ + akcipher_req->dst_len = len - sizeof(vc_req->status); + sg_copy_from_buffer(akcipher_req->dst, sg_nents(akcipher_req->dst), + vc_akcipher_req->dst_buf, akcipher_req->dst_len); virtio_crypto_akcipher_finalize_req(vc_akcipher_req, akcipher_req, error); } @@ -230,36 +223,27 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request int node = dev_to_node(&vcrypto->vdev->dev); unsigned long flags; int ret; - bool verify = vc_akcipher_req->opcode == VIRTIO_CRYPTO_AKCIPHER_VERIFY; - unsigned int src_len = verify ? 
req->src_len + req->dst_len : req->src_len; /* out header */ sg_init_one(&outhdr_sg, req_data, sizeof(*req_data)); sgs[num_out++] = &outhdr_sg; /* src data */ - src_buf = kcalloc_node(src_len, 1, GFP_KERNEL, node); + src_buf = kcalloc_node(req->src_len, 1, GFP_KERNEL, node); if (!src_buf) return -ENOMEM; - if (verify) { - /* for verify operation, both src and dst data work as OUT direction */ - sg_copy_to_buffer(req->src, sg_nents(req->src), src_buf, src_len); - sg_init_one(&srcdata_sg, src_buf, src_len); - sgs[num_out++] = &srcdata_sg; - } else { - sg_copy_to_buffer(req->src, sg_nents(req->src), src_buf, src_len); - sg_init_one(&srcdata_sg, src_buf, src_len); - sgs[num_out++] = &srcdata_sg; + sg_copy_to_buffer(req->src, sg_nents(req->src), src_buf, req->src_len); + sg_init_one(&srcdata_sg, src_buf, req->src_len); + sgs[num_out++] = &srcdata_sg; - /* dst data */ - dst_buf = kcalloc_node(req->dst_len, 1, GFP_KERNEL, node); - if (!dst_buf) - goto free_src; + /* dst data */ + dst_buf = kcalloc_node(req->dst_len, 1, GFP_KERNEL, node); + if (!dst_buf) + goto free_src; - sg_init_one(&dstdata_sg, dst_buf, req->dst_len); - sgs[num_out + num_in++] = &dstdata_sg; - } + sg_init_one(&dstdata_sg, dst_buf, req->dst_len); + sgs[num_out + num_in++] = &dstdata_sg; vc_akcipher_req->src_buf = src_buf; vc_akcipher_req->dst_buf = dst_buf; @@ -352,16 +336,6 @@ static int virtio_crypto_rsa_decrypt(struct akcipher_request *req) return virtio_crypto_rsa_req(req, VIRTIO_CRYPTO_AKCIPHER_DECRYPT); } -static int virtio_crypto_rsa_sign(struct akcipher_request *req) -{ - return virtio_crypto_rsa_req(req, VIRTIO_CRYPTO_AKCIPHER_SIGN); -} - -static int virtio_crypto_rsa_verify(struct akcipher_request *req) -{ - return virtio_crypto_rsa_req(req, VIRTIO_CRYPTO_AKCIPHER_VERIFY); -} - static int virtio_crypto_rsa_set_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen, @@ -524,16 +498,19 @@ static struct virtio_crypto_akcipher_algo virtio_crypto_akcipher_algs[] = { .algo.base = { .encrypt = virtio_crypto_rsa_encrypt, .decrypt = virtio_crypto_rsa_decrypt, - .sign = virtio_crypto_rsa_sign, - .verify = virtio_crypto_rsa_verify, + /* + * Must specify an arbitrary hash algorithm upon + * set_{pub,priv}_key (even though it's not used + * by encrypt/decrypt) because qemu checks for it. 
+ */ .set_pub_key = virtio_crypto_p1pad_rsa_sha1_set_pub_key, .set_priv_key = virtio_crypto_p1pad_rsa_sha1_set_priv_key, .max_size = virtio_crypto_rsa_max_size, .init = virtio_crypto_rsa_init_tfm, .exit = virtio_crypto_rsa_exit_tfm, .base = { - .cra_name = "pkcs1pad(rsa,sha1)", - .cra_driver_name = "virtio-pkcs1-rsa-with-sha1", + .cra_name = "pkcs1pad(rsa)", + .cra_driver_name = "virtio-pkcs1-rsa", .cra_priority = 150, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct virtio_crypto_akcipher_ctx), diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index 71a54a6849ca..2fccb64c9d6b 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -329,6 +329,7 @@ struct virtio_crypto_op_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x00) #define VIRTIO_CRYPTO_AKCIPHER_DECRYPT \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x01) + /* akcipher sign/verify opcodes are deprecated */ #define VIRTIO_CRYPTO_AKCIPHER_SIGN \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x02) #define VIRTIO_CRYPTO_AKCIPHER_VERIFY \ -- cgit v1.2.3 From 6b34562f0cfe81f1f207fc7c146c4ff4b31eb625 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:21 +0200 Subject: crypto: akcipher - Drop sign/verify operations A sig_alg backend has just been introduced and all asymmetric sign/verify algorithms have been migrated to it. The sign/verify operations can thus be dropped from akcipher_alg. It is now purely for asymmetric encrypt/decrypt. Move struct crypto_akcipher_sync_data from internal.h to akcipher.c and unexport crypto_akcipher_sync_{prep,post}(): They're no longer used by sig.c but only locally in akcipher.c. In crypto_akcipher_sync_{prep,post}(), drop various NULL pointer checks for data->dst as they were only necessary for the verify operation. In the crypto_sig_*() API calls, remove the forks that were necessary while algorithms were converted from crypto_akcipher to crypto_sig one by one. In struct akcipher_testvec, remove the "params", "param_len" and "algo" elements as they were only needed for the ecrdsa verify operation. Remove corresponding dead code from test_akcipher_one() as well. Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- Documentation/crypto/api-akcipher.rst | 2 +- crypto/akcipher.c | 64 ++++++------------ crypto/internal.h | 19 ------ crypto/sig.c | 70 -------------------- crypto/testmgr.c | 119 ++++++++++------------------------ crypto/testmgr.h | 4 -- include/crypto/akcipher.h | 69 +++----------------- include/crypto/internal/akcipher.h | 4 +- 8 files changed, 66 insertions(+), 285 deletions(-) (limited to 'include') diff --git a/Documentation/crypto/api-akcipher.rst b/Documentation/crypto/api-akcipher.rst index 40aa8746e2a1..6f47cc70eca0 100644 --- a/Documentation/crypto/api-akcipher.rst +++ b/Documentation/crypto/api-akcipher.rst @@ -11,7 +11,7 @@ Asymmetric Cipher API :doc: Generic Public Key API .. 
kernel-doc:: include/crypto/akcipher.h - :functions: crypto_alloc_akcipher crypto_free_akcipher crypto_akcipher_set_pub_key crypto_akcipher_set_priv_key crypto_akcipher_maxsize crypto_akcipher_encrypt crypto_akcipher_decrypt crypto_akcipher_sign crypto_akcipher_verify + :functions: crypto_alloc_akcipher crypto_free_akcipher crypto_akcipher_set_pub_key crypto_akcipher_set_priv_key crypto_akcipher_maxsize crypto_akcipher_encrypt crypto_akcipher_decrypt Asymmetric Cipher Request Handle -------------------------------- diff --git a/crypto/akcipher.c b/crypto/akcipher.c index e0ff5f4dda6d..72c82d9aa077 100644 --- a/crypto/akcipher.c +++ b/crypto/akcipher.c @@ -20,6 +20,19 @@ #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e +struct crypto_akcipher_sync_data { + struct crypto_akcipher *tfm; + const void *src; + void *dst; + unsigned int slen; + unsigned int dlen; + + struct akcipher_request *req; + struct crypto_wait cwait; + struct scatterlist sg; + u8 *buf; +}; + static int __maybe_unused crypto_akcipher_report( struct sk_buff *skb, struct crypto_alg *alg) { @@ -126,10 +139,6 @@ int crypto_register_akcipher(struct akcipher_alg *alg) { struct crypto_alg *base = &alg->base; - if (!alg->sign) - alg->sign = akcipher_default_op; - if (!alg->verify) - alg->verify = akcipher_default_op; if (!alg->encrypt) alg->encrypt = akcipher_default_op; if (!alg->decrypt) @@ -158,7 +167,7 @@ int akcipher_register_instance(struct crypto_template *tmpl, } EXPORT_SYMBOL_GPL(akcipher_register_instance); -int crypto_akcipher_sync_prep(struct crypto_akcipher_sync_data *data) +static int crypto_akcipher_sync_prep(struct crypto_akcipher_sync_data *data) { unsigned int reqsize = crypto_akcipher_reqsize(data->tfm); struct akcipher_request *req; @@ -167,10 +176,7 @@ int crypto_akcipher_sync_prep(struct crypto_akcipher_sync_data *data) unsigned int len; u8 *buf; - if (data->dst) - mlen = max(data->slen, data->dlen); - else - mlen = data->slen + data->dlen; + mlen = max(data->slen, data->dlen); len = sizeof(*req) + reqsize + mlen; if (len < mlen) @@ -189,8 +195,7 @@ int crypto_akcipher_sync_prep(struct crypto_akcipher_sync_data *data) sg = &data->sg; sg_init_one(sg, buf, mlen); - akcipher_request_set_crypt(req, sg, data->dst ? 
sg : NULL, - data->slen, data->dlen); + akcipher_request_set_crypt(req, sg, sg, data->slen, data->dlen); crypto_init_wait(&data->cwait); akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, @@ -198,18 +203,16 @@ int crypto_akcipher_sync_prep(struct crypto_akcipher_sync_data *data) return 0; } -EXPORT_SYMBOL_GPL(crypto_akcipher_sync_prep); -int crypto_akcipher_sync_post(struct crypto_akcipher_sync_data *data, int err) +static int crypto_akcipher_sync_post(struct crypto_akcipher_sync_data *data, + int err) { err = crypto_wait_req(err, &data->cwait); - if (data->dst) - memcpy(data->dst, data->buf, data->dlen); + memcpy(data->dst, data->buf, data->dlen); data->dlen = data->req->dst_len; kfree_sensitive(data->req); return err; } -EXPORT_SYMBOL_GPL(crypto_akcipher_sync_post); int crypto_akcipher_sync_encrypt(struct crypto_akcipher *tfm, const void *src, unsigned int slen, @@ -248,34 +251,5 @@ int crypto_akcipher_sync_decrypt(struct crypto_akcipher *tfm, } EXPORT_SYMBOL_GPL(crypto_akcipher_sync_decrypt); -static void crypto_exit_akcipher_ops_sig(struct crypto_tfm *tfm) -{ - struct crypto_akcipher **ctx = crypto_tfm_ctx(tfm); - - crypto_free_akcipher(*ctx); -} - -int crypto_init_akcipher_ops_sig(struct crypto_tfm *tfm) -{ - struct crypto_akcipher **ctx = crypto_tfm_ctx(tfm); - struct crypto_alg *calg = tfm->__crt_alg; - struct crypto_akcipher *akcipher; - - if (!crypto_mod_get(calg)) - return -EAGAIN; - - akcipher = crypto_create_tfm(calg, &crypto_akcipher_type); - if (IS_ERR(akcipher)) { - crypto_mod_put(calg); - return PTR_ERR(akcipher); - } - - *ctx = akcipher; - tfm->exit = crypto_exit_akcipher_ops_sig; - - return 0; -} -EXPORT_SYMBOL_GPL(crypto_init_akcipher_ops_sig); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Generic public key cipher type"); diff --git a/crypto/internal.h b/crypto/internal.h index 711a6a5bfa2b..46b661be0f90 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -22,8 +22,6 @@ #include #include -struct akcipher_request; -struct crypto_akcipher; struct crypto_instance; struct crypto_template; @@ -35,19 +33,6 @@ struct crypto_larval { bool test_started; }; -struct crypto_akcipher_sync_data { - struct crypto_akcipher *tfm; - const void *src; - void *dst; - unsigned int slen; - unsigned int dlen; - - struct akcipher_request *req; - struct crypto_wait cwait; - struct scatterlist sg; - u8 *buf; -}; - enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -129,10 +114,6 @@ void *crypto_create_tfm_node(struct crypto_alg *alg, void *crypto_clone_tfm(const struct crypto_type *frontend, struct crypto_tfm *otfm); -int crypto_akcipher_sync_prep(struct crypto_akcipher_sync_data *data); -int crypto_akcipher_sync_post(struct crypto_akcipher_sync_data *data, int err); -int crypto_init_akcipher_ops_sig(struct crypto_tfm *tfm); - static inline void *crypto_create_tfm(struct crypto_alg *alg, const struct crypto_type *frontend) { diff --git a/crypto/sig.c b/crypto/sig.c index 4f36ceb7a90b..1e6b0d677472 100644 --- a/crypto/sig.c +++ b/crypto/sig.c @@ -5,12 +5,10 @@ * Copyright (c) 2023 Herbert Xu */ -#include #include #include #include #include -#include #include #include #include @@ -19,8 +17,6 @@ #define CRYPTO_ALG_TYPE_SIG_MASK 0x0000000e -static const struct crypto_type crypto_sig_type; - static void crypto_sig_exit_tfm(struct crypto_tfm *tfm) { struct crypto_sig *sig = __crypto_sig_tfm(tfm); @@ -31,9 +27,6 @@ static void crypto_sig_exit_tfm(struct crypto_tfm *tfm) static int crypto_sig_init_tfm(struct crypto_tfm *tfm) { - if (tfm->__crt_alg->cra_type != &crypto_sig_type) - return 
crypto_init_akcipher_ops_sig(tfm); - struct crypto_sig *sig = __crypto_sig_tfm(tfm); struct sig_alg *alg = crypto_sig_alg(sig); @@ -93,17 +86,9 @@ EXPORT_SYMBOL_GPL(crypto_alloc_sig); int crypto_sig_maxsize(struct crypto_sig *tfm) { - if (crypto_sig_tfm(tfm)->__crt_alg->cra_type != &crypto_sig_type) - goto akcipher; - struct sig_alg *alg = crypto_sig_alg(tfm); return alg->max_size(tfm); - -akcipher: - struct crypto_akcipher **ctx = crypto_sig_ctx(tfm); - - return crypto_akcipher_maxsize(*ctx); } EXPORT_SYMBOL_GPL(crypto_sig_maxsize); @@ -111,26 +96,9 @@ int crypto_sig_sign(struct crypto_sig *tfm, const void *src, unsigned int slen, void *dst, unsigned int dlen) { - if (crypto_sig_tfm(tfm)->__crt_alg->cra_type != &crypto_sig_type) - goto akcipher; - struct sig_alg *alg = crypto_sig_alg(tfm); return alg->sign(tfm, src, slen, dst, dlen); - -akcipher: - struct crypto_akcipher **ctx = crypto_sig_ctx(tfm); - struct crypto_akcipher_sync_data data = { - .tfm = *ctx, - .src = src, - .dst = dst, - .slen = slen, - .dlen = dlen, - }; - - return crypto_akcipher_sync_prep(&data) ?: - crypto_akcipher_sync_post(&data, - crypto_akcipher_sign(data.req)); } EXPORT_SYMBOL_GPL(crypto_sig_sign); @@ -138,65 +106,27 @@ int crypto_sig_verify(struct crypto_sig *tfm, const void *src, unsigned int slen, const void *digest, unsigned int dlen) { - if (crypto_sig_tfm(tfm)->__crt_alg->cra_type != &crypto_sig_type) - goto akcipher; - struct sig_alg *alg = crypto_sig_alg(tfm); return alg->verify(tfm, src, slen, digest, dlen); - -akcipher: - struct crypto_akcipher **ctx = crypto_sig_ctx(tfm); - struct crypto_akcipher_sync_data data = { - .tfm = *ctx, - .src = src, - .slen = slen, - .dlen = dlen, - }; - int err; - - err = crypto_akcipher_sync_prep(&data); - if (err) - return err; - - memcpy(data.buf + slen, digest, dlen); - - return crypto_akcipher_sync_post(&data, - crypto_akcipher_verify(data.req)); } EXPORT_SYMBOL_GPL(crypto_sig_verify); int crypto_sig_set_pubkey(struct crypto_sig *tfm, const void *key, unsigned int keylen) { - if (crypto_sig_tfm(tfm)->__crt_alg->cra_type != &crypto_sig_type) - goto akcipher; - struct sig_alg *alg = crypto_sig_alg(tfm); return alg->set_pub_key(tfm, key, keylen); - -akcipher: - struct crypto_akcipher **ctx = crypto_sig_ctx(tfm); - - return crypto_akcipher_set_pub_key(*ctx, key, keylen); } EXPORT_SYMBOL_GPL(crypto_sig_set_pubkey); int crypto_sig_set_privkey(struct crypto_sig *tfm, const void *key, unsigned int keylen) { - if (crypto_sig_tfm(tfm)->__crt_alg->cra_type != &crypto_sig_type) - goto akcipher; - struct sig_alg *alg = crypto_sig_alg(tfm); return alg->set_priv_key(tfm, key, keylen); - -akcipher: - struct crypto_akcipher **ctx = crypto_sig_ctx(tfm); - - return crypto_akcipher_set_priv_key(*ctx, key, keylen); } EXPORT_SYMBOL_GPL(crypto_sig_set_privkey); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 76401b3b634f..48de2fc1e965 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4131,11 +4131,9 @@ static int test_akcipher_one(struct crypto_akcipher *tfm, struct crypto_wait wait; unsigned int out_len_max, out_len = 0; int err = -ENOMEM; - struct scatterlist src, dst, src_tab[3]; - const char *m, *c; - unsigned int m_size, c_size; - const char *op; - u8 *key, *ptr; + struct scatterlist src, dst, src_tab[2]; + const char *c; + unsigned int c_size; if (testmgr_alloc_buf(xbuf)) return err; @@ -4146,92 +4144,53 @@ static int test_akcipher_one(struct crypto_akcipher *tfm, crypto_init_wait(&wait); - key = kmalloc(vecs->key_len + sizeof(u32) * 2 + vecs->param_len, - GFP_KERNEL); - 
if (!key) - goto free_req; - memcpy(key, vecs->key, vecs->key_len); - ptr = key + vecs->key_len; - ptr = test_pack_u32(ptr, vecs->algo); - ptr = test_pack_u32(ptr, vecs->param_len); - memcpy(ptr, vecs->params, vecs->param_len); - if (vecs->public_key_vec) - err = crypto_akcipher_set_pub_key(tfm, key, vecs->key_len); + err = crypto_akcipher_set_pub_key(tfm, vecs->key, + vecs->key_len); else - err = crypto_akcipher_set_priv_key(tfm, key, vecs->key_len); + err = crypto_akcipher_set_priv_key(tfm, vecs->key, + vecs->key_len); if (err) - goto free_key; + goto free_req; - /* - * First run test which do not require a private key, such as - * encrypt or verify. - */ + /* First run encrypt test which does not require a private key */ err = -ENOMEM; out_len_max = crypto_akcipher_maxsize(tfm); outbuf_enc = kzalloc(out_len_max, GFP_KERNEL); if (!outbuf_enc) - goto free_key; - - if (!vecs->siggen_sigver_test) { - m = vecs->m; - m_size = vecs->m_size; - c = vecs->c; - c_size = vecs->c_size; - op = "encrypt"; - } else { - /* Swap args so we could keep plaintext (digest) - * in vecs->m, and cooked signature in vecs->c. - */ - m = vecs->c; /* signature */ - m_size = vecs->c_size; - c = vecs->m; /* digest */ - c_size = vecs->m_size; - op = "verify"; - } + goto free_req; + + c = vecs->c; + c_size = vecs->c_size; err = -E2BIG; - if (WARN_ON(m_size > PAGE_SIZE)) + if (WARN_ON(vecs->m_size > PAGE_SIZE)) goto free_all; - memcpy(xbuf[0], m, m_size); + memcpy(xbuf[0], vecs->m, vecs->m_size); - sg_init_table(src_tab, 3); + sg_init_table(src_tab, 2); sg_set_buf(&src_tab[0], xbuf[0], 8); - sg_set_buf(&src_tab[1], xbuf[0] + 8, m_size - 8); - if (vecs->siggen_sigver_test) { - if (WARN_ON(c_size > PAGE_SIZE)) - goto free_all; - memcpy(xbuf[1], c, c_size); - sg_set_buf(&src_tab[2], xbuf[1], c_size); - akcipher_request_set_crypt(req, src_tab, NULL, m_size, c_size); - } else { - sg_init_one(&dst, outbuf_enc, out_len_max); - akcipher_request_set_crypt(req, src_tab, &dst, m_size, - out_len_max); - } + sg_set_buf(&src_tab[1], xbuf[0] + 8, vecs->m_size - 8); + sg_init_one(&dst, outbuf_enc, out_len_max); + akcipher_request_set_crypt(req, src_tab, &dst, vecs->m_size, + out_len_max); akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &wait); - err = crypto_wait_req(vecs->siggen_sigver_test ? - /* Run asymmetric signature verification */ - crypto_akcipher_verify(req) : - /* Run asymmetric encrypt */ - crypto_akcipher_encrypt(req), &wait); + err = crypto_wait_req(crypto_akcipher_encrypt(req), &wait); if (err) { - pr_err("alg: akcipher: %s test failed. err %d\n", op, err); + pr_err("alg: akcipher: encrypt test failed. err %d\n", err); goto free_all; } - if (!vecs->siggen_sigver_test && c) { + if (c) { if (req->dst_len != c_size) { - pr_err("alg: akcipher: %s test failed. Invalid output len\n", - op); + pr_err("alg: akcipher: encrypt test failed. Invalid output len\n"); err = -EINVAL; goto free_all; } /* verify that encrypted message is equal to expected */ if (memcmp(c, outbuf_enc, c_size) != 0) { - pr_err("alg: akcipher: %s test failed. Invalid output\n", - op); + pr_err("alg: akcipher: encrypt test failed. Invalid output\n"); hexdump(outbuf_enc, c_size); err = -EINVAL; goto free_all; @@ -4239,7 +4198,7 @@ static int test_akcipher_one(struct crypto_akcipher *tfm, } /* - * Don't invoke (decrypt or sign) test which require a private key + * Don't invoke decrypt test which requires a private key * for vectors with only a public key. 
*/ if (vecs->public_key_vec) { @@ -4252,13 +4211,12 @@ static int test_akcipher_one(struct crypto_akcipher *tfm, goto free_all; } - if (!vecs->siggen_sigver_test && !c) { + if (!c) { c = outbuf_enc; c_size = req->dst_len; } err = -E2BIG; - op = vecs->siggen_sigver_test ? "sign" : "decrypt"; if (WARN_ON(c_size > PAGE_SIZE)) goto free_all; memcpy(xbuf[0], c, c_size); @@ -4268,34 +4226,29 @@ static int test_akcipher_one(struct crypto_akcipher *tfm, crypto_init_wait(&wait); akcipher_request_set_crypt(req, &src, &dst, c_size, out_len_max); - err = crypto_wait_req(vecs->siggen_sigver_test ? - /* Run asymmetric signature generation */ - crypto_akcipher_sign(req) : - /* Run asymmetric decrypt */ - crypto_akcipher_decrypt(req), &wait); + err = crypto_wait_req(crypto_akcipher_decrypt(req), &wait); if (err) { - pr_err("alg: akcipher: %s test failed. err %d\n", op, err); + pr_err("alg: akcipher: decrypt test failed. err %d\n", err); goto free_all; } out_len = req->dst_len; - if (out_len < m_size) { - pr_err("alg: akcipher: %s test failed. Invalid output len %u\n", - op, out_len); + if (out_len < vecs->m_size) { + pr_err("alg: akcipher: decrypt test failed. Invalid output len %u\n", + out_len); err = -EINVAL; goto free_all; } /* verify that decrypted message is equal to the original msg */ - if (memchr_inv(outbuf_dec, 0, out_len - m_size) || - memcmp(m, outbuf_dec + out_len - m_size, m_size)) { - pr_err("alg: akcipher: %s test failed. Invalid output\n", op); + if (memchr_inv(outbuf_dec, 0, out_len - vecs->m_size) || + memcmp(vecs->m, outbuf_dec + out_len - vecs->m_size, + vecs->m_size)) { + pr_err("alg: akcipher: decrypt test failed. Invalid output\n"); hexdump(outbuf_dec, out_len); err = -EINVAL; } free_all: kfree(outbuf_dec); kfree(outbuf_enc); -free_key: - kfree(key); free_req: akcipher_request_free(req); free_xbuf: diff --git a/crypto/testmgr.h b/crypto/testmgr.h index d29d03fec852..e10b6d7f2cd9 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -150,16 +150,12 @@ struct drbg_testvec { struct akcipher_testvec { const unsigned char *key; - const unsigned char *params; const unsigned char *m; const unsigned char *c; unsigned int key_len; - unsigned int param_len; unsigned int m_size; unsigned int c_size; bool public_key_vec; - bool siggen_sigver_test; - enum OID algo; }; struct sig_testvec { diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h index 18a10cad07aa..cdf7da74bf2f 100644 --- a/include/crypto/akcipher.h +++ b/include/crypto/akcipher.h @@ -12,24 +12,19 @@ #include /** - * struct akcipher_request - public key request + * struct akcipher_request - public key cipher request * * @base: Common attributes for async crypto requests * @src: Source data - * For verify op this is signature + digest, in that case - * total size of @src is @src_len + @dst_len. - * @dst: Destination data (Should be NULL for verify op) + * @dst: Destination data * @src_len: Size of the input buffer - * For verify op it's size of signature part of @src, this part - * is supposed to be operated by cipher. - * @dst_len: Size of @dst buffer (for all ops except verify). + * @dst_len: Size of @dst buffer * It needs to be at least as big as the expected result * depending on the operation. * After operation it will be updated with the actual size of the * result. * In case of error where the dst sgl size was insufficient, * it will be updated to the size required for the operation. - * For verify op this is size of digest part in @src. 
* @__ctx: Start of private context data */ struct akcipher_request { @@ -55,15 +50,8 @@ struct crypto_akcipher { }; /** - * struct akcipher_alg - generic public key algorithm + * struct akcipher_alg - generic public key cipher algorithm * - * @sign: Function performs a sign operation as defined by public key - * algorithm. In case of error, where the dst_len was insufficient, - * the req->dst_len will be updated to the size required for the - * operation - * @verify: Function performs a complete verify operation as defined by - * public key algorithm, returning verification status. Requires - * digest value as input parameter. * @encrypt: Function performs an encrypt operation as defined by public key * algorithm. In case of error, where the dst_len was insufficient, * the req->dst_len will be updated to the size required for the @@ -94,8 +82,6 @@ struct crypto_akcipher { * @base: Common crypto API algorithm data structure */ struct akcipher_alg { - int (*sign)(struct akcipher_request *req); - int (*verify)(struct akcipher_request *req); int (*encrypt)(struct akcipher_request *req); int (*decrypt)(struct akcipher_request *req); int (*set_pub_key)(struct crypto_akcipher *tfm, const void *key, @@ -110,9 +96,9 @@ struct akcipher_alg { }; /** - * DOC: Generic Public Key API + * DOC: Generic Public Key Cipher API * - * The Public Key API is used with the algorithms of type + * The Public Key Cipher API is used with the algorithms of type * CRYPTO_ALG_TYPE_AKCIPHER (listed as type "akcipher" in /proc/crypto) */ @@ -243,10 +229,9 @@ static inline void akcipher_request_set_callback(struct akcipher_request *req, * * @req: public key request * @src: ptr to input scatter list - * @dst: ptr to output scatter list or NULL for verify op + * @dst: ptr to output scatter list * @src_len: size of the src input scatter list to be processed - * @dst_len: size of the dst output scatter list or size of signature - * portion in @src for verify op + * @dst_len: size of the dst output scatter list */ static inline void akcipher_request_set_crypt(struct akcipher_request *req, struct scatterlist *src, @@ -347,44 +332,6 @@ int crypto_akcipher_sync_decrypt(struct crypto_akcipher *tfm, const void *src, unsigned int slen, void *dst, unsigned int dlen); -/** - * crypto_akcipher_sign() - Invoke public key sign operation - * - * Function invokes the specific public key sign operation for a given - * public key algorithm - * - * @req: asymmetric key request - * - * Return: zero on success; error code in case of error - */ -static inline int crypto_akcipher_sign(struct akcipher_request *req) -{ - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - - return crypto_akcipher_alg(tfm)->sign(req); -} - -/** - * crypto_akcipher_verify() - Invoke public key signature verification - * - * Function invokes the specific public key signature verification operation - * for a given public key algorithm. - * - * @req: asymmetric key request - * - * Note: req->dst should be NULL, req->src should point to SG of size - * (req->src_size + req->dst_size), containing signature (of req->src_size - * length) with appended digest (of req->dst_size length). - * - * Return: zero on verification success; error code in case of error. 
- */ -static inline int crypto_akcipher_verify(struct akcipher_request *req) -{ - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - - return crypto_akcipher_alg(tfm)->verify(req); -} - /** * crypto_akcipher_set_pub_key() - Invoke set public key operation * diff --git a/include/crypto/internal/akcipher.h b/include/crypto/internal/akcipher.h index a0fba4b2eccf..14ee62bc52b6 100644 --- a/include/crypto/internal/akcipher.h +++ b/include/crypto/internal/akcipher.h @@ -124,7 +124,7 @@ static inline struct akcipher_alg *crypto_spawn_akcipher_alg( /** * crypto_register_akcipher() -- Register public key algorithm * - * Function registers an implementation of a public key verify algorithm + * Function registers an implementation of a public key cipher algorithm * * @alg: algorithm definition * @@ -135,7 +135,7 @@ int crypto_register_akcipher(struct akcipher_alg *alg); /** * crypto_unregister_akcipher() -- Unregister public key algorithm * - * Function unregisters an implementation of a public key verify algorithm + * Function unregisters an implementation of a public key cipher algorithm * * @alg: algorithm definition */ -- cgit v1.2.3 From 5ba296674e468ddd40b6ef5aa845c2d2ee794b84 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:22 +0200 Subject: crypto: sig - Move crypto_sig_*() API calls to include file The crypto_sig_*() API calls lived in sig.c so far because they needed access to struct crypto_sig_type: This was necessary to differentiate between signature algorithms that had already been migrated from crypto_akcipher to crypto_sig and those that hadn't yet. Now that all algorithms have been migrated, the API calls can become static inlines in to mimic what is doing. Signed-off-by: Lukas Wunner Reviewed-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/sig.c | 46 ---------------------------------------------- include/crypto/sig.h | 47 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 36 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/crypto/sig.c b/crypto/sig.c index 1e6b0d677472..84d0ea9fd73b 100644 --- a/crypto/sig.c +++ b/crypto/sig.c @@ -84,52 +84,6 @@ struct crypto_sig *crypto_alloc_sig(const char *alg_name, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(crypto_alloc_sig); -int crypto_sig_maxsize(struct crypto_sig *tfm) -{ - struct sig_alg *alg = crypto_sig_alg(tfm); - - return alg->max_size(tfm); -} -EXPORT_SYMBOL_GPL(crypto_sig_maxsize); - -int crypto_sig_sign(struct crypto_sig *tfm, - const void *src, unsigned int slen, - void *dst, unsigned int dlen) -{ - struct sig_alg *alg = crypto_sig_alg(tfm); - - return alg->sign(tfm, src, slen, dst, dlen); -} -EXPORT_SYMBOL_GPL(crypto_sig_sign); - -int crypto_sig_verify(struct crypto_sig *tfm, - const void *src, unsigned int slen, - const void *digest, unsigned int dlen) -{ - struct sig_alg *alg = crypto_sig_alg(tfm); - - return alg->verify(tfm, src, slen, digest, dlen); -} -EXPORT_SYMBOL_GPL(crypto_sig_verify); - -int crypto_sig_set_pubkey(struct crypto_sig *tfm, - const void *key, unsigned int keylen) -{ - struct sig_alg *alg = crypto_sig_alg(tfm); - - return alg->set_pub_key(tfm, key, keylen); -} -EXPORT_SYMBOL_GPL(crypto_sig_set_pubkey); - -int crypto_sig_set_privkey(struct crypto_sig *tfm, - const void *key, unsigned int keylen) -{ - struct sig_alg *alg = crypto_sig_alg(tfm); - - return alg->set_priv_key(tfm, key, keylen); -} -EXPORT_SYMBOL_GPL(crypto_sig_set_privkey); - static void sig_prepare_alg(struct sig_alg *alg) { struct crypto_alg *base = &alg->base; diff --git 
a/include/crypto/sig.h b/include/crypto/sig.h index f0f52a7c5ae7..bbc902642bf5 100644 --- a/include/crypto/sig.h +++ b/include/crypto/sig.h @@ -130,7 +130,12 @@ static inline void crypto_free_sig(struct crypto_sig *tfm) * * @tfm: signature tfm handle allocated with crypto_alloc_sig() */ -int crypto_sig_maxsize(struct crypto_sig *tfm); +static inline int crypto_sig_maxsize(struct crypto_sig *tfm) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->max_size(tfm); +} /** * crypto_sig_sign() - Invoke signing operation @@ -145,9 +150,14 @@ int crypto_sig_maxsize(struct crypto_sig *tfm); * * Return: zero on success; error code in case of error */ -int crypto_sig_sign(struct crypto_sig *tfm, - const void *src, unsigned int slen, - void *dst, unsigned int dlen); +static inline int crypto_sig_sign(struct crypto_sig *tfm, + const void *src, unsigned int slen, + void *dst, unsigned int dlen) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->sign(tfm, src, slen, dst, dlen); +} /** * crypto_sig_verify() - Invoke signature verification @@ -163,9 +173,14 @@ int crypto_sig_sign(struct crypto_sig *tfm, * * Return: zero on verification success; error code in case of error. */ -int crypto_sig_verify(struct crypto_sig *tfm, - const void *src, unsigned int slen, - const void *digest, unsigned int dlen); +static inline int crypto_sig_verify(struct crypto_sig *tfm, + const void *src, unsigned int slen, + const void *digest, unsigned int dlen) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->verify(tfm, src, slen, digest, dlen); +} /** * crypto_sig_set_pubkey() - Invoke set public key operation @@ -180,8 +195,13 @@ int crypto_sig_verify(struct crypto_sig *tfm, * * Return: zero on success; error code in case of error */ -int crypto_sig_set_pubkey(struct crypto_sig *tfm, - const void *key, unsigned int keylen); +static inline int crypto_sig_set_pubkey(struct crypto_sig *tfm, + const void *key, unsigned int keylen) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->set_pub_key(tfm, key, keylen); +} /** * crypto_sig_set_privkey() - Invoke set private key operation @@ -196,6 +216,11 @@ int crypto_sig_set_pubkey(struct crypto_sig *tfm, * * Return: zero on success; error code in case of error */ -int crypto_sig_set_privkey(struct crypto_sig *tfm, - const void *key, unsigned int keylen); +static inline int crypto_sig_set_privkey(struct crypto_sig *tfm, + const void *key, unsigned int keylen) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->set_priv_key(tfm, key, keylen); +} #endif -- cgit v1.2.3 From 4df86c6ea5c37fe0452638f39a1e4b189da75c54 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:23 +0200 Subject: ASN.1: Clean up include statements in public headers If is the first header included from a .c file (due to headers being sorted alphabetically), the compiler complains: include/linux/asn1_decoder.h:18:29: error: unknown type name 'size_t' Avoid by including . Jonathan notes that the counterpart already includes , but additionally includes the unnecessary . Drop it. 
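For illustration only, a minimal translation unit that hits the error (the
file name is made up; the missing dependency is presumably <linux/types.h>,
which is where the kernel declares size_t):

	/* foo.c - alphabetical sorting puts the decoder header first */
	#include <linux/asn1_decoder.h>
	#include <linux/module.h>

	/*
	 * asn1_ber_decoder() takes a size_t length argument, so without
	 * <linux/types.h> already in scope the prototype in asn1_decoder.h
	 * fails to parse with "unknown type name 'size_t'".
	 */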
Signed-off-by: Lukas Wunner Reviewed-by: Stefan Berger Reviewed-by: Jonathan Cameron Signed-off-by: Herbert Xu --- include/linux/asn1_decoder.h | 1 + include/linux/asn1_encoder.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/asn1_decoder.h b/include/linux/asn1_decoder.h index 83f9c6e1e5e9..b41bce82a191 100644 --- a/include/linux/asn1_decoder.h +++ b/include/linux/asn1_decoder.h @@ -9,6 +9,7 @@ #define _LINUX_ASN1_DECODER_H #include +#include struct asn1_decoder; diff --git a/include/linux/asn1_encoder.h b/include/linux/asn1_encoder.h index 08cd0c2ad34f..d17484dffb74 100644 --- a/include/linux/asn1_encoder.h +++ b/include/linux/asn1_encoder.h @@ -6,7 +6,6 @@ #include #include #include -#include #define asn1_oid_len(oid) (sizeof(oid)/sizeof(u32)) unsigned char * -- cgit v1.2.3 From d6793ff974e07e4eea151d1f0805e92d042825a1 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:25 +0200 Subject: crypto: ecdsa - Move X9.62 signature decoding into template Unlike the rsa driver, which separates signature decoding and signature verification into two steps, the ecdsa driver does both in one. This restricts users to the one signature format currently supported (X9.62) and prevents addition of others such as P1363, which is needed by the forthcoming SPDM library (Security Protocol and Data Model) for PCI device authentication. Per Herbert's suggestion, change ecdsa to use a "raw" signature encoding and then implement X9.62 and P1363 as templates which convert their respective encodings to the raw one. One may then specify "x962(ecdsa-nist-XXX)" or "p1363(ecdsa-nist-XXX)" to pick the encoding. The present commit moves X9.62 decoding to a template. A separate commit is going to introduce another template for P1363 decoding. The ecdsa driver internally represents a signature as two u64 arrays of size ECC_MAX_BYTES. This appears to be the most natural choice for the raw format as it can directly be used for verification without having to further decode signature data or copy it around. Repurpose all the existing test vectors for "x962(ecdsa-nist-XXX)" and create a duplicate of them to test the raw encoding. 
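For callers the encoding selection then becomes a template instantiation.
A minimal sketch (the helper name and error handling are illustrative;
key, signature and digest buffers are assumed to be supplied elsewhere):

	#include <crypto/sig.h>
	#include <linux/err.h>

	static int verify_p256_x962(const void *key, unsigned int keylen,
				    const void *sig, unsigned int siglen,
				    const void *digest, unsigned int dlen)
	{
		struct crypto_sig *tfm;
		int err;

		/*
		 * Instantiating the template picks the X9.62 encoding; once
		 * the follow-up P1363 template exists, "p1363(ecdsa-nist-p256)"
		 * would pick that one instead.
		 */
		tfm = crypto_alloc_sig("x962(ecdsa-nist-p256)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_sig_set_pubkey(tfm, key, keylen);
		if (!err)
			err = crypto_sig_verify(tfm, sig, siglen, digest, dlen);

		crypto_free_sig(tfm);
		return err;
	}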
Link: https://lore.kernel.org/all/ZoHXyGwRzVvYkcTP@gondor.apana.org.au/ Signed-off-by: Lukas Wunner Tested-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/Makefile | 3 +- crypto/asymmetric_keys/public_key.c | 3 + crypto/ecdsa-x962.c | 201 +++++++++++ crypto/ecdsa.c | 79 +--- crypto/testmgr.c | 27 ++ crypto/testmgr.h | 699 +++++++++++++++++++++++++++++++++++- include/crypto/internal/ecc.h | 13 + 7 files changed, 955 insertions(+), 70 deletions(-) create mode 100644 crypto/ecdsa-x962.c (limited to 'include') diff --git a/crypto/Makefile b/crypto/Makefile index 7de29bf843e9..af43a1bd1cfa 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -52,8 +52,9 @@ rsa_generic-y += rsassa-pkcs1.o obj-$(CONFIG_CRYPTO_RSA) += rsa_generic.o $(obj)/ecdsasignature.asn1.o: $(obj)/ecdsasignature.asn1.c $(obj)/ecdsasignature.asn1.h -$(obj)/ecdsa.o: $(obj)/ecdsasignature.asn1.h +$(obj)/ecdsa-x962.o: $(obj)/ecdsasignature.asn1.h ecdsa_generic-y += ecdsa.o +ecdsa_generic-y += ecdsa-x962.o ecdsa_generic-y += ecdsasignature.asn1.o obj-$(CONFIG_CRYPTO_ECDSA) += ecdsa_generic.o diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index 3fb27ecd65f6..cc6d48cafa2b 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -130,6 +130,9 @@ software_key_determine_akcipher(const struct public_key *pkey, strcmp(hash_algo, "sha3-384") != 0 && strcmp(hash_algo, "sha3-512") != 0) return -EINVAL; + n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", + encoding, pkey->pkey_algo); + return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0; } else if (strcmp(pkey->pkey_algo, "ecrdsa") == 0) { if (strcmp(encoding, "raw") != 0) return -EINVAL; diff --git a/crypto/ecdsa-x962.c b/crypto/ecdsa-x962.c new file mode 100644 index 000000000000..022e654c075d --- /dev/null +++ b/crypto/ecdsa-x962.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * ECDSA X9.62 signature encoding + * + * Copyright (c) 2021 IBM Corporation + * Copyright (c) 2024 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ecdsasignature.asn1.h" + +struct ecdsa_x962_ctx { + struct crypto_sig *child; +}; + +struct ecdsa_x962_signature_ctx { + struct ecdsa_raw_sig sig; + unsigned int ndigits; +}; + +/* Get the r and s components of a signature from the X.509 certificate. */ +static int ecdsa_get_signature_rs(u64 *dest, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen, + unsigned int ndigits) +{ + size_t bufsize = ndigits * sizeof(u64); + const char *d = value; + + if (!value || !vlen || vlen > bufsize + 1) + return -EINVAL; + + /* + * vlen may be 1 byte larger than bufsize due to a leading zero byte + * (necessary if the most significant bit of the integer is set). 
+ */ + if (vlen > bufsize) { + /* skip over leading zeros that make 'value' a positive int */ + if (*d == 0) { + vlen -= 1; + d++; + } else { + return -EINVAL; + } + } + + ecc_digits_from_bytes(d, vlen, dest, ndigits); + + return 0; +} + +int ecdsa_get_signature_r(void *context, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen) +{ + struct ecdsa_x962_signature_ctx *sig_ctx = context; + + return ecdsa_get_signature_rs(sig_ctx->sig.r, hdrlen, tag, value, vlen, + sig_ctx->ndigits); +} + +int ecdsa_get_signature_s(void *context, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen) +{ + struct ecdsa_x962_signature_ctx *sig_ctx = context; + + return ecdsa_get_signature_rs(sig_ctx->sig.s, hdrlen, tag, value, vlen, + sig_ctx->ndigits); +} + +static int ecdsa_x962_verify(struct crypto_sig *tfm, + const void *src, unsigned int slen, + const void *digest, unsigned int dlen) +{ + struct ecdsa_x962_ctx *ctx = crypto_sig_ctx(tfm); + struct ecdsa_x962_signature_ctx sig_ctx; + int err; + + sig_ctx.ndigits = DIV_ROUND_UP(crypto_sig_maxsize(ctx->child), + sizeof(u64)); + + err = asn1_ber_decoder(&ecdsasignature_decoder, &sig_ctx, src, slen); + if (err < 0) + return err; + + return crypto_sig_verify(ctx->child, &sig_ctx.sig, sizeof(sig_ctx.sig), + digest, dlen); +} + +static unsigned int ecdsa_x962_max_size(struct crypto_sig *tfm) +{ + struct ecdsa_x962_ctx *ctx = crypto_sig_ctx(tfm); + + return crypto_sig_maxsize(ctx->child); +} + +static int ecdsa_x962_set_pub_key(struct crypto_sig *tfm, + const void *key, unsigned int keylen) +{ + struct ecdsa_x962_ctx *ctx = crypto_sig_ctx(tfm); + + return crypto_sig_set_pubkey(ctx->child, key, keylen); +} + +static int ecdsa_x962_init_tfm(struct crypto_sig *tfm) +{ + struct sig_instance *inst = sig_alg_instance(tfm); + struct crypto_sig_spawn *spawn = sig_instance_ctx(inst); + struct ecdsa_x962_ctx *ctx = crypto_sig_ctx(tfm); + struct crypto_sig *child_tfm; + + child_tfm = crypto_spawn_sig(spawn); + if (IS_ERR(child_tfm)) + return PTR_ERR(child_tfm); + + ctx->child = child_tfm; + + return 0; +} + +static void ecdsa_x962_exit_tfm(struct crypto_sig *tfm) +{ + struct ecdsa_x962_ctx *ctx = crypto_sig_ctx(tfm); + + crypto_free_sig(ctx->child); +} + +static void ecdsa_x962_free(struct sig_instance *inst) +{ + struct crypto_sig_spawn *spawn = sig_instance_ctx(inst); + + crypto_drop_sig(spawn); + kfree(inst); +} + +static int ecdsa_x962_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + struct crypto_sig_spawn *spawn; + struct sig_instance *inst; + struct sig_alg *ecdsa_alg; + u32 mask; + int err; + + err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SIG, &mask); + if (err) + return err; + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + spawn = sig_instance_ctx(inst); + + err = crypto_grab_sig(spawn, sig_crypto_instance(inst), + crypto_attr_alg_name(tb[1]), 0, mask); + if (err) + goto err_free_inst; + + ecdsa_alg = crypto_spawn_sig_alg(spawn); + + err = -EINVAL; + if (strncmp(ecdsa_alg->base.cra_name, "ecdsa", 5) != 0) + goto err_free_inst; + + err = crypto_inst_setname(sig_crypto_instance(inst), tmpl->name, + &ecdsa_alg->base); + if (err) + goto err_free_inst; + + inst->alg.base.cra_priority = ecdsa_alg->base.cra_priority; + inst->alg.base.cra_ctxsize = sizeof(struct ecdsa_x962_ctx); + + inst->alg.init = ecdsa_x962_init_tfm; + inst->alg.exit = ecdsa_x962_exit_tfm; + + inst->alg.verify = ecdsa_x962_verify; + inst->alg.max_size = ecdsa_x962_max_size; + inst->alg.set_pub_key = 
ecdsa_x962_set_pub_key; + + inst->free = ecdsa_x962_free; + + err = sig_register_instance(tmpl, inst); + if (err) { +err_free_inst: + ecdsa_x962_free(inst); + } + return err; +} + +struct crypto_template ecdsa_x962_tmpl = { + .name = "x962", + .create = ecdsa_x962_create, + .module = THIS_MODULE, +}; + +MODULE_ALIAS_CRYPTO("x962"); diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index 4a0ca93c99ea..1f7c29468a86 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -8,9 +8,6 @@ #include #include #include -#include - -#include "ecdsasignature.asn1.h" struct ecc_ctx { unsigned int curve_id; @@ -22,61 +19,6 @@ struct ecc_ctx { struct ecc_point pub_key; }; -struct ecdsa_signature_ctx { - const struct ecc_curve *curve; - u64 r[ECC_MAX_DIGITS]; - u64 s[ECC_MAX_DIGITS]; -}; - -/* - * Get the r and s components of a signature from the X509 certificate. - */ -static int ecdsa_get_signature_rs(u64 *dest, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen, unsigned int ndigits) -{ - size_t bufsize = ndigits * sizeof(u64); - const char *d = value; - - if (!value || !vlen || vlen > bufsize + 1) - return -EINVAL; - - /* - * vlen may be 1 byte larger than bufsize due to a leading zero byte - * (necessary if the most significant bit of the integer is set). - */ - if (vlen > bufsize) { - /* skip over leading zeros that make 'value' a positive int */ - if (*d == 0) { - vlen -= 1; - d++; - } else { - return -EINVAL; - } - } - - ecc_digits_from_bytes(d, vlen, dest, ndigits); - - return 0; -} - -int ecdsa_get_signature_r(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) -{ - struct ecdsa_signature_ctx *sig = context; - - return ecdsa_get_signature_rs(sig->r, hdrlen, tag, value, vlen, - sig->curve->g.ndigits); -} - -int ecdsa_get_signature_s(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) -{ - struct ecdsa_signature_ctx *sig = context; - - return ecdsa_get_signature_rs(sig->s, hdrlen, tag, value, vlen, - sig->curve->g.ndigits); -} - static int _ecdsa_verify(struct ecc_ctx *ctx, const u64 *hash, const u64 *r, const u64 *s) { const struct ecc_curve *curve = ctx->curve; @@ -126,25 +68,21 @@ static int ecdsa_verify(struct crypto_sig *tfm, { struct ecc_ctx *ctx = crypto_sig_ctx(tfm); size_t bufsize = ctx->curve->g.ndigits * sizeof(u64); - struct ecdsa_signature_ctx sig_ctx = { - .curve = ctx->curve, - }; + const struct ecdsa_raw_sig *sig = src; u64 hash[ECC_MAX_DIGITS]; - int ret; if (unlikely(!ctx->pub_key_set)) return -EINVAL; - ret = asn1_ber_decoder(&ecdsasignature_decoder, &sig_ctx, src, slen); - if (ret < 0) - return ret; + if (slen != sizeof(*sig)) + return -EINVAL; if (bufsize > dlen) bufsize = dlen; ecc_digits_from_bytes(digest, bufsize, hash, ctx->curve->g.ndigits); - return _ecdsa_verify(ctx, hash, sig_ctx.r, sig_ctx.s); + return _ecdsa_verify(ctx, hash, sig->r, sig->s); } static int ecdsa_ecc_ctx_init(struct ecc_ctx *ctx, unsigned int curve_id) @@ -340,8 +278,15 @@ static int __init ecdsa_init(void) if (ret) goto nist_p521_error; + ret = crypto_register_template(&ecdsa_x962_tmpl); + if (ret) + goto x962_tmpl_error; + return 0; +x962_tmpl_error: + crypto_unregister_sig(&ecdsa_nist_p521); + nist_p521_error: crypto_unregister_sig(&ecdsa_nist_p384); @@ -356,6 +301,8 @@ nist_p256_error: static void __exit ecdsa_exit(void) { + crypto_unregister_template(&ecdsa_x962_tmpl); + if (ecdsa_nist_p192_registered) crypto_unregister_sig(&ecdsa_nist_p192); crypto_unregister_sig(&ecdsa_nist_p256); diff --git a/crypto/testmgr.c b/crypto/testmgr.c 
index 48de2fc1e965..5760615e27b1 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -5750,6 +5750,33 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(wp512_tv_template) } + }, { + .alg = "x962(ecdsa-nist-p192)", + .test = alg_test_sig, + .suite = { + .sig = __VECS(x962_ecdsa_nist_p192_tv_template) + } + }, { + .alg = "x962(ecdsa-nist-p256)", + .test = alg_test_sig, + .fips_allowed = 1, + .suite = { + .sig = __VECS(x962_ecdsa_nist_p256_tv_template) + } + }, { + .alg = "x962(ecdsa-nist-p384)", + .test = alg_test_sig, + .fips_allowed = 1, + .suite = { + .sig = __VECS(x962_ecdsa_nist_p384_tv_template) + } + }, { + .alg = "x962(ecdsa-nist-p521)", + .test = alg_test_sig, + .fips_allowed = 1, + .suite = { + .sig = __VECS(x962_ecdsa_nist_p521_tv_template) + } }, { .alg = "xcbc(aes)", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index e10b6d7f2cd9..2bd77eaafdf6 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -21,6 +21,7 @@ #define _CRYPTO_TESTMGR_H #include +#include #define MAX_IVLEN 32 @@ -656,10 +657,702 @@ static const struct akcipher_testvec rsa_tv_template[] = { } }; +#ifdef CONFIG_CPU_BIG_ENDIAN +#define be64_to_cpua(b1, b2, b3, b4, b5, b6, b7, b8) \ + 0x##b1, 0x##b2, 0x##b3, 0x##b4, 0x##b5, 0x##b6, 0x##b7, 0x##b8 +#else +#define be64_to_cpua(b1, b2, b3, b4, b5, b6, b7, b8) \ + 0x##b8, 0x##b7, 0x##b6, 0x##b5, 0x##b4, 0x##b3, 0x##b2, 0x##b1 +#endif + /* * ECDSA test vectors. */ static const struct sig_testvec ecdsa_nist_p192_tv_template[] = { + { + .key = /* secp192r1(sha1) */ + "\x04\xf7\x46\xf8\x2f\x15\xf6\x22\x8e\xd7\x57\x4f\xcc\xe7\xbb\xc1" + "\xd4\x09\x73\xcf\xea\xd0\x15\x07\x3d\xa5\x8a\x8a\x95\x43\xe4\x68" + "\xea\xc6\x25\xc1\xc1\x01\x25\x4c\x7e\xc3\x3c\xa6\x04\x0a\xe7\x08" + "\x98", + .key_len = 49, + .m = + "\xcd\xb9\xd2\x1c\xb7\x6f\xcd\x44\xb3\xfd\x63\xea\xa3\x66\x7f\xae" + "\x63\x85\xe7\x82", + .m_size = 20, + .c = (const unsigned char[]){ + be64_to_cpua(ad, 59, ad, 88, 27, d6, 92, 6b), + be64_to_cpua(a0, 27, 91, c6, f6, 7f, c3, 09), + be64_to_cpua(ba, e5, 93, 83, 6e, b6, 3b, 63), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(86, 80, 6f, a5, 79, 77, da, d0), + be64_to_cpua(ef, 95, 52, 7b, a0, 0f, e4, 18), + be64_to_cpua(10, 68, 01, 9d, ba, ce, 83, 08), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, { + .key = /* secp192r1(sha224) */ + "\x04\xb6\x4b\xb1\xd1\xac\xba\x24\x8f\x65\xb2\x60\x00\x90\xbf\xbd" + "\x78\x05\x73\xe9\x79\x1d\x6f\x7c\x0b\xd2\xc3\x93\xa7\x28\xe1\x75" + "\xf7\xd5\x95\x1d\x28\x10\xc0\x75\x50\x5c\x1a\x4f\x3f\x8f\xa5\xee" + "\xa3", + .key_len = 49, + .m = + "\x8d\xd6\xb8\x3e\xe5\xff\x23\xf6\x25\xa2\x43\x42\x74\x45\xa7\x40" + "\x3a\xff\x2f\xe1\xd3\xf6\x9f\xe8\x33\xcb\x12\x11", + .m_size = 28, + .c = (const unsigned char[]){ + be64_to_cpua(83, 7b, 12, e6, b6, 5b, cb, d4), + be64_to_cpua(14, f8, 11, 2b, 55, dc, ae, 37), + be64_to_cpua(5a, 8b, 82, 69, 7e, 8a, 0a, 09), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 
00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(a3, e3, 5c, 99, db, 92, 5b, 36), + be64_to_cpua(eb, c3, 92, 0f, 1e, 72, ee, c4), + be64_to_cpua(6a, 14, 4f, 53, 75, c8, 02, 48), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, { + .key = /* secp192r1(sha256) */ + "\x04\xe2\x51\x24\x9b\xf7\xb6\x32\x82\x39\x66\x3d\x5b\xec\x3b\xae" + "\x0c\xd5\xf2\x67\xd1\xc7\xe1\x02\xe4\xbf\x90\x62\xb8\x55\x75\x56" + "\x69\x20\x5e\xcb\x4e\xca\x33\xd6\xcb\x62\x6b\x94\xa9\xa2\xe9\x58" + "\x91", + .key_len = 49, + .m = + "\x35\xec\xa1\xa0\x9e\x14\xde\x33\x03\xb6\xf6\xbd\x0c\x2f\xb2\xfd" + "\x1f\x27\x82\xa5\xd7\x70\x3f\xef\xa0\x82\x69\x8e\x73\x31\x8e\xd7", + .m_size = 32, + .c = (const unsigned char[]){ + be64_to_cpua(01, 48, fb, 5f, 72, 2a, d4, 8f), + be64_to_cpua(6b, 1a, 58, 56, f1, 8f, f7, fd), + be64_to_cpua(3f, 72, 3f, 1f, 42, d2, 3f, 1d), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(7d, 3a, 97, d9, cd, 1a, 6a, 49), + be64_to_cpua(32, dd, 41, 74, 6a, 51, c7, d9), + be64_to_cpua(b3, 69, 43, fd, 48, 19, 86, cf), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, { + .key = /* secp192r1(sha384) */ + "\x04\x5a\x13\xfe\x68\x86\x4d\xf4\x17\xc7\xa4\xe5\x8c\x65\x57\xb7" + "\x03\x73\x26\x57\xfb\xe5\x58\x40\xd8\xfd\x49\x05\xab\xf1\x66\x1f" + "\xe2\x9d\x93\x9e\xc2\x22\x5a\x8b\x4f\xf3\x77\x22\x59\x7e\xa6\x4e" + "\x8b", + .key_len = 49, + .m = + "\x9d\x2e\x1a\x8f\xed\x6c\x4b\x61\xae\xac\xd5\x19\x79\xce\x67\xf9" + "\xa0\x34\xeb\xb0\x81\xf9\xd9\xdc\x6e\xb3\x5c\xa8\x69\xfc\x8a\x61" + "\x39\x81\xfb\xfd\x5c\x30\x6b\xa8\xee\xed\x89\xaf\xa3\x05\xe4\x78", + .m_size = 48, + .c = (const unsigned char[]){ + be64_to_cpua(dd, 15, bb, d6, 8c, a7, 03, 78), + be64_to_cpua(cf, 7f, 34, b4, b4, e5, c5, 00), + be64_to_cpua(f0, a3, 38, ce, 2b, f8, 9d, 1a), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(93, 12, 3b, 3b, 28, fb, 6d, e1), + be64_to_cpua(d1, 01, 77, 44, 5d, 53, a4, 7c), + be64_to_cpua(64, bc, 5a, 1f, 82, 96, 61, d7), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + 
}, { + .key = /* secp192r1(sha512) */ + "\x04\xd5\xf2\x6e\xc3\x94\x5c\x52\xbc\xdf\x86\x6c\x14\xd1\xca\xea" + "\xcc\x72\x3a\x8a\xf6\x7a\x3a\x56\x36\x3b\xca\xc6\x94\x0e\x17\x1d" + "\x9e\xa0\x58\x28\xf9\x4b\xe6\xd1\xa5\x44\x91\x35\x0d\xe7\xf5\x11" + "\x57", + .key_len = 49, + .m = + "\xd5\x4b\xe9\x36\xda\xd8\x6e\xc0\x50\x03\xbe\x00\x43\xff\xf0\x23" + "\xac\xa2\x42\xe7\x37\x77\x79\x52\x8f\x3e\xc0\x16\xc1\xfc\x8c\x67" + "\x16\xbc\x8a\x5d\x3b\xd3\x13\xbb\xb6\xc0\x26\x1b\xeb\x33\xcc\x70" + "\x4a\xf2\x11\x37\xe8\x1b\xba\x55\xac\x69\xe1\x74\x62\x7c\x6e\xb5", + .m_size = 64, + .c = (const unsigned char[]){ + be64_to_cpua(2b, 11, 2d, 1c, b6, 06, c9, 6c), + be64_to_cpua(dd, 3f, 07, 87, 12, a0, d4, ac), + be64_to_cpua(88, 5b, 8f, 59, 43, bf, cf, c6), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(28, 6a, df, 97, fd, 82, 76, 24), + be64_to_cpua(a9, 14, 2a, 5e, f5, e5, fb, 72), + be64_to_cpua(73, b4, 22, 9a, 98, 73, 3c, 83), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, +}; + +static const struct sig_testvec ecdsa_nist_p256_tv_template[] = { + { + .key = /* secp256r1(sha1) */ + "\x04\xb9\x7b\xbb\xd7\x17\x64\xd2\x7e\xfc\x81\x5d\x87\x06\x83\x41" + "\x22\xd6\x9a\xaa\x87\x17\xec\x4f\x63\x55\x2f\x94\xba\xdd\x83\xe9" + "\x34\x4b\xf3\xe9\x91\x13\x50\xb6\xcb\xca\x62\x08\xe7\x3b\x09\xdc" + "\xc3\x63\x4b\x2d\xb9\x73\x53\xe4\x45\xe6\x7c\xad\xe7\x6b\xb0\xe8" + "\xaf", + .key_len = 65, + .m = + "\xc2\x2b\x5f\x91\x78\x34\x26\x09\x42\x8d\x6f\x51\xb2\xc5\xaf\x4c" + "\x0b\xde\x6a\x42", + .m_size = 20, + .c = (const unsigned char[]){ + be64_to_cpua(ee, ca, 6a, 52, 0e, 48, 4d, cc), + be64_to_cpua(f7, d4, ad, 8d, 94, 5a, 69, 89), + be64_to_cpua(cf, d4, e7, b7, f0, 82, 56, 41), + be64_to_cpua(f9, 25, ce, 9f, 3a, a6, 35, 81), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(fb, 9d, 8b, de, d4, 8d, 6f, ad), + be64_to_cpua(f1, 03, 03, f3, 3b, e2, 73, f7), + be64_to_cpua(8a, fa, 54, 93, 29, a7, 70, 86), + be64_to_cpua(d7, e4, ef, 52, 66, d3, 5b, 9d), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, { + .key = /* secp256r1(sha224) */ + "\x04\x8b\x6d\xc0\x33\x8e\x2d\x8b\x67\xf5\xeb\xc4\x7f\xa0\xf5\xd9" + "\x7b\x03\xa5\x78\x9a\xb5\xea\x14\xe4\x23\xd0\xaf\xd7\x0e\x2e\xa0" + "\xc9\x8b\xdb\x95\xf8\xb3\xaf\xac\x00\x2c\x2c\x1f\x7a\xfd\x95\x88" + "\x43\x13\xbf\xf3\x1c\x05\x1a\x14\x18\x09\x3f\xd6\x28\x3e\xc5\xa0" + "\xd4", + .key_len = 65, + .m = + "\x1a\x15\xbc\xa3\xe4\xed\x3a\xb8\x23\x67\xc6\xc4\x34\xf8\x6c\x41" + "\x04\x0b\xda\xc5\x77\xfa\x1c\x2d\xe6\x2c\x3b\xe0", + .m_size = 28, + .c = (const unsigned char[]){ + be64_to_cpua(7d, 
25, d8, 25, f5, 81, d2, 1e), + be64_to_cpua(34, 62, 79, cb, 6a, 91, 67, 2e), + be64_to_cpua(ae, ce, 77, 59, 1a, db, 59, d5), + be64_to_cpua(20, 43, fa, c0, 9f, 9d, 7b, e7), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(ce, d5, 2e, 8b, de, 5a, 04, 0e), + be64_to_cpua(bf, 50, 05, 58, 39, 0e, 26, 92), + be64_to_cpua(76, 20, 4a, 77, 22, ec, c8, 66), + be64_to_cpua(5f, f8, 74, f8, 57, d0, 5e, 54), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, { + .key = /* secp256r1(sha256) */ + "\x04\xf1\xea\xc4\x53\xf3\xb9\x0e\x9f\x7e\xad\xe3\xea\xd7\x0e\x0f" + "\xd6\x98\x9a\xca\x92\x4d\x0a\x80\xdb\x2d\x45\xc7\xec\x4b\x97\x00" + "\x2f\xe9\x42\x6c\x29\xdc\x55\x0e\x0b\x53\x12\x9b\x2b\xad\x2c\xe9" + "\x80\xe6\xc5\x43\xc2\x1d\x5e\xbb\x65\x21\x50\xb6\x37\xb0\x03\x8e" + "\xb8", + .key_len = 65, + .m = + "\x8f\x43\x43\x46\x64\x8f\x6b\x96\xdf\x89\xdd\xa9\x01\xc5\x17\x6b" + "\x10\xa6\xd8\x39\x61\xdd\x3c\x1a\xc8\x8b\x59\xb2\xdc\x32\x7a\xa4", + .m_size = 32, + .c = (const unsigned char[]){ + be64_to_cpua(91, dc, 02, 67, dc, 0c, d0, 82), + be64_to_cpua(ac, 44, c3, e8, 24, 11, 2d, a4), + be64_to_cpua(09, dc, 29, 63, a8, 1a, ad, fc), + be64_to_cpua(08, 31, fa, 74, 0d, 1d, 21, 5d), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(4f, 2a, 65, 35, 23, e3, 1d, fa), + be64_to_cpua(0a, 6e, 1b, c4, af, e1, 83, c3), + be64_to_cpua(f9, a9, 81, ac, 4a, 50, d0, 91), + be64_to_cpua(bd, ff, ce, ee, 42, c3, 97, ff), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, { + .key = /* secp256r1(sha384) */ + "\x04\xc5\xc6\xea\x60\xc9\xce\xad\x02\x8d\xf5\x3e\x24\xe3\x52\x1d" + "\x28\x47\x3b\xc3\x6b\xa4\x99\x35\x99\x11\x88\x88\xc8\xf4\xee\x7e" + "\x8c\x33\x8f\x41\x03\x24\x46\x2b\x1a\x82\xf9\x9f\xe1\x97\x1b\x00" + "\xda\x3b\x24\x41\xf7\x66\x33\x58\x3d\x3a\x81\xad\xcf\x16\xe9\xe2" + "\x7c", + .key_len = 65, + .m = + "\x3e\x78\x70\xfb\xcd\x66\xba\x91\xa1\x79\xff\x1e\x1c\x6b\x78\xe6" + "\xc0\x81\x3a\x65\x97\x14\x84\x36\x14\x1a\x9a\xb7\xc5\xab\x84\x94" + "\x5e\xbb\x1b\x34\x71\xcb\x41\xe1\xf6\xfc\x92\x7b\x34\xbb\x86\xbb", + .m_size = 48, + .c = (const unsigned char[]){ + be64_to_cpua(f2, e4, 6c, c7, 94, b1, d5, fe), + be64_to_cpua(08, b2, 6b, 24, 94, 48, 46, 5e), + be64_to_cpua(d0, 2e, 95, 54, d1, 95, 64, 93), + be64_to_cpua(8e, f3, 6f, dc, f8, 69, a6, 2e), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(c0, 60, 11, 92, dc, 17, 89, 12), + be64_to_cpua(69, f4, 3b, 4f, 47, cf, 9b, 16), + be64_to_cpua(19, fb, 5f, 92, f4, c9, 23, 37), + 
be64_to_cpua(eb, a7, 80, 26, dc, f9, 3a, 44), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, { + .key = /* secp256r1(sha512) */ + "\x04\xd7\x27\x46\x49\xf6\x26\x85\x12\x40\x76\x8e\xe2\xe6\x2a\x7a" + "\x83\xb1\x4e\x7a\xeb\x3b\x5c\x67\x4a\xb5\xa4\x92\x8c\x69\xff\x38" + "\xee\xd9\x4e\x13\x29\x59\xad\xde\x6b\xbb\x45\x31\xee\xfd\xd1\x1b" + "\x64\xd3\xb5\xfc\xaf\x9b\x4b\x88\x3b\x0e\xb7\xd6\xdf\xf1\xd5\x92" + "\xbf", + .key_len = 65, + .m = + "\x57\xb7\x9e\xe9\x05\x0a\x8c\x1b\xc9\x13\xe5\x4a\x24\xc7\xe2\xe9" + "\x43\xc3\xd1\x76\x62\xf4\x98\x1a\x9c\x13\xb0\x20\x1b\xe5\x39\xca" + "\x4f\xd9\x85\x34\x95\xa2\x31\xbc\xbb\xde\xdd\x76\xbb\x61\xe3\xcf" + "\x9d\xc0\x49\x7a\xf3\x7a\xc4\x7d\xa8\x04\x4b\x8d\xb4\x4d\x5b\xd6", + .m_size = 64, + .c = (const unsigned char[]){ + be64_to_cpua(76, f6, 04, 99, 09, 37, 4d, fa), + be64_to_cpua(ed, 8c, 73, 30, 6c, 22, b3, 97), + be64_to_cpua(40, ea, 44, 81, 00, 4e, 29, 08), + be64_to_cpua(b8, 6d, 87, 81, 43, df, fb, 9f), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(76, 31, 79, 4a, e9, 81, 6a, ee), + be64_to_cpua(5c, ad, c3, 78, 1c, c2, c1, 19), + be64_to_cpua(f8, 00, dd, ab, d4, c0, 2b, e6), + be64_to_cpua(1e, b9, 75, 31, f6, 04, a5, 4d), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, +}; + +static const struct sig_testvec ecdsa_nist_p384_tv_template[] = { + { + .key = /* secp384r1(sha1) */ + "\x04\x89\x25\xf3\x97\x88\xcb\xb0\x78\xc5\x72\x9a\x14\x6e\x7a\xb1" + "\x5a\xa5\x24\xf1\x95\x06\x9e\x28\xfb\xc4\xb9\xbe\x5a\x0d\xd9\x9f" + "\xf3\xd1\x4d\x2d\x07\x99\xbd\xda\xa7\x66\xec\xbb\xea\xba\x79\x42" + "\xc9\x34\x89\x6a\xe7\x0b\xc3\xf2\xfe\x32\x30\xbe\xba\xf9\xdf\x7e" + "\x4b\x6a\x07\x8e\x26\x66\x3f\x1d\xec\xa2\x57\x91\x51\xdd\x17\x0e" + "\x0b\x25\xd6\x80\x5c\x3b\xe6\x1a\x98\x48\x91\x45\x7a\x73\xb0\xc3" + "\xf1", + .key_len = 97, + .m = + "\x12\x55\x28\xf0\x77\xd5\xb6\x21\x71\x32\x48\xcd\x28\xa8\x25\x22" + "\x3a\x69\xc1\x93", + .m_size = 20, + .c = (const unsigned char[]){ + be64_to_cpua(ec, 7c, 7e, d0, 87, d7, d7, 6e), + be64_to_cpua(78, f1, 4c, 26, e6, 5b, 86, cf), + be64_to_cpua(3a, c6, f1, 32, 3c, ce, 70, 2b), + be64_to_cpua(8d, 26, 8e, ae, 63, 3f, bc, 20), + be64_to_cpua(57, 55, 07, 20, 43, 30, de, a0), + be64_to_cpua(f5, 0f, 24, 4c, 07, 93, 6f, 21), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(79, 12, 2a, b7, c5, 15, 92, c5), + be64_to_cpua(4a, a1, 59, f1, 1c, a4, 58, 26), + be64_to_cpua(74, a0, 0f, bf, af, c3, 36, 76), + be64_to_cpua(df, 28, 8c, 1b, fa, f9, 95, 88), + be64_to_cpua(5f, 63, b1, be, 5e, 4c, 0e, a1), + be64_to_cpua(cd, bb, 7e, 81, 5d, 8f, 63, c0), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + 
.public_key_vec = true, + }, { + .key = /* secp384r1(sha224) */ + "\x04\x69\x6c\xcf\x62\xee\xd0\x0d\xe5\xb5\x2f\x70\x54\xcf\x26\xa0" + "\xd9\x98\x8d\x92\x2a\xab\x9b\x11\xcb\x48\x18\xa1\xa9\x0d\xd5\x18" + "\x3e\xe8\x29\x6e\xf6\xe4\xb5\x8e\xc7\x4a\xc2\x5f\x37\x13\x99\x05" + "\xb6\xa4\x9d\xf9\xfb\x79\x41\xe7\xd7\x96\x9f\x73\x3b\x39\x43\xdc" + "\xda\xf4\x06\xb9\xa5\x29\x01\x9d\x3b\xe1\xd8\x68\x77\x2a\xf4\x50" + "\x6b\x93\x99\x6c\x66\x4c\x42\x3f\x65\x60\x6c\x1c\x0b\x93\x9b\x9d" + "\xe0", + .key_len = 97, + .m = + "\x12\x80\xb6\xeb\x25\xe2\x3d\xf0\x21\x32\x96\x17\x3a\x38\x39\xfd" + "\x1f\x05\x34\x7b\xb8\xf9\x71\x66\x03\x4f\xd5\xe5", + .m_size = 28, + .c = (const unsigned char[]){ + be64_to_cpua(3f, dd, 15, 1b, 68, 2b, 9d, 8b), + be64_to_cpua(c9, 9c, 11, b8, 10, 01, c5, 41), + be64_to_cpua(c5, da, b4, e3, 93, 07, e0, 99), + be64_to_cpua(97, f1, c8, 72, 26, cf, 5a, 5e), + be64_to_cpua(ec, cb, e4, 89, 47, b2, f7, bc), + be64_to_cpua(8a, 51, 84, ce, 13, 1e, d2, dc), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(88, 2b, 82, 26, 5e, 1c, da, fb), + be64_to_cpua(9f, 19, d0, 42, 8b, 93, c2, 11), + be64_to_cpua(4d, d0, c6, 6e, b0, e9, fc, 14), + be64_to_cpua(df, d8, 68, a2, 64, 42, 65, f3), + be64_to_cpua(4b, 00, 08, 31, 6c, f5, d5, f6), + be64_to_cpua(8b, 03, 2c, fc, 1f, d1, a9, a4), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, { + .key = /* secp384r1(sha256) */ + "\x04\xee\xd6\xda\x3e\x94\x90\x00\x27\xed\xf8\x64\x55\xd6\x51\x9a" + "\x1f\x52\x00\x63\x78\xf1\xa9\xfd\x75\x4c\x9e\xb2\x20\x1a\x91\x5a" + "\xba\x7a\xa3\xe5\x6c\xb6\x25\x68\x4b\xe8\x13\xa6\x54\x87\x2c\x0e" + "\xd0\x83\x95\xbc\xbf\xc5\x28\x4f\x77\x1c\x46\xa6\xf0\xbc\xd4\xa4" + "\x8d\xc2\x8f\xb3\x32\x37\x40\xd6\xca\xf8\xae\x07\x34\x52\x39\x52" + "\x17\xc3\x34\x29\xd6\x40\xea\x5c\xb9\x3f\xfb\x32\x2e\x12\x33\xbc" + "\xab", + .key_len = 97, + .m = + "\xaa\xe7\xfd\x03\x26\xcb\x94\x71\xe4\xce\x0f\xc5\xff\xa6\x29\xa3" + "\xe1\xcc\x4c\x35\x4e\xde\xca\x80\xab\x26\x0c\x25\xe6\x68\x11\xc2", + .m_size = 32, + .c = (const unsigned char[]){ + be64_to_cpua(c8, 8d, 2c, 79, 3a, 8e, 32, c4), + be64_to_cpua(b6, c6, fc, 70, 2e, 66, 3c, 77), + be64_to_cpua(af, 06, 3f, 84, 04, e2, f9, 67), + be64_to_cpua(cc, 47, 53, 87, bc, bd, 83, 3f), + be64_to_cpua(8e, 3f, 7e, ce, 0a, 9b, aa, 59), + be64_to_cpua(08, 09, 12, 9d, 6e, 96, 64, a6), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(10, 0e, f4, 1f, 39, ca, 4d, 43), + be64_to_cpua(4f, 8d, de, 1e, 93, 8d, 95, bb), + be64_to_cpua(15, 68, c0, 75, 3e, 23, 5e, 36), + be64_to_cpua(dd, ce, bc, b2, 97, f4, 9c, f3), + be64_to_cpua(26, a2, b0, 89, 42, 0a, da, d9), + be64_to_cpua(40, 34, b8, 90, a9, 80, ab, 47), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, { + .key = /* secp384r1(sha384) */ + "\x04\x3a\x2f\x62\xe7\x1a\xcf\x24\xd0\x0b\x7c\xe0\xed\x46\x0a\x4f" + "\x74\x16\x43\xe9\x1a\x25\x7c\x55\xff\xf0\x29\x68\x66\x20\x91\xf9" + "\xdb\x2b\xf6\xb3\x6c\x54\x01\xca\xc7\x6a\x5c\x0d\xeb\x68\xd9\x3c" + "\xf1\x01\x74\x1f\xf9\x6c\xe5\x5b\x60\xe9\x7f\x5d\xb3\x12\x80\x2a" + 
"\xd8\x67\x92\xc9\x0e\x4c\x4c\x6b\xa1\xb2\xa8\x1e\xac\x1c\x97\xd9" + "\x21\x67\xe5\x1b\x5a\x52\x31\x68\xd6\xee\xf0\x19\xb0\x55\xed\x89" + "\x9e", + .key_len = 97, + .m = + "\x8d\xf2\xc0\xe9\xa8\xf3\x8e\x44\xc4\x8c\x1a\xa0\xb8\xd7\x17\xdf" + "\xf2\x37\x1b\xc6\xe3\xf5\x62\xcc\x68\xf5\xd5\x0b\xbf\x73\x2b\xb1" + "\xb0\x4c\x04\x00\x31\xab\xfe\xc8\xd6\x09\xc8\xf2\xea\xd3\x28\xff", + .m_size = 48, + .c = (const unsigned char[]){ + be64_to_cpua(a2, a4, c8, f2, ea, 9d, 11, 1f), + be64_to_cpua(3b, 1f, 07, 8f, 15, 02, fe, 1d), + be64_to_cpua(29, e6, fb, ca, 8c, d6, b6, b4), + be64_to_cpua(2d, 7a, 91, 5f, 49, 2d, 22, 08), + be64_to_cpua(ee, 2e, 62, 35, 46, fa, 00, d8), + be64_to_cpua(9b, 28, 68, c0, a1, ea, 8c, 50), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(ab, 8d, 4e, de, e6, 6d, 9b, 66), + be64_to_cpua(96, 17, 04, c9, 05, 77, f1, 8e), + be64_to_cpua(44, 92, 8c, 86, 99, 65, b3, 97), + be64_to_cpua(71, cd, 8f, 18, 99, f0, 0f, 13), + be64_to_cpua(bf, e3, 75, 24, 49, ac, fb, c8), + be64_to_cpua(fc, 50, f6, 43, bd, 50, 82, 0e), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, { + .key = /* secp384r1(sha512) */ + "\x04\xb4\xe7\xc1\xeb\x64\x25\x22\x46\xc3\x86\x61\x80\xbe\x1e\x46" + "\xcb\xf6\x05\xc2\xee\x73\x83\xbc\xea\x30\x61\x4d\x40\x05\x41\xf4" + "\x8c\xe3\x0e\x5c\xf0\x50\xf2\x07\x19\xe8\x4f\x25\xbe\xee\x0c\x95" + "\x54\x36\x86\xec\xc2\x20\x75\xf3\x89\xb5\x11\xa1\xb7\xf5\xaf\xbe" + "\x81\xe4\xc3\x39\x06\xbd\xe4\xfe\x68\x1c\x6d\x99\x2b\x1b\x63\xfa" + "\xdf\x42\x5c\xc2\x5a\xc7\x0c\xf4\x15\xf7\x1b\xa3\x2e\xd7\x00\xac" + "\xa3", + .key_len = 97, + .m = + "\xe8\xb7\x52\x7d\x1a\x44\x20\x05\x53\x6b\x3a\x68\xf2\xe7\x6c\xa1" + "\xae\x9d\x84\xbb\xba\x52\x43\x3e\x2c\x42\x78\x49\xbf\x78\xb2\x71" + "\xeb\xe1\xe0\xe8\x42\x7b\x11\xad\x2b\x99\x05\x1d\x36\xe6\xac\xfc" + "\x55\x73\xf0\x15\x63\x39\xb8\x6a\x6a\xc5\x91\x5b\xca\x6a\xa8\x0e", + .m_size = 64, + .c = (const unsigned char[]){ + be64_to_cpua(3e, b3, c7, a8, b3, 17, 77, d1), + be64_to_cpua(dc, 2b, 43, 0e, 6a, b3, 53, 6f), + be64_to_cpua(4c, fc, 6f, 80, e3, af, b3, d9), + be64_to_cpua(9a, 02, de, 93, e8, 83, e4, 84), + be64_to_cpua(4d, c6, ef, da, 02, e7, 0f, 52), + be64_to_cpua(00, 1d, 20, 94, 77, fe, 31, fa), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(4e, 45, cf, 3c, 93, ff, 50, 5d), + be64_to_cpua(34, e4, 8b, 80, a5, b6, da, 2c), + be64_to_cpua(c4, 6a, 03, 5f, 8d, 7a, f9, fb), + be64_to_cpua(ec, 63, e3, 0c, ec, 50, dc, cc), + be64_to_cpua(de, 3a, 3d, 16, af, b4, 52, 6a), + be64_to_cpua(63, f6, f0, 3d, 5f, 5f, 99, 3f), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 00) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, +}; + +static const struct sig_testvec ecdsa_nist_p521_tv_template[] = { + { + .key = /* secp521r1(sha224) */ + "\x04\x01\x4f\x43\x18\xb6\xa9\xc9\x5d\x68\xd3\xa9\x42\xf8\x98\xc0" + "\xd2\xd1\xa9\x50\x3b\xe8\xc4\x40\xe6\x11\x78\x88\x4b\xbd\x76\xa7" + "\x9a\xe0\xdd\x31\xa4\x67\x78\x45\x33\x9e\x8c\xd1\xc7\x44\xac\x61" + "\x68\xc8\x04\xe7\x5c\x79\xb1\xf1\x41\x0c\x71\xc0\x53\xa8\xbc\xfb" + "\xf5\xca\xd4\x01\x40\xfd\xa3\x45\xda\x08\xe0\xb4\xcb\x28\x3b\x0a" + 
"\x02\x35\x5f\x02\x9f\x3f\xcd\xef\x08\x22\x40\x97\x74\x65\xb7\x76" + "\x85\xc7\xc0\x5c\xfb\x81\xe1\xa5\xde\x0c\x4e\x8b\x12\x31\xb6\x47" + "\xed\x37\x0f\x99\x3f\x26\xba\xa3\x8e\xff\x79\x34\x7c\x3a\xfe\x1f" + "\x3b\x83\x82\x2f\x14", + .key_len = 133, + .m = + "\xa2\x3a\x6a\x8c\x7b\x3c\xf2\x51\xf8\xbe\x5f\x4f\x3b\x15\x05\xc4" + "\xb5\xbc\x19\xe7\x21\x85\xe9\x23\x06\x33\x62\xfb", + .m_size = 28, + .c = (const unsigned char[]){ + be64_to_cpua(46, 6b, c7, af, 7a, b9, 19, 0a), + be64_to_cpua(6c, a6, 9b, 89, 8b, 1e, fd, 09), + be64_to_cpua(98, 85, 29, 88, ff, 0b, 94, 94), + be64_to_cpua(18, c6, 37, 8a, cb, a7, d8, 7d), + be64_to_cpua(f8, 3f, 59, 0f, 74, f0, 3f, d8), + be64_to_cpua(e2, ef, 07, 92, ee, 60, 94, 06), + be64_to_cpua(35, f6, dc, 6d, 02, 7b, 22, ac), + be64_to_cpua(d6, 43, e7, ff, 42, b2, ba, 74), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 01), + be64_to_cpua(50, b1, a5, 98, 92, 2a, a5, 52), + be64_to_cpua(1c, ad, 22, da, 82, 00, 35, a3), + be64_to_cpua(0e, 64, cc, c4, e8, 43, d9, 0e), + be64_to_cpua(30, 90, 0f, 1c, 8f, 78, d3, 9f), + be64_to_cpua(26, 0b, 5f, 49, 32, 6b, 91, 99), + be64_to_cpua(0f, f8, 65, 97, 6b, 09, 4d, 22), + be64_to_cpua(5e, f9, 88, f3, d2, 32, 90, 57), + be64_to_cpua(26, 0d, 55, cd, 23, 1e, 7d, a0), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 3a) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, + { + .key = /* secp521r1(sha256) */ + "\x04\x01\x05\x3a\x6b\x3b\x5a\x0f\xa7\xb9\xb7\x32\x53\x4e\xe2\xae" + "\x0a\x52\xc5\xda\xdd\x5a\x79\x1c\x30\x2d\x33\x07\x79\xd5\x70\x14" + "\x61\x0c\xec\x26\x4d\xd8\x35\x57\x04\x1d\x88\x33\x4d\xce\x05\x36" + "\xa5\xaf\x56\x84\xfa\x0b\x9e\xff\x7b\x30\x4b\x92\x1d\x06\xf8\x81" + "\x24\x1e\x51\x00\x09\x21\x51\xf7\x46\x0a\x77\xdb\xb5\x0c\xe7\x9c" + "\xff\x27\x3c\x02\x71\xd7\x85\x36\xf1\xaa\x11\x59\xd8\xb8\xdc\x09" + "\xdc\x6d\x5a\x6f\x63\x07\x6c\xe1\xe5\x4d\x6e\x0f\x6e\xfb\x7c\x05" + "\x8a\xe9\x53\xa8\xcf\xce\x43\x0e\x82\x20\x86\xbc\x88\x9c\xb7\xe3" + "\xe6\x77\x1e\x1f\x8a", + .key_len = 133, + .m = + "\xcc\x97\x73\x0c\x73\xa2\x53\x2b\xfa\xd7\x83\x1d\x0c\x72\x1b\x39" + "\x80\x71\x8d\xdd\xc5\x9b\xff\x55\x32\x98\x25\xa2\x58\x2e\xb7\x73", + .m_size = 32, + .c = (const unsigned char[]){ + be64_to_cpua(de, 7e, d7, 59, 10, e9, d9, d5), + be64_to_cpua(38, 1f, 46, 0b, 04, 64, 34, 79), + be64_to_cpua(ae, ce, 54, 76, 9a, c2, 8f, b8), + be64_to_cpua(95, 35, 6f, 02, 0e, af, e1, 4c), + be64_to_cpua(56, 3c, f6, f0, d8, e1, b7, 5d), + be64_to_cpua(50, 9f, 7d, 1f, ca, 8b, a8, 2d), + be64_to_cpua(06, 0f, fd, 83, fc, 0e, d9, ce), + be64_to_cpua(a5, 5f, 57, 52, 27, 78, 3a, b5), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, cd), + be64_to_cpua(55, 38, b6, f6, 34, 65, c7, bd), + be64_to_cpua(1c, 57, 56, 8f, 12, b7, 1d, 91), + be64_to_cpua(03, 42, 02, 5f, 50, f0, a2, 0d), + be64_to_cpua(fa, 10, dd, 9b, fb, 36, 1a, 31), + be64_to_cpua(e0, 87, 2c, 44, 4b, 5a, ee, af), + be64_to_cpua(a9, 79, 24, b9, 37, 35, dd, a0), + be64_to_cpua(6b, 35, ae, 65, b5, 99, 12, 0a), + be64_to_cpua(50, 85, 38, f9, 15, 83, 18, 04), + be64_to_cpua(00, 00, 00, 00, 00, 00, 01, cf) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, + { + .key = /* secp521r1(sha384) */ + "\x04\x00\x2e\xd6\x21\x04\x75\xc3\xdc\x7d\xff\x0e\xf3\x70\x25\x2b" + "\xad\x72\xfc\x5a\x91\xf1\xd5\x9c\x64\xf3\x1f\x47\x11\x10\x62\x33" + "\xfd\x2e\xe8\x32\xca\x9e\x6f\x0a\x4c\x5b\x35\x9a\x46\xc5\xe7\xd4" + "\x38\xda\xb2\xf0\xf4\x87\xf3\x86\xf4\xea\x70\xad\x1e\xd4\x78\x8c" + "\x36\x18\x17\x00\xa2\xa0\x34\x1b\x2e\x6a\xdf\x06\xd6\x99\x2d\x47" + 
"\x50\x92\x1a\x8a\x72\x9c\x23\x44\xfa\xa7\xa9\xed\xa6\xef\x26\x14" + "\xb3\x9d\xfe\x5e\xa3\x8c\xd8\x29\xf8\xdf\xad\xa6\xab\xfc\xdd\x46" + "\x22\x6e\xd7\x35\xc7\x23\xb7\x13\xae\xb6\x34\xff\xd7\x80\xe5\x39" + "\xb3\x3b\x5b\x1b\x94", + .key_len = 133, + .m = + "\x36\x98\xd6\x82\xfa\xad\xed\x3c\xb9\x40\xb6\x4d\x9e\xb7\x04\x26" + "\xad\x72\x34\x44\xd2\x81\xb4\x9b\xbe\x01\x04\x7a\xd8\x50\xf8\x59" + "\xba\xad\x23\x85\x6b\x59\xbe\xfb\xf6\x86\xd4\x67\xa8\x43\x28\x76", + .m_size = 48, + .c = (const unsigned char[]){ + be64_to_cpua(b8, 6a, dd, fb, e6, 63, 4e, 28), + be64_to_cpua(84, 59, fd, 1a, c4, 40, dd, 43), + be64_to_cpua(32, 76, 06, d0, f9, c0, e4, e6), + be64_to_cpua(e4, df, 9b, 7d, 9e, 47, ca, 33), + be64_to_cpua(7e, 42, 71, 86, 57, 2d, f1, 7d), + be64_to_cpua(f2, 4b, 64, 98, f7, ec, da, c7), + be64_to_cpua(ec, 51, dc, e8, 35, 5e, ae, 16), + be64_to_cpua(96, 76, 3c, 27, ea, aa, 9c, 26), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, 93), + be64_to_cpua(c6, 4f, ab, 2b, 62, c1, 42, b1), + be64_to_cpua(e5, 5a, 94, 56, cf, 8f, b4, 22), + be64_to_cpua(6a, c3, f3, 7a, d1, fa, e7, a7), + be64_to_cpua(df, c4, c0, db, 54, db, 8a, 0d), + be64_to_cpua(da, a7, cd, 26, 28, 76, 3b, 52), + be64_to_cpua(e4, 3c, bc, 93, 65, 57, 1c, 30), + be64_to_cpua(55, ce, 37, 97, c9, 05, 51, e5), + be64_to_cpua(c3, 6a, 87, 6e, b5, 13, 1f, 20), + be64_to_cpua(00, 00, 00, 00, 00, 00, 00, ff) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, + { + .key = /* secp521r1(sha512) */ + "\x04\x00\xc7\x65\xee\x0b\x86\x7d\x8f\x02\xf1\x74\x5b\xb0\x4c\x3f" + "\xa6\x35\x60\x9f\x55\x23\x11\xcc\xdf\xb8\x42\x99\xee\x6c\x96\x6a" + "\x27\xa2\x56\xb2\x2b\x03\xad\x0f\xe7\x97\xde\x09\x5d\xb4\xc5\x5f" + "\xbd\x87\x37\xbf\x5a\x16\x35\x56\x08\xfd\x6f\x06\x1a\x1c\x84\xee" + "\xc3\x64\xb3\x00\x9e\xbd\x6e\x60\x76\xee\x69\xfd\x3a\xb8\xcd\x7e" + "\x91\x68\x53\x57\x44\x13\x2e\x77\x09\x2a\xbe\x48\xbd\x91\xd8\xf6" + "\x21\x16\x53\x99\xd5\xf0\x40\xad\xa6\xf8\x58\x26\xb6\x9a\xf8\x77" + "\xfe\x3a\x05\x1a\xdb\xa9\x0f\xc0\x6c\x76\x30\x8c\xd8\xde\x44\xae" + "\xd0\x17\xdf\x49\x6a", + .key_len = 133, + .m = + "\x5c\xa6\xbc\x79\xb8\xa0\x1e\x11\x83\xf7\xe9\x05\xdf\xba\xf7\x69" + "\x97\x22\x32\xe4\x94\x7c\x65\xbd\x74\xc6\x9a\x8b\xbd\x0d\xdc\xed" + "\xf5\x9c\xeb\xe1\xc5\x68\x40\xf2\xc7\x04\xde\x9e\x0d\x76\xc5\xa3" + "\xf9\x3c\x6c\x98\x08\x31\xbd\x39\xe8\x42\x7f\x80\x39\x6f\xfe\x68", + .m_size = 64, + .c = (const unsigned char[]){ + be64_to_cpua(28, b5, 04, b0, b6, 33, 1c, 7e), + be64_to_cpua(80, a6, 13, fc, b6, 90, f7, bb), + be64_to_cpua(27, 93, e8, 6c, 49, 7d, 28, fc), + be64_to_cpua(1f, 12, 3e, b7, 7e, 51, ff, 7f), + be64_to_cpua(fb, 62, 1e, 42, 03, 6c, 74, 8a), + be64_to_cpua(63, 0e, 02, cc, 94, a9, 05, b9), + be64_to_cpua(aa, 86, ec, a8, 05, 03, 52, 56), + be64_to_cpua(71, 86, 96, ac, 21, 33, 7e, 4e), + be64_to_cpua(00, 00, 00, 00, 00, 00, 01, 5c), + be64_to_cpua(46, 1e, 77, 44, 78, e0, d1, 04), + be64_to_cpua(72, 74, 13, 63, 39, a6, e5, 25), + be64_to_cpua(00, 55, bb, 6a, b4, 73, 00, d2), + be64_to_cpua(71, d0, e9, ca, a7, c0, cb, aa), + be64_to_cpua(7a, 76, 37, 51, 47, 49, 98, 12), + be64_to_cpua(88, 05, 3e, 43, 39, 01, bd, b7), + be64_to_cpua(95, 35, 89, 4f, 41, 5f, 9e, 19), + be64_to_cpua(43, 52, 1d, e3, c6, bd, 5a, 40), + be64_to_cpua(00, 00, 00, 00, 00, 00, 01, 70) }, + .c_size = ECC_MAX_BYTES * 2, + .public_key_vec = true, + }, +}; + +/* + * ECDSA X9.62 test vectors. + * + * Identical to ECDSA test vectors, except signature in "c" is X9.62 encoded. 
+ */ +static const struct sig_testvec x962_ecdsa_nist_p192_tv_template[] = { { .key = /* secp192r1(sha1) */ "\x04\xf7\x46\xf8\x2f\x15\xf6\x22\x8e\xd7\x57\x4f\xcc\xe7\xbb\xc1" @@ -756,7 +1449,7 @@ static const struct sig_testvec ecdsa_nist_p192_tv_template[] = { }, }; -static const struct sig_testvec ecdsa_nist_p256_tv_template[] = { +static const struct sig_testvec x962_ecdsa_nist_p256_tv_template[] = { { .key = /* secp256r1(sha1) */ "\x04\xb9\x7b\xbb\xd7\x17\x64\xd2\x7e\xfc\x81\x5d\x87\x06\x83\x41" @@ -863,7 +1556,7 @@ static const struct sig_testvec ecdsa_nist_p256_tv_template[] = { }, }; -static const struct sig_testvec ecdsa_nist_p384_tv_template[] = { +static const struct sig_testvec x962_ecdsa_nist_p384_tv_template[] = { { .key = /* secp384r1(sha1) */ "\x04\x89\x25\xf3\x97\x88\xcb\xb0\x78\xc5\x72\x9a\x14\x6e\x7a\xb1" @@ -990,7 +1683,7 @@ static const struct sig_testvec ecdsa_nist_p384_tv_template[] = { }, }; -static const struct sig_testvec ecdsa_nist_p521_tv_template[] = { +static const struct sig_testvec x962_ecdsa_nist_p521_tv_template[] = { { .key = /* secp521r1(sha224) */ "\x04\x01\x4f\x43\x18\xb6\xa9\xc9\x5d\x68\xd3\xa9\x42\xf8\x98\xc0" diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h index 0717a53ae732..68db975e0963 100644 --- a/include/crypto/internal/ecc.h +++ b/include/crypto/internal/ecc.h @@ -42,6 +42,18 @@ #define ECC_POINT_INIT(x, y, ndigits) (struct ecc_point) { x, y, ndigits } +/* + * The integers r and s making up the signature are expected to be + * formatted as two consecutive u64 arrays of size ECC_MAX_BYTES. + * The bytes within each u64 digit are in native endianness, + * but the order of the u64 digits themselves is little endian. + * This format allows direct use by internal vli_*() functions. + */ +struct ecdsa_raw_sig { + u64 r[ECC_MAX_DIGITS]; + u64 s[ECC_MAX_DIGITS]; +}; + /** * ecc_swap_digits() - Copy ndigits from big endian array to native array * @in: Input array @@ -293,4 +305,5 @@ void ecc_point_mult_shamir(const struct ecc_point *result, const u64 *y, const struct ecc_point *q, const struct ecc_curve *curve); +extern struct crypto_template ecdsa_x962_tmpl; #endif -- cgit v1.2.3 From 221f00418e726237dbe38ba627ce08b22d3667f7 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:26 +0200 Subject: crypto: sig - Rename crypto_sig_maxsize() to crypto_sig_keysize() crypto_sig_maxsize() is a bit of a misnomer as it doesn't return the maximum signature size, but rather the key size. Rename it as well as all implementations of the ->max_size callback. A subsequent commit introduces a crypto_sig_maxsize() function which returns the actual maximum signature size. While at it, change the return type of crypto_sig_keysize() from int to unsigned int for consistency with crypto_akcipher_maxsize(). None of the callers checks for a negative return value and an error condition can always be indicated by returning zero. 
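As a concrete illustration of the mismatch (numbers for an ecdsa-nist-p256
tfm and a worst-case X9.62/DER-encoded signature; not taken from the patch):

	/*
	 * crypto_sig_keysize() reports 32 bytes for the 256-bit curve,
	 * while the largest signature the X9.62 wrapper may have to parse is:
	 *
	 *   SEQUENCE header                     2 bytes
	 *   INTEGER r: tag + length + value     2 + 33 bytes (32 + leading 0x00)
	 *   INTEGER s: tag + length + value     2 + 33 bytes
	 *   -------------------------------------------------
	 *   maximum encoded signature          72 bytes
	 */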
Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- Documentation/crypto/api-sig.rst | 3 ++- crypto/asymmetric_keys/public_key.c | 4 ++-- crypto/ecdsa-x962.c | 8 ++++---- crypto/ecdsa.c | 10 +++++----- crypto/ecrdsa.c | 4 ++-- crypto/rsassa-pkcs1.c | 4 ++-- crypto/sig.c | 2 +- crypto/testmgr.c | 2 +- include/crypto/sig.h | 14 +++++++------- 9 files changed, 26 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/Documentation/crypto/api-sig.rst b/Documentation/crypto/api-sig.rst index a96dba66296b..10dabc87df02 100644 --- a/Documentation/crypto/api-sig.rst +++ b/Documentation/crypto/api-sig.rst @@ -11,4 +11,5 @@ Asymmetric Signature API :doc: Generic Public Key Signature API .. kernel-doc:: include/crypto/sig.h - :functions: crypto_alloc_sig crypto_free_sig crypto_sig_set_pubkey crypto_sig_set_privkey crypto_sig_maxsize crypto_sig_sign crypto_sig_verify + :functions: crypto_alloc_sig crypto_free_sig crypto_sig_set_pubkey crypto_sig_set_privkey crypto_sig_keysize crypto_sig_sign crypto_sig_verify + diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index cc6d48cafa2b..8bf5aa329c26 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -201,7 +201,7 @@ static int software_key_query(const struct kernel_pkey_params *params, if (ret < 0) goto error_free_tfm; - len = crypto_sig_maxsize(sig); + len = crypto_sig_keysize(sig); info->supported_ops = KEYCTL_SUPPORTS_VERIFY; if (pkey->key_is_private) @@ -332,7 +332,7 @@ static int software_key_eds_op(struct kernel_pkey_params *params, if (ret) goto error_free_tfm; - ksz = crypto_sig_maxsize(sig); + ksz = crypto_sig_keysize(sig); } else { tfm = crypto_alloc_akcipher(alg_name, 0, 0); if (IS_ERR(tfm)) { diff --git a/crypto/ecdsa-x962.c b/crypto/ecdsa-x962.c index 022e654c075d..8a15232dfa77 100644 --- a/crypto/ecdsa-x962.c +++ b/crypto/ecdsa-x962.c @@ -81,7 +81,7 @@ static int ecdsa_x962_verify(struct crypto_sig *tfm, struct ecdsa_x962_signature_ctx sig_ctx; int err; - sig_ctx.ndigits = DIV_ROUND_UP(crypto_sig_maxsize(ctx->child), + sig_ctx.ndigits = DIV_ROUND_UP(crypto_sig_keysize(ctx->child), sizeof(u64)); err = asn1_ber_decoder(&ecdsasignature_decoder, &sig_ctx, src, slen); @@ -92,11 +92,11 @@ static int ecdsa_x962_verify(struct crypto_sig *tfm, digest, dlen); } -static unsigned int ecdsa_x962_max_size(struct crypto_sig *tfm) +static unsigned int ecdsa_x962_key_size(struct crypto_sig *tfm) { struct ecdsa_x962_ctx *ctx = crypto_sig_ctx(tfm); - return crypto_sig_maxsize(ctx->child); + return crypto_sig_keysize(ctx->child); } static int ecdsa_x962_set_pub_key(struct crypto_sig *tfm, @@ -179,7 +179,7 @@ static int ecdsa_x962_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.exit = ecdsa_x962_exit_tfm; inst->alg.verify = ecdsa_x962_verify; - inst->alg.max_size = ecdsa_x962_max_size; + inst->alg.key_size = ecdsa_x962_key_size; inst->alg.set_pub_key = ecdsa_x962_set_pub_key; inst->free = ecdsa_x962_free; diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index 1f7c29468a86..6cb0a6ce9de1 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -162,7 +162,7 @@ static void ecdsa_exit_tfm(struct crypto_sig *tfm) ecdsa_ecc_ctx_deinit(ctx); } -static unsigned int ecdsa_max_size(struct crypto_sig *tfm) +static unsigned int ecdsa_key_size(struct crypto_sig *tfm) { struct ecc_ctx *ctx = crypto_sig_ctx(tfm); @@ -179,7 +179,7 @@ static int ecdsa_nist_p521_init_tfm(struct crypto_sig *tfm) static struct sig_alg ecdsa_nist_p521 = { .verify = ecdsa_verify, .set_pub_key = 
ecdsa_set_pub_key, - .max_size = ecdsa_max_size, + .key_size = ecdsa_key_size, .init = ecdsa_nist_p521_init_tfm, .exit = ecdsa_exit_tfm, .base = { @@ -201,7 +201,7 @@ static int ecdsa_nist_p384_init_tfm(struct crypto_sig *tfm) static struct sig_alg ecdsa_nist_p384 = { .verify = ecdsa_verify, .set_pub_key = ecdsa_set_pub_key, - .max_size = ecdsa_max_size, + .key_size = ecdsa_key_size, .init = ecdsa_nist_p384_init_tfm, .exit = ecdsa_exit_tfm, .base = { @@ -223,7 +223,7 @@ static int ecdsa_nist_p256_init_tfm(struct crypto_sig *tfm) static struct sig_alg ecdsa_nist_p256 = { .verify = ecdsa_verify, .set_pub_key = ecdsa_set_pub_key, - .max_size = ecdsa_max_size, + .key_size = ecdsa_key_size, .init = ecdsa_nist_p256_init_tfm, .exit = ecdsa_exit_tfm, .base = { @@ -245,7 +245,7 @@ static int ecdsa_nist_p192_init_tfm(struct crypto_sig *tfm) static struct sig_alg ecdsa_nist_p192 = { .verify = ecdsa_verify, .set_pub_key = ecdsa_set_pub_key, - .max_size = ecdsa_max_size, + .key_size = ecdsa_key_size, .init = ecdsa_nist_p192_init_tfm, .exit = ecdsa_exit_tfm, .base = { diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c index 7383dd11089b..f981b31f4249 100644 --- a/crypto/ecrdsa.c +++ b/crypto/ecrdsa.c @@ -241,7 +241,7 @@ static int ecrdsa_set_pub_key(struct crypto_sig *tfm, const void *key, return 0; } -static unsigned int ecrdsa_max_size(struct crypto_sig *tfm) +static unsigned int ecrdsa_key_size(struct crypto_sig *tfm) { struct ecrdsa_ctx *ctx = crypto_sig_ctx(tfm); @@ -259,7 +259,7 @@ static void ecrdsa_exit_tfm(struct crypto_sig *tfm) static struct sig_alg ecrdsa_alg = { .verify = ecrdsa_verify, .set_pub_key = ecrdsa_set_pub_key, - .max_size = ecrdsa_max_size, + .key_size = ecrdsa_key_size, .exit = ecrdsa_exit_tfm, .base = { .cra_name = "ecrdsa", diff --git a/crypto/rsassa-pkcs1.c b/crypto/rsassa-pkcs1.c index b291ec0944a2..9c28f1c62826 100644 --- a/crypto/rsassa-pkcs1.c +++ b/crypto/rsassa-pkcs1.c @@ -302,7 +302,7 @@ static int rsassa_pkcs1_verify(struct crypto_sig *tfm, return 0; } -static unsigned int rsassa_pkcs1_max_size(struct crypto_sig *tfm) +static unsigned int rsassa_pkcs1_key_size(struct crypto_sig *tfm) { struct rsassa_pkcs1_ctx *ctx = crypto_sig_ctx(tfm); @@ -419,7 +419,7 @@ static int rsassa_pkcs1_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.sign = rsassa_pkcs1_sign; inst->alg.verify = rsassa_pkcs1_verify; - inst->alg.max_size = rsassa_pkcs1_max_size; + inst->alg.key_size = rsassa_pkcs1_key_size; inst->alg.set_pub_key = rsassa_pkcs1_set_pub_key; inst->alg.set_priv_key = rsassa_pkcs1_set_priv_key; diff --git a/crypto/sig.c b/crypto/sig.c index 84d0ea9fd73b..7a3521bee29a 100644 --- a/crypto/sig.c +++ b/crypto/sig.c @@ -125,7 +125,7 @@ int crypto_register_sig(struct sig_alg *alg) alg->set_priv_key = sig_default_set_key; if (!alg->set_pub_key) return -EINVAL; - if (!alg->max_size) + if (!alg->key_size) return -EINVAL; sig_prepare_alg(alg); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 5760615e27b1..9d6774c5a463 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4340,7 +4340,7 @@ static int test_sig_one(struct crypto_sig *tfm, const struct sig_testvec *vecs) if (vecs->public_key_vec) return 0; - sig_size = crypto_sig_maxsize(tfm); + sig_size = crypto_sig_keysize(tfm); if (sig_size < vecs->c_size) { pr_err("alg: sig: invalid maxsize %u\n", sig_size); return -EINVAL; diff --git a/include/crypto/sig.h b/include/crypto/sig.h index bbc902642bf5..a3ef17c5f72f 100644 --- a/include/crypto/sig.h +++ b/include/crypto/sig.h @@ -32,7 +32,7 @@ struct crypto_sig { * 
@set_priv_key: Function invokes the algorithm specific set private key * function, which knows how to decode and interpret * the BER encoded private key and parameters. Optional. - * @max_size: Function returns key size. Mandatory. + * @key_size: Function returns key size. Mandatory. * @init: Initialize the cryptographic transformation object. * This function is used to initialize the cryptographic * transformation object. This function is called only once at @@ -58,7 +58,7 @@ struct sig_alg { const void *key, unsigned int keylen); int (*set_priv_key)(struct crypto_sig *tfm, const void *key, unsigned int keylen); - unsigned int (*max_size)(struct crypto_sig *tfm); + unsigned int (*key_size)(struct crypto_sig *tfm); int (*init)(struct crypto_sig *tfm); void (*exit)(struct crypto_sig *tfm); @@ -121,20 +121,20 @@ static inline void crypto_free_sig(struct crypto_sig *tfm) } /** - * crypto_sig_maxsize() - Get len for output buffer + * crypto_sig_keysize() - Get key size * - * Function returns the dest buffer size required for a given key. + * Function returns the key size in bytes. * Function assumes that the key is already set in the transformation. If this - * function is called without a setkey or with a failed setkey, you will end up + * function is called without a setkey or with a failed setkey, you may end up * in a NULL dereference. * * @tfm: signature tfm handle allocated with crypto_alloc_sig() */ -static inline int crypto_sig_maxsize(struct crypto_sig *tfm) +static inline unsigned int crypto_sig_keysize(struct crypto_sig *tfm) { struct sig_alg *alg = crypto_sig_alg(tfm); - return alg->max_size(tfm); + return alg->key_size(tfm); } /** -- cgit v1.2.3 From a2471684dae23a676b4badea306140d24e6507f5 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:27 +0200 Subject: crypto: ecdsa - Move X9.62 signature size calculation into template software_key_query() returns the maximum signature and digest size for a given key to user space. When it only supported RSA keys, calculating those sizes was trivial as they were always equivalent to the key size. However when ECDSA was added, the function grew somewhat complicated calculations which take the ASN.1 encoding and curve into account. This doesn't scale well and adjusting the calculations is easily forgotten when adding support for new encodings or curves. In fact, when NIST P521 support was recently added, the function was initially not amended: https://lore.kernel.org/all/b749d5ee-c3b8-4cbd-b252-7773e4536e07@linux.ibm.com/ Introduce a ->max_size() callback to struct sig_alg and take advantage of it to move the signature size calculations to ecdsa-x962.c. Introduce a ->digest_size() callback to struct sig_alg and move the maximum ECDSA digest size to ecdsa.c. It is common across ecdsa-x962.c and the upcoming ecdsa-p1363.c and thus inherited by both of them. For all other algorithms, continue using the key size as maximum signature and digest size. 
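To make the numbers concrete, the X9.62 calculation that moves into ecdsa-x962.c can be sketched as a stand-alone helper; the helper name and parameters are invented for illustration, the real logic is the ecdsa_x962_max_size() callback added below. Worked through for NIST P-256 it reproduces the familiar 72-byte upper bound:

  /* Illustrative sketch of the maximum X9.62/ASN.1 signature size for a
   * curve whose coordinates are 'keysize' bytes wide. */
  static unsigned int x962_max_sig_size(unsigned int keysize, int is_p521)
  {
          unsigned int slen = keysize;

          if (!is_p521)
                  slen += 1;                      /* possible leading zero on r and s */
          slen = 2 * (slen + 2);                  /* two INTEGERs incl. tag + length */
          return 1 + (slen >= 128) + 1 + slen;    /* outer SEQUENCE header */
  }

  /* e.g. x962_max_sig_size(32, 0) == 72 for ecdsa-nist-p256, while the new
   * digest_size() callback reports SHA512_DIGEST_SIZE (64) for all curves. */
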
Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- Documentation/crypto/api-sig.rst | 2 +- crypto/asymmetric_keys/public_key.c | 38 ++++--------------------------------- crypto/ecdsa-x962.c | 36 +++++++++++++++++++++++++++++++++++ crypto/ecdsa.c | 16 ++++++++++++++++ crypto/sig.c | 4 ++++ include/crypto/sig.h | 38 +++++++++++++++++++++++++++++++++++++ 6 files changed, 99 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/Documentation/crypto/api-sig.rst b/Documentation/crypto/api-sig.rst index 10dabc87df02..aaec18e26d54 100644 --- a/Documentation/crypto/api-sig.rst +++ b/Documentation/crypto/api-sig.rst @@ -11,5 +11,5 @@ Asymmetric Signature API :doc: Generic Public Key Signature API .. kernel-doc:: include/crypto/sig.h - :functions: crypto_alloc_sig crypto_free_sig crypto_sig_set_pubkey crypto_sig_set_privkey crypto_sig_keysize crypto_sig_sign crypto_sig_verify + :functions: crypto_alloc_sig crypto_free_sig crypto_sig_set_pubkey crypto_sig_set_privkey crypto_sig_keysize crypto_sig_maxsize crypto_sig_digestsize crypto_sig_sign crypto_sig_verify diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index 8bf5aa329c26..ec2c0e009b49 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -202,6 +202,8 @@ static int software_key_query(const struct kernel_pkey_params *params, goto error_free_tfm; len = crypto_sig_keysize(sig); + info->max_sig_size = crypto_sig_maxsize(sig); + info->max_data_size = crypto_sig_digestsize(sig); info->supported_ops = KEYCTL_SUPPORTS_VERIFY; if (pkey->key_is_private) @@ -227,6 +229,8 @@ static int software_key_query(const struct kernel_pkey_params *params, goto error_free_tfm; len = crypto_akcipher_maxsize(tfm); + info->max_sig_size = len; + info->max_data_size = len; info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT; if (pkey->key_is_private) @@ -234,40 +238,6 @@ static int software_key_query(const struct kernel_pkey_params *params, } info->key_size = len * 8; - - if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) { - int slen = len; - /* - * ECDSA key sizes are much smaller than RSA, and thus could - * operate on (hashed) inputs that are larger than key size. - * For example SHA384-hashed input used with secp256r1 - * based keys. Set max_data_size to be at least as large as - * the largest supported hash size (SHA512) - */ - info->max_data_size = 64; - - /* - * Verify takes ECDSA-Sig (described in RFC 5480) as input, - * which is actually 2 'key_size'-bit integers encoded in - * ASN.1. Account for the ASN.1 encoding overhead here. - * - * NIST P192/256/384 may prepend a '0' to a coordinate to - * indicate a positive integer. NIST P521 never needs it. - */ - if (strcmp(pkey->pkey_algo, "ecdsa-nist-p521") != 0) - slen += 1; - /* Length of encoding the x & y coordinates */ - slen = 2 * (slen + 2); - /* - * If coordinate encoding takes at least 128 bytes then an - * additional byte for length encoding is needed. 
- */ - info->max_sig_size = 1 + (slen >= 128) + 1 + slen; - } else { - info->max_data_size = len; - info->max_sig_size = len; - } - info->max_enc_size = len; info->max_dec_size = len; diff --git a/crypto/ecdsa-x962.c b/crypto/ecdsa-x962.c index 8a15232dfa77..6a77c13e192b 100644 --- a/crypto/ecdsa-x962.c +++ b/crypto/ecdsa-x962.c @@ -99,6 +99,40 @@ static unsigned int ecdsa_x962_key_size(struct crypto_sig *tfm) return crypto_sig_keysize(ctx->child); } +static unsigned int ecdsa_x962_max_size(struct crypto_sig *tfm) +{ + struct ecdsa_x962_ctx *ctx = crypto_sig_ctx(tfm); + struct sig_alg *alg = crypto_sig_alg(ctx->child); + int slen = crypto_sig_keysize(ctx->child); + + /* + * Verify takes ECDSA-Sig-Value (described in RFC 5480) as input, + * which is actually 2 'key_size'-bit integers encoded in ASN.1. + * Account for the ASN.1 encoding overhead here. + * + * NIST P192/256/384 may prepend a '0' to a coordinate to indicate + * a positive integer. NIST P521 never needs it. + */ + if (strcmp(alg->base.cra_name, "ecdsa-nist-p521") != 0) + slen += 1; + + /* Length of encoding the x & y coordinates */ + slen = 2 * (slen + 2); + + /* + * If coordinate encoding takes at least 128 bytes then an + * additional byte for length encoding is needed. + */ + return 1 + (slen >= 128) + 1 + slen; +} + +static unsigned int ecdsa_x962_digest_size(struct crypto_sig *tfm) +{ + struct ecdsa_x962_ctx *ctx = crypto_sig_ctx(tfm); + + return crypto_sig_digestsize(ctx->child); +} + static int ecdsa_x962_set_pub_key(struct crypto_sig *tfm, const void *key, unsigned int keylen) { @@ -180,6 +214,8 @@ static int ecdsa_x962_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.verify = ecdsa_x962_verify; inst->alg.key_size = ecdsa_x962_key_size; + inst->alg.max_size = ecdsa_x962_max_size; + inst->alg.digest_size = ecdsa_x962_digest_size; inst->alg.set_pub_key = ecdsa_x962_set_pub_key; inst->free = ecdsa_x962_free; diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index 6cb0a6ce9de1..cf8e0c5d1dd8 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -7,6 +7,7 @@ #include #include #include +#include #include struct ecc_ctx { @@ -169,6 +170,17 @@ static unsigned int ecdsa_key_size(struct crypto_sig *tfm) return DIV_ROUND_UP(ctx->curve->nbits, 8); } +static unsigned int ecdsa_digest_size(struct crypto_sig *tfm) +{ + /* + * ECDSA key sizes are much smaller than RSA, and thus could + * operate on (hashed) inputs that are larger than the key size. + * E.g. SHA384-hashed input used with secp256r1 based keys. + * Return the largest supported hash size (SHA512). 
+ */ + return SHA512_DIGEST_SIZE; +} + static int ecdsa_nist_p521_init_tfm(struct crypto_sig *tfm) { struct ecc_ctx *ctx = crypto_sig_ctx(tfm); @@ -180,6 +192,7 @@ static struct sig_alg ecdsa_nist_p521 = { .verify = ecdsa_verify, .set_pub_key = ecdsa_set_pub_key, .key_size = ecdsa_key_size, + .digest_size = ecdsa_digest_size, .init = ecdsa_nist_p521_init_tfm, .exit = ecdsa_exit_tfm, .base = { @@ -202,6 +215,7 @@ static struct sig_alg ecdsa_nist_p384 = { .verify = ecdsa_verify, .set_pub_key = ecdsa_set_pub_key, .key_size = ecdsa_key_size, + .digest_size = ecdsa_digest_size, .init = ecdsa_nist_p384_init_tfm, .exit = ecdsa_exit_tfm, .base = { @@ -224,6 +238,7 @@ static struct sig_alg ecdsa_nist_p256 = { .verify = ecdsa_verify, .set_pub_key = ecdsa_set_pub_key, .key_size = ecdsa_key_size, + .digest_size = ecdsa_digest_size, .init = ecdsa_nist_p256_init_tfm, .exit = ecdsa_exit_tfm, .base = { @@ -246,6 +261,7 @@ static struct sig_alg ecdsa_nist_p192 = { .verify = ecdsa_verify, .set_pub_key = ecdsa_set_pub_key, .key_size = ecdsa_key_size, + .digest_size = ecdsa_digest_size, .init = ecdsa_nist_p192_init_tfm, .exit = ecdsa_exit_tfm, .base = { diff --git a/crypto/sig.c b/crypto/sig.c index 7a3521bee29a..be5ac0e59384 100644 --- a/crypto/sig.c +++ b/crypto/sig.c @@ -127,6 +127,10 @@ int crypto_register_sig(struct sig_alg *alg) return -EINVAL; if (!alg->key_size) return -EINVAL; + if (!alg->max_size) + alg->max_size = alg->key_size; + if (!alg->digest_size) + alg->digest_size = alg->key_size; sig_prepare_alg(alg); return crypto_register_alg(base); diff --git a/include/crypto/sig.h b/include/crypto/sig.h index a3ef17c5f72f..cff41ad93824 100644 --- a/include/crypto/sig.h +++ b/include/crypto/sig.h @@ -33,6 +33,8 @@ struct crypto_sig { * function, which knows how to decode and interpret * the BER encoded private key and parameters. Optional. * @key_size: Function returns key size. Mandatory. + * @digest_size: Function returns maximum digest size. Optional. + * @max_size: Function returns maximum signature size. Optional. * @init: Initialize the cryptographic transformation object. * This function is used to initialize the cryptographic * transformation object. This function is called only once at @@ -59,6 +61,8 @@ struct sig_alg { int (*set_priv_key)(struct crypto_sig *tfm, const void *key, unsigned int keylen); unsigned int (*key_size)(struct crypto_sig *tfm); + unsigned int (*digest_size)(struct crypto_sig *tfm); + unsigned int (*max_size)(struct crypto_sig *tfm); int (*init)(struct crypto_sig *tfm); void (*exit)(struct crypto_sig *tfm); @@ -137,6 +141,40 @@ static inline unsigned int crypto_sig_keysize(struct crypto_sig *tfm) return alg->key_size(tfm); } +/** + * crypto_sig_digestsize() - Get maximum digest size + * + * Function returns the maximum digest size in bytes. + * Function assumes that the key is already set in the transformation. If this + * function is called without a setkey or with a failed setkey, you may end up + * in a NULL dereference. + * + * @tfm: signature tfm handle allocated with crypto_alloc_sig() + */ +static inline unsigned int crypto_sig_digestsize(struct crypto_sig *tfm) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->digest_size(tfm); +} + +/** + * crypto_sig_maxsize() - Get maximum signature size + * + * Function returns the maximum signature size in bytes. + * Function assumes that the key is already set in the transformation. If this + * function is called without a setkey or with a failed setkey, you may end up + * in a NULL dereference. 
+ * + * @tfm: signature tfm handle allocated with crypto_alloc_sig() + */ +static inline unsigned int crypto_sig_maxsize(struct crypto_sig *tfm) +{ + struct sig_alg *alg = crypto_sig_alg(tfm); + + return alg->max_size(tfm); +} + /** * crypto_sig_sign() - Invoke signing operation * -- cgit v1.2.3 From b04163863caf599d4348a05af5a71cf5d42f11dc Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:28 +0200 Subject: crypto: ecdsa - Support P1363 signature decoding Alternatively to the X9.62 encoding of ecdsa signatures, which uses ASN.1 and is already supported by the kernel, there's another common encoding called P1363. It stores r and s as the concatenation of two big endian, unsigned integers. The name originates from IEEE P1363. Add a P1363 template in support of the forthcoming SPDM library (Security Protocol and Data Model) for PCI device authentication. P1363 is prescribed by SPDM 1.2.1 margin no 44: "For ECDSA signatures, excluding SM2, in SPDM, the signature shall be the concatenation of r and s. The size of r shall be the size of the selected curve. Likewise, the size of s shall be the size of the selected curve. See BaseAsymAlgo in NEGOTIATE_ALGORITHMS for the size of r and s. The byte order for r and s shall be in big endian order. When placing ECDSA signatures into an SPDM signature field, r shall come first followed by s." Link: https://www.dmtf.org/sites/default/files/standards/documents/DSP0274_1.2.1.pdf Signed-off-by: Lukas Wunner Reviewed-by: Jonathan Cameron Reviewed-by: Stefan Berger Signed-off-by: Herbert Xu --- crypto/Makefile | 1 + crypto/asymmetric_keys/public_key.c | 3 +- crypto/ecdsa-p1363.c | 159 ++++++++++++++++++++++++++++++++++++ crypto/ecdsa.c | 8 ++ crypto/testmgr.c | 18 ++++ crypto/testmgr.h | 28 +++++++ include/crypto/internal/ecc.h | 1 + 7 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 crypto/ecdsa-p1363.c (limited to 'include') diff --git a/crypto/Makefile b/crypto/Makefile index af43a1bd1cfa..81be78d39c2d 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -55,6 +55,7 @@ $(obj)/ecdsasignature.asn1.o: $(obj)/ecdsasignature.asn1.c $(obj)/ecdsasignature $(obj)/ecdsa-x962.o: $(obj)/ecdsasignature.asn1.h ecdsa_generic-y += ecdsa.o ecdsa_generic-y += ecdsa-x962.o +ecdsa_generic-y += ecdsa-p1363.o ecdsa_generic-y += ecdsasignature.asn1.o obj-$(CONFIG_CRYPTO_ECDSA) += ecdsa_generic.o diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index ec2c0e009b49..c98c1588802b 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -110,7 +110,8 @@ software_key_determine_akcipher(const struct public_key *pkey, return -EINVAL; *sig = false; } else if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) { - if (strcmp(encoding, "x962") != 0) + if (strcmp(encoding, "x962") != 0 && + strcmp(encoding, "p1363") != 0) return -EINVAL; /* * ECDSA signatures are taken over a raw hash, so they don't diff --git a/crypto/ecdsa-p1363.c b/crypto/ecdsa-p1363.c new file mode 100644 index 000000000000..eaae7214d69b --- /dev/null +++ b/crypto/ecdsa-p1363.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ECDSA P1363 signature encoding + * + * Copyright (c) 2024 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include + +struct ecdsa_p1363_ctx { + struct crypto_sig *child; +}; + +static int ecdsa_p1363_verify(struct crypto_sig *tfm, + const void *src, unsigned int slen, + const void *digest, unsigned int dlen) +{ + struct ecdsa_p1363_ctx *ctx = 
crypto_sig_ctx(tfm); + unsigned int keylen = crypto_sig_keysize(ctx->child); + unsigned int ndigits = DIV_ROUND_UP(keylen, sizeof(u64)); + struct ecdsa_raw_sig sig; + + if (slen != 2 * keylen) + return -EINVAL; + + ecc_digits_from_bytes(src, keylen, sig.r, ndigits); + ecc_digits_from_bytes(src + keylen, keylen, sig.s, ndigits); + + return crypto_sig_verify(ctx->child, &sig, sizeof(sig), digest, dlen); +} + +static unsigned int ecdsa_p1363_key_size(struct crypto_sig *tfm) +{ + struct ecdsa_p1363_ctx *ctx = crypto_sig_ctx(tfm); + + return crypto_sig_keysize(ctx->child); +} + +static unsigned int ecdsa_p1363_max_size(struct crypto_sig *tfm) +{ + struct ecdsa_p1363_ctx *ctx = crypto_sig_ctx(tfm); + + return 2 * crypto_sig_keysize(ctx->child); +} + +static unsigned int ecdsa_p1363_digest_size(struct crypto_sig *tfm) +{ + struct ecdsa_p1363_ctx *ctx = crypto_sig_ctx(tfm); + + return crypto_sig_digestsize(ctx->child); +} + +static int ecdsa_p1363_set_pub_key(struct crypto_sig *tfm, + const void *key, unsigned int keylen) +{ + struct ecdsa_p1363_ctx *ctx = crypto_sig_ctx(tfm); + + return crypto_sig_set_pubkey(ctx->child, key, keylen); +} + +static int ecdsa_p1363_init_tfm(struct crypto_sig *tfm) +{ + struct sig_instance *inst = sig_alg_instance(tfm); + struct crypto_sig_spawn *spawn = sig_instance_ctx(inst); + struct ecdsa_p1363_ctx *ctx = crypto_sig_ctx(tfm); + struct crypto_sig *child_tfm; + + child_tfm = crypto_spawn_sig(spawn); + if (IS_ERR(child_tfm)) + return PTR_ERR(child_tfm); + + ctx->child = child_tfm; + + return 0; +} + +static void ecdsa_p1363_exit_tfm(struct crypto_sig *tfm) +{ + struct ecdsa_p1363_ctx *ctx = crypto_sig_ctx(tfm); + + crypto_free_sig(ctx->child); +} + +static void ecdsa_p1363_free(struct sig_instance *inst) +{ + struct crypto_sig_spawn *spawn = sig_instance_ctx(inst); + + crypto_drop_sig(spawn); + kfree(inst); +} + +static int ecdsa_p1363_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + struct crypto_sig_spawn *spawn; + struct sig_instance *inst; + struct sig_alg *ecdsa_alg; + u32 mask; + int err; + + err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SIG, &mask); + if (err) + return err; + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + spawn = sig_instance_ctx(inst); + + err = crypto_grab_sig(spawn, sig_crypto_instance(inst), + crypto_attr_alg_name(tb[1]), 0, mask); + if (err) + goto err_free_inst; + + ecdsa_alg = crypto_spawn_sig_alg(spawn); + + err = -EINVAL; + if (strncmp(ecdsa_alg->base.cra_name, "ecdsa", 5) != 0) + goto err_free_inst; + + err = crypto_inst_setname(sig_crypto_instance(inst), tmpl->name, + &ecdsa_alg->base); + if (err) + goto err_free_inst; + + inst->alg.base.cra_priority = ecdsa_alg->base.cra_priority; + inst->alg.base.cra_ctxsize = sizeof(struct ecdsa_p1363_ctx); + + inst->alg.init = ecdsa_p1363_init_tfm; + inst->alg.exit = ecdsa_p1363_exit_tfm; + + inst->alg.verify = ecdsa_p1363_verify; + inst->alg.key_size = ecdsa_p1363_key_size; + inst->alg.max_size = ecdsa_p1363_max_size; + inst->alg.digest_size = ecdsa_p1363_digest_size; + inst->alg.set_pub_key = ecdsa_p1363_set_pub_key; + + inst->free = ecdsa_p1363_free; + + err = sig_register_instance(tmpl, inst); + if (err) { +err_free_inst: + ecdsa_p1363_free(inst); + } + return err; +} + +struct crypto_template ecdsa_p1363_tmpl = { + .name = "p1363", + .create = ecdsa_p1363_create, + .module = THIS_MODULE, +}; + +MODULE_ALIAS_CRYPTO("p1363"); diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index cf8e0c5d1dd8..117526d15dde 100644 --- 
a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -298,8 +298,15 @@ static int __init ecdsa_init(void) if (ret) goto x962_tmpl_error; + ret = crypto_register_template(&ecdsa_p1363_tmpl); + if (ret) + goto p1363_tmpl_error; + return 0; +p1363_tmpl_error: + crypto_unregister_template(&ecdsa_x962_tmpl); + x962_tmpl_error: crypto_unregister_sig(&ecdsa_nist_p521); @@ -318,6 +325,7 @@ nist_p256_error: static void __exit ecdsa_exit(void) { crypto_unregister_template(&ecdsa_x962_tmpl); + crypto_unregister_template(&ecdsa_p1363_tmpl); if (ecdsa_nist_p192_registered) crypto_unregister_sig(&ecdsa_nist_p192); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 9d6774c5a463..7d768f0ed81f 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -5515,6 +5515,24 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(nhpoly1305_tv_template) } + }, { + .alg = "p1363(ecdsa-nist-p192)", + .test = alg_test_null, + }, { + .alg = "p1363(ecdsa-nist-p256)", + .test = alg_test_sig, + .fips_allowed = 1, + .suite = { + .sig = __VECS(p1363_ecdsa_nist_p256_tv_template) + } + }, { + .alg = "p1363(ecdsa-nist-p384)", + .test = alg_test_null, + .fips_allowed = 1, + }, { + .alg = "p1363(ecdsa-nist-p521)", + .test = alg_test_null, + .fips_allowed = 1, }, { .alg = "pcbc(fcrypt)", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 2bd77eaafdf6..55aae1859d2c 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -1805,6 +1805,34 @@ static const struct sig_testvec x962_ecdsa_nist_p521_tv_template[] = { }, }; +/* + * ECDSA P1363 test vectors. + * + * Identical to ECDSA test vectors, except signature in "c" is P1363 encoded. + */ +static const struct sig_testvec p1363_ecdsa_nist_p256_tv_template[] = { + { + .key = /* secp256r1(sha256) */ + "\x04\xf1\xea\xc4\x53\xf3\xb9\x0e\x9f\x7e\xad\xe3\xea\xd7\x0e\x0f" + "\xd6\x98\x9a\xca\x92\x4d\x0a\x80\xdb\x2d\x45\xc7\xec\x4b\x97\x00" + "\x2f\xe9\x42\x6c\x29\xdc\x55\x0e\x0b\x53\x12\x9b\x2b\xad\x2c\xe9" + "\x80\xe6\xc5\x43\xc2\x1d\x5e\xbb\x65\x21\x50\xb6\x37\xb0\x03\x8e" + "\xb8", + .key_len = 65, + .m = + "\x8f\x43\x43\x46\x64\x8f\x6b\x96\xdf\x89\xdd\xa9\x01\xc5\x17\x6b" + "\x10\xa6\xd8\x39\x61\xdd\x3c\x1a\xc8\x8b\x59\xb2\xdc\x32\x7a\xa4", + .m_size = 32, + .c = + "\x08\x31\xfa\x74\x0d\x1d\x21\x5d\x09\xdc\x29\x63\xa8\x1a\xad\xfc" + "\xac\x44\xc3\xe8\x24\x11\x2d\xa4\x91\xdc\x02\x67\xdc\x0c\xd0\x82" + "\xbd\xff\xce\xee\x42\xc3\x97\xff\xf9\xa9\x81\xac\x4a\x50\xd0\x91" + "\x0a\x6e\x1b\xc4\xaf\xe1\x83\xc3\x4f\x2a\x65\x35\x23\xe3\x1d\xfa", + .c_size = 64, + .public_key_vec = true, + }, +}; + /* * EC-RDSA test vectors are generated by gost-engine. */ diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h index 68db975e0963..71483e5305e1 100644 --- a/include/crypto/internal/ecc.h +++ b/include/crypto/internal/ecc.h @@ -306,4 +306,5 @@ void ecc_point_mult_shamir(const struct ecc_point *result, const struct ecc_curve *curve); extern struct crypto_template ecdsa_x962_tmpl; +extern struct crypto_template ecdsa_p1363_tmpl; #endif -- cgit v1.2.3 From 452c55dcefa958f7e87798d764497485687e4bc3 Mon Sep 17 00:00:00 2001 From: Chenghai Huang Date: Sun, 29 Sep 2024 19:26:57 +0800 Subject: crypto: hisilicon/qm - fix the coding specifications issue Ensure that the inline function contains no more than 10 lines. move q_num_set() from hisi_acc_qm.h to qm.c. 
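For context, a driver consumes the now-exported helper from its module parameter ops in the usual way. The sketch below uses invented example_* names; the real call sites, which differ only in the PCI device ID, are updated in the hunks that follow:

  #include <linux/hisi_acc_qm.h>
  #include <linux/moduleparam.h>
  #include <linux/pci_ids.h>

  static u32 example_pf_q_num = 256;

  static int example_pf_q_num_set(const char *val, const struct kernel_param *kp)
  {
          return hisi_qm_q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_SEC_PF);
  }

  static const struct kernel_param_ops example_pf_q_num_ops = {
          .set = example_pf_q_num_set,
          .get = param_get_int,
  };

  module_param_cb(pf_q_num, &example_pf_q_num_ops, &example_pf_q_num, 0444);
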
Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 2 +- drivers/crypto/hisilicon/qm.c | 31 +++++++++++++++++++++++++++++ drivers/crypto/hisilicon/sec2/sec_main.c | 2 +- drivers/crypto/hisilicon/zip/zip_main.c | 2 +- include/linux/hisi_acc_qm.h | 33 ++----------------------------- 5 files changed, 36 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 23e8fb9414af..f129878559c8 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -415,7 +415,7 @@ static int pf_q_num_set(const char *val, const struct kernel_param *kp) { pf_q_num_flag = true; - return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_HPRE_PF); + return hisi_qm_q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_HPRE_PF); } static const struct kernel_param_ops hpre_pf_q_num_ops = { diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 07983af9e3e2..f7e8237e3a93 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -451,6 +451,37 @@ static struct qm_typical_qos_table shaper_cbs_s[] = { static void qm_irqs_unregister(struct hisi_qm *qm); static int qm_reset_device(struct hisi_qm *qm); +int hisi_qm_q_num_set(const char *val, const struct kernel_param *kp, + unsigned int device) +{ + struct pci_dev *pdev; + u32 n, q_num; + int ret; + + if (!val) + return -EINVAL; + + pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, device, NULL); + if (!pdev) { + q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2); + pr_info("No device found currently, suppose queue number is %u\n", + q_num); + } else { + if (pdev->revision == QM_HW_V1) + q_num = QM_QNUM_V1; + else + q_num = QM_QNUM_V2; + + pci_dev_put(pdev); + } + + ret = kstrtou32(val, 10, &n); + if (ret || n < QM_MIN_QNUM || n > q_num) + return -EINVAL; + + return param_set_int(val, kp); +} +EXPORT_SYMBOL_GPL(hisi_qm_q_num_set); static u32 qm_get_hw_error_status(struct hisi_qm *qm) { diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index c35533d8930b..3abd12017250 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -322,7 +322,7 @@ static int sec_pf_q_num_set(const char *val, const struct kernel_param *kp) { pf_q_num_flag = true; - return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_SEC_PF); + return hisi_qm_q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_SEC_PF); } static const struct kernel_param_ops sec_pf_q_num_ops = { diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index d07e47b48be0..f547e6732bf5 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -402,7 +402,7 @@ static int pf_q_num_set(const char *val, const struct kernel_param *kp) { pf_q_num_flag = true; - return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_ZIP_PF); + return hisi_qm_q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_ZIP_PF); } static const struct kernel_param_ops pf_q_num_ops = { diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 9d7754ad5e9b..389e95754776 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -436,37 +436,6 @@ struct hisi_qp { struct uacce_queue *uacce_q; }; -static inline int q_num_set(const char *val, const struct kernel_param *kp, - unsigned int device) -{ - struct pci_dev *pdev; - u32 n, q_num; - int ret; - - if (!val) - return -EINVAL; - - pdev = 
pci_get_device(PCI_VENDOR_ID_HUAWEI, device, NULL); - if (!pdev) { - q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2); - pr_info("No device found currently, suppose queue number is %u\n", - q_num); - } else { - if (pdev->revision == QM_HW_V1) - q_num = QM_QNUM_V1; - else - q_num = QM_QNUM_V2; - - pci_dev_put(pdev); - } - - ret = kstrtou32(val, 10, &n); - if (ret || n < QM_MIN_QNUM || n > q_num) - return -EINVAL; - - return param_set_int(val, kp); -} - static inline int vfs_num_set(const char *val, const struct kernel_param *kp) { u32 n; @@ -526,6 +495,8 @@ static inline void hisi_qm_del_list(struct hisi_qm *qm, struct hisi_qm_list *qm_ mutex_unlock(&qm_list->lock); } +int hisi_qm_q_num_set(const char *val, const struct kernel_param *kp, + unsigned int device); int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); -- cgit v1.2.3 From 21e92806d39c68af2accd1fb238c2daecfcf9fbd Mon Sep 17 00:00:00 2001 From: Donglin Peng Date: Sat, 14 Sep 2024 20:29:12 -0700 Subject: function_graph: Support recording and printing the function return address When using function_graph tracer to analyze the flow of kernel function execution, it is often necessary to quickly locate the exact line of code where the call occurs. While this may be easy at times, it can be more time-consuming when some functions are inlined or the flow is too long. This feature aims to simplify the process by recording the return address of traced funcions and printing it when outputing trace logs. To enhance human readability, the prefix 'ret=' is used for the kernel return value, while '<-' serves as the prefix for the return address in trace logs to make it look more like the function tracer. A new trace option named 'funcgraph-retaddr' has been introduced, and the existing option 'sym-addr' can be used to control the format of the return address. See below logs with both funcgraph-retval and funcgraph-retaddr enabled. 
0) | load_elf_binary() { /* <-bprm_execve+0x249/0x600 */ 0) | load_elf_phdrs() { /* <-load_elf_binary+0x84/0x1730 */ 0) | __kmalloc_noprof() { /* <-load_elf_phdrs+0x4a/0xb0 */ 0) 3.657 us | __cond_resched(); /* <-__kmalloc_noprof+0x28c/0x390 ret=0x0 */ 0) + 24.335 us | } /* __kmalloc_noprof ret=0xffff8882007f3000 */ 0) | kernel_read() { /* <-load_elf_phdrs+0x6c/0xb0 */ 0) | rw_verify_area() { /* <-kernel_read+0x2b/0x50 */ 0) | security_file_permission() { /* <-kernel_read+0x2b/0x50 */ 0) | selinux_file_permission() { /* <-security_file_permission+0x26/0x40 */ 0) | __inode_security_revalidate() { /* <-selinux_file_permission+0x6d/0x140 */ 0) 2.034 us | __cond_resched(); /* <-__inode_security_revalidate+0x5f/0x80 ret=0x0 */ 0) 6.602 us | } /* __inode_security_revalidate ret=0x0 */ 0) 2.214 us | avc_policy_seqno(); /* <-selinux_file_permission+0x107/0x140 ret=0x0 */ 0) + 16.670 us | } /* selinux_file_permission ret=0x0 */ 0) + 20.809 us | } /* security_file_permission ret=0x0 */ 0) + 25.217 us | } /* rw_verify_area ret=0x0 */ 0) | __kernel_read() { /* <-load_elf_phdrs+0x6c/0xb0 */ 0) | ext4_file_read_iter() { /* <-__kernel_read+0x160/0x2e0 */ Then, we can use the faddr2line to locate the source code, for example: $ ./scripts/faddr2line ./vmlinux load_elf_phdrs+0x6c/0xb0 load_elf_phdrs+0x6c/0xb0: elf_read at fs/binfmt_elf.c:471 (inlined by) load_elf_phdrs at fs/binfmt_elf.c:531 Link: https://lore.kernel.org/20240915032912.1118397-1-dolinux.peng@gmail.com Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409150605.HgUmU8ea-lkp@intel.com/ Signed-off-by: Donglin Peng [ Rebased to handle text_delta offsets ] Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 27 ++- kernel/trace/Kconfig | 10 + kernel/trace/fgraph.c | 22 ++- kernel/trace/ftrace.c | 3 +- kernel/trace/trace.h | 11 +- kernel/trace/trace_entries.h | 29 ++- kernel/trace/trace_functions_graph.c | 216 ++++++++++++++++----- kernel/trace/trace_irqsoff.c | 3 +- kernel/trace/trace_sched_wakeup.c | 3 +- kernel/trace/trace_selftest.c | 9 +- .../ftrace/test.d/ftrace/fgraph-retval.tc | 2 +- 11 files changed, 274 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e684addf6508..2ac3b3b53cd0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1040,6 +1040,17 @@ struct ftrace_graph_ent { int depth; } __packed; +/* + * Structure that defines an entry function trace with retaddr. + * It's already packed but the attribute "packed" is needed + * to remove extra padding at the end. + */ +struct fgraph_retaddr_ent { + unsigned long func; /* Current function */ + int depth; + unsigned long retaddr; /* Return address */ +} __packed; + /* * Structure that defines a return function trace. 
* It's already packed but the attribute "packed" is needed @@ -1057,19 +1068,29 @@ struct ftrace_graph_ret { unsigned long long rettime; } __packed; +struct fgraph_extras; struct fgraph_ops; /* Type of the callback handlers for tracing function graph*/ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *, struct fgraph_ops *); /* return */ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, - struct fgraph_ops *); /* entry */ + struct fgraph_ops *, + struct fgraph_extras *); /* entry */ -extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); +extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops, + struct fgraph_extras *extras); bool ftrace_pids_enabled(struct ftrace_ops *ops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* Used to convey some extra datas when creating a graph entry */ +struct fgraph_extras { + u32 flags; + unsigned long retaddr; +}; + struct fgraph_ops { trace_func_graph_ent_t entryfunc; trace_func_graph_ret_t retfunc; @@ -1115,6 +1136,8 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp); unsigned long *fgraph_get_task_var(struct fgraph_ops *gops); +u32 graph_tracer_flags_get(u32 flags); + /* * Sometimes we don't want to trace a function with the function * graph tracer but we want them to keep traced by the usual function diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 721c3b221048..74c2b1d43bb9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -242,6 +242,16 @@ config FUNCTION_GRAPH_RETVAL enable it via the trace option funcgraph-retval. See Documentation/trace/ftrace.rst +config FUNCTION_GRAPH_RETADDR + bool "Kernel Function Graph Return Address" + depends on FUNCTION_GRAPH_TRACER + default n + help + Support recording and printing the function return address when + using function graph tracer. It can be helpful to locate code line that + the function is called. This feature is off by default, and you can + enable it via the trace option funcgraph-retaddr. 
+ config DYNAMIC_FTRACE bool "enable/disable function tracing dynamically" depends on FUNCTION_TRACER diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 58a28ec35dab..875aefe60a13 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -290,7 +290,8 @@ static inline unsigned long make_data_type_val(int idx, int size, int offset) } /* ftrace_graph_entry set to this to tell some archs to run function graph */ -static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops) +static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops, + struct fgraph_extras *extras) { return 0; } @@ -518,7 +519,8 @@ int __weak ftrace_disable_ftrace_graph_caller(void) #endif int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { return 0; } @@ -646,13 +648,20 @@ int function_graph_enter(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp) { struct ftrace_graph_ent trace; + struct fgraph_extras extras; unsigned long bitmap = 0; int offset; int i; + int idx = 0; trace.func = func; trace.depth = ++current->curr_ret_depth; + extras.flags = graph_tracer_flags_get(TRACE_GRAPH_PRINT_RETADDR); + if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) + && extras.flags & TRACE_GRAPH_PRINT_RETADDR) + extras.retaddr = ftrace_graph_ret_addr(current, &idx, ret, retp); + offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0); if (offset < 0) goto out; @@ -661,7 +670,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, if (static_branch_likely(&fgraph_do_direct)) { int save_curr_ret_stack = current->curr_ret_stack; - if (static_call(fgraph_func)(&trace, fgraph_direct_gops)) + if (static_call(fgraph_func)(&trace, fgraph_direct_gops, &extras)) bitmap |= BIT(fgraph_direct_gops->idx); else /* Clear out any saved storage */ @@ -679,7 +688,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, save_curr_ret_stack = current->curr_ret_stack; if (ftrace_ops_test(&gops->ops, func, NULL) && - gops->entryfunc(&trace, gops)) + gops->entryfunc(&trace, gops, &extras)) bitmap |= BIT(i); else /* Clear out any saved storage */ @@ -1136,7 +1145,8 @@ void ftrace_graph_exit_task(struct task_struct *t) #ifdef CONFIG_DYNAMIC_FTRACE static int fgraph_pid_func(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { struct trace_array *tr = gops->ops.private; int pid; @@ -1150,7 +1160,7 @@ static int fgraph_pid_func(struct ftrace_graph_ent *trace, return 0; } - return gops->saved_func(trace, gops); + return gops->saved_func(trace, gops, NULL); } void fgraph_update_pid_func(void) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cae388122ca8..5d87dac83b80 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -827,7 +827,8 @@ struct profile_fgraph_data { }; static int profile_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { struct profile_fgraph_data *profile_data; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2f8017f8d34d..13f08f257c0b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -46,6 +46,7 @@ enum trace_type { TRACE_BRANCH, TRACE_GRAPH_RET, TRACE_GRAPH_ENT, + TRACE_GRAPH_RETADDR_ENT, TRACE_USER_STACK, TRACE_BLK, TRACE_BPUTS, @@ -512,6 +513,8 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_branch, 
TRACE_BRANCH); \ IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ TRACE_GRAPH_ENT); \ + IF_ASSIGN(var, ent, struct fgraph_retaddr_ent_entry,\ + TRACE_GRAPH_RETADDR_ENT); \ IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct func_repeats_entry, \ @@ -692,7 +695,8 @@ void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops); -int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); +int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops, + struct fgraph_extras *extras); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); @@ -879,6 +883,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash) #define TRACE_GRAPH_GRAPH_TIME 0x400 #define TRACE_GRAPH_PRINT_RETVAL 0x800 #define TRACE_GRAPH_PRINT_RETVAL_HEX 0x1000 +#define TRACE_GRAPH_PRINT_RETADDR 0x2000 #define TRACE_GRAPH_PRINT_FILL_SHIFT 28 #define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT) @@ -900,6 +905,10 @@ extern void graph_trace_close(struct trace_iterator *iter); extern int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned int trace_ctx); +extern int __trace_graph_retaddr_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned int trace_ctx, + unsigned long retaddr); extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned int trace_ctx); diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index c47422b20908..82fd174ebbe0 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -85,9 +85,35 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry, F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth) ); -/* Function return entry */ +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + +/* Function call entry with a return address */ +FTRACE_ENTRY_PACKED(fgraph_retaddr_entry, fgraph_retaddr_ent_entry, + + TRACE_GRAPH_RETADDR_ENT, + + F_STRUCT( + __field_struct( struct fgraph_retaddr_ent, graph_ent ) + __field_packed( unsigned long, graph_ent, func ) + __field_packed( int, graph_ent, depth ) + __field_packed( unsigned long, graph_ent, retaddr ) + ), + + F_printk("--> %ps (%d) <- %ps", (void *)__entry->func, __entry->depth, + (void *)__entry->retaddr) +); + +#else + +#ifndef fgraph_retaddr_ent_entry +#define fgraph_retaddr_ent_entry ftrace_graph_ent_entry +#endif + +#endif + #ifdef CONFIG_FUNCTION_GRAPH_RETVAL +/* Function return entry */ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry, TRACE_GRAPH_RET, @@ -110,6 +136,7 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry, #else +/* Function return entry */ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry, TRACE_GRAPH_RET, diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 5c1b150fbba3..3dd63ae2afe8 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -31,7 +31,10 @@ struct fgraph_data { struct fgraph_cpu_data __percpu *cpu_data; /* Place to preserve last processed entry. 
*/ - struct ftrace_graph_ent_entry ent; + union { + struct ftrace_graph_ent_entry ent; + struct fgraph_retaddr_ent_entry rent; + } ent; struct ftrace_graph_ret_entry ret; int failed; int cpu; @@ -63,6 +66,10 @@ static struct tracer_opt trace_opts[] = { { TRACER_OPT(funcgraph-retval, TRACE_GRAPH_PRINT_RETVAL) }, /* Display function return value in hexadecimal format ? */ { TRACER_OPT(funcgraph-retval-hex, TRACE_GRAPH_PRINT_RETVAL_HEX) }, +#endif +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + /* Display function return address ? */ + { TRACER_OPT(funcgraph-retaddr, TRACE_GRAPH_PRINT_RETADDR) }, #endif /* Include sleep time (scheduled out) between entry and return */ { TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) }, @@ -83,6 +90,11 @@ static struct tracer_flags tracer_flags = { .opts = trace_opts }; +u32 graph_tracer_flags_get(u32 flags) +{ + return tracer_flags.val & flags; +} + /* * DURATION column is being also used to display IRQ signs, * following values are used by print_graph_irq and others @@ -119,6 +131,40 @@ int __trace_graph_entry(struct trace_array *tr, return 1; } +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR +int __trace_graph_retaddr_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned int trace_ctx, + unsigned long retaddr) +{ + struct trace_event_call *call = &event_fgraph_retaddr_entry; + struct ring_buffer_event *event; + struct trace_buffer *buffer = tr->array_buffer.buffer; + struct fgraph_retaddr_ent_entry *entry; + + event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RETADDR_ENT, + sizeof(*entry), trace_ctx); + if (!event) + return 0; + entry = ring_buffer_event_data(event); + entry->graph_ent.func = trace->func; + entry->graph_ent.depth = trace->depth; + entry->graph_ent.retaddr = retaddr; + if (!call_filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit_nostack(buffer, event); + + return 1; +} +#else +int __trace_graph_retaddr_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned int trace_ctx, + unsigned long retaddr) +{ + return 1; +} +#endif + static inline int ftrace_graph_ignore_irqs(void) { if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT)) @@ -133,7 +179,8 @@ struct fgraph_times { }; int trace_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; @@ -199,7 +246,12 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { trace_ctx = tracing_gen_ctx_flags(flags); - ret = __trace_graph_entry(tr, trace, trace_ctx); + if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) && extras + && (extras->flags & TRACE_GRAPH_PRINT_RETADDR))) + ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, + extras->retaddr); + else + ret = __trace_graph_entry(tr, trace, trace_ctx); } else { ret = 0; } @@ -507,7 +559,7 @@ get_return_for_leaf(struct trace_iterator *iter, * then we just reuse the data from before. */ if (data && data->failed) { - curr = &data->ent; + curr = &data->ent.ent; next = &data->ret; } else { @@ -537,7 +589,10 @@ get_return_for_leaf(struct trace_iterator *iter, * Save current and next entries for later reference * if the output fails. 
*/ - data->ent = *curr; + if (unlikely(curr->ent.type == TRACE_GRAPH_RETADDR_ENT)) + data->ent.rent = *(struct fgraph_retaddr_ent_entry *)curr; + else + data->ent.ent = *curr; /* * If the next event is not a return type, then * we only care about what type it is. Otherwise we can @@ -701,52 +756,96 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration, } #ifdef CONFIG_FUNCTION_GRAPH_RETVAL - #define __TRACE_GRAPH_PRINT_RETVAL TRACE_GRAPH_PRINT_RETVAL +#else +#define __TRACE_GRAPH_PRINT_RETVAL 0 +#endif -static void print_graph_retval(struct trace_seq *s, unsigned long retval, - bool leaf, void *func, bool hex_format) +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR +#define __TRACE_GRAPH_PRINT_RETADDR TRACE_GRAPH_PRINT_RETADDR +static void print_graph_retaddr(struct trace_seq *s, struct fgraph_retaddr_ent_entry *entry, + u32 trace_flags, bool comment) +{ + if (comment) + trace_seq_puts(s, " /*"); + + trace_seq_puts(s, " <-"); + seq_print_ip_sym(s, entry->graph_ent.retaddr, trace_flags | TRACE_ITER_SYM_OFFSET); + + if (comment) + trace_seq_puts(s, " */"); +} +#else +#define __TRACE_GRAPH_PRINT_RETADDR 0 +#define print_graph_retaddr(_seq, _entry, _tflags, _comment) do { } while (0) +#endif + +#if defined(CONFIG_FUNCTION_GRAPH_RETVAL) || defined(CONFIG_FUNCTION_GRAPH_RETADDR) + +static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entry *entry, + struct ftrace_graph_ret *graph_ret, void *func, + u32 opt_flags, u32 trace_flags) { unsigned long err_code = 0; + unsigned long retval = 0; + bool print_retaddr = false; + bool print_retval = false; + bool hex_format = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL_HEX); - if (retval == 0 || hex_format) - goto done; +#ifdef CONFIG_FUNCTION_GRAPH_RETVAL + retval = graph_ret->retval; + print_retval = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL); +#endif - /* Check if the return value matches the negative format */ - if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) && - (((u64)retval) >> 32) == 0) { - /* sign extension */ - err_code = (unsigned long)(s32)retval; - } else { - err_code = retval; +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + print_retaddr = !!(opt_flags & TRACE_GRAPH_PRINT_RETADDR); +#endif + + if (print_retval && retval && !hex_format) { + /* Check if the return value matches the negative format */ + if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) && + (((u64)retval) >> 32) == 0) { + err_code = sign_extend64(retval, 31); + } else { + err_code = retval; + } + + if (!IS_ERR_VALUE(err_code)) + err_code = 0; } - if (!IS_ERR_VALUE(err_code)) - err_code = 0; + if (entry) { + if (entry->ent.type != TRACE_GRAPH_RETADDR_ENT) + print_retaddr = false; -done: - if (leaf) { - if (hex_format || (err_code == 0)) - trace_seq_printf(s, "%ps(); /* = 0x%lx */\n", - func, retval); + trace_seq_printf(s, "%ps();", func); + if (print_retval || print_retaddr) + trace_seq_puts(s, " /*"); else - trace_seq_printf(s, "%ps(); /* = %ld */\n", - func, err_code); + trace_seq_putc(s, '\n'); } else { + print_retaddr = false; + trace_seq_printf(s, "} /* %ps", func); + } + + if (print_retaddr) + print_graph_retaddr(s, (struct fgraph_retaddr_ent_entry *)entry, + trace_flags, false); + + if (print_retval) { if (hex_format || (err_code == 0)) - trace_seq_printf(s, "} /* %ps = 0x%lx */\n", - func, retval); + trace_seq_printf(s, " ret=0x%lx", retval); else - trace_seq_printf(s, "} /* %ps = %ld */\n", - func, err_code); + trace_seq_printf(s, " ret=%ld", err_code); } + + if (!entry || print_retval || print_retaddr) + trace_seq_puts(s, " */\n"); } #else 
-#define __TRACE_GRAPH_PRINT_RETVAL 0 - -#define print_graph_retval(_seq, _retval, _leaf, _func, _format) do {} while (0) +#define print_graph_retval(_seq, _ent, _ret, _func, _opt_flags, _trace_flags) do {} while (0) #endif @@ -798,14 +897,15 @@ print_graph_entry_leaf(struct trace_iterator *iter, trace_seq_putc(s, ' '); /* - * Write out the function return value if the option function-retval is - * enabled. + * Write out the function return value or return address */ - if (flags & __TRACE_GRAPH_PRINT_RETVAL) - print_graph_retval(s, graph_ret->retval, true, (void *)func, - !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX)); - else + if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR)) { + print_graph_retval(s, entry, graph_ret, + (void *)graph_ret->func + iter->tr->text_delta, + flags, tr->trace_flags); + } else { trace_seq_printf(s, "%ps();\n", (void *)func); + } print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET, cpu, iter->ent->pid, flags); @@ -846,7 +946,12 @@ print_graph_entry_nested(struct trace_iterator *iter, func = call->func + iter->tr->text_delta; - trace_seq_printf(s, "%ps() {\n", (void *)func); + trace_seq_printf(s, "%ps() {", (void *)func); + if (flags & __TRACE_GRAPH_PRINT_RETADDR && + entry->ent.type == TRACE_GRAPH_RETADDR_ENT) + print_graph_retaddr(s, (struct fgraph_retaddr_ent_entry *)entry, + tr->trace_flags, true); + trace_seq_putc(s, '\n'); if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; @@ -1093,11 +1198,10 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, /* * Always write out the function name and its return value if the - * function-retval option is enabled. + * funcgraph-retval option is enabled. */ if (flags & __TRACE_GRAPH_PRINT_RETVAL) { - print_graph_retval(s, trace->retval, false, (void *)func, - !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX)); + print_graph_retval(s, NULL, trace, (void *)func, flags, tr->trace_flags); } else { /* * If the return function does not have a matching entry, @@ -1212,7 +1316,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) * to print out the missing entry which would never go out. 
*/ if (data && data->failed) { - field = &data->ent; + field = &data->ent.ent; iter->cpu = data->cpu; ret = print_graph_entry(field, s, iter, flags); if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) { @@ -1236,6 +1340,16 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) saved = *field; return print_graph_entry(&saved, s, iter, flags); } +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + case TRACE_GRAPH_RETADDR_ENT: { + struct fgraph_retaddr_ent_entry saved; + struct fgraph_retaddr_ent_entry *rfield; + + trace_assign_type(rfield, entry); + saved = *rfield; + return print_graph_entry((struct ftrace_graph_ent_entry *)&saved, s, iter, flags); + } +#endif case TRACE_GRAPH_RET: { struct ftrace_graph_ret_entry *field; trace_assign_type(field, entry); @@ -1430,6 +1544,13 @@ static struct trace_event graph_trace_entry_event = { .funcs = &graph_functions, }; +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR +static struct trace_event graph_trace_retaddr_entry_event = { + .type = TRACE_GRAPH_RETADDR_ENT, + .funcs = &graph_functions, +}; +#endif + static struct trace_event graph_trace_ret_event = { .type = TRACE_GRAPH_RET, .funcs = &graph_functions @@ -1516,6 +1637,13 @@ static __init int init_graph_trace(void) return 1; } +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + if (!register_trace_event(&graph_trace_retaddr_entry_event)) { + pr_warn("Warning: could not register graph trace retaddr events\n"); + return 1; + } +#endif + if (!register_trace_event(&graph_trace_ret_event)) { pr_warn("Warning: could not register graph trace events\n"); return 1; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index fce064e20570..eb3aa36cf10f 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -176,7 +176,8 @@ static int irqsoff_display_graph(struct trace_array *tr, int set) } static int irqsoff_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index ae2ace5e515a..155de2551507 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -113,7 +113,8 @@ static int wakeup_display_graph(struct trace_array *tr, int set) } static int wakeup_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index c4ad7cd7e778..fbb99f8c8062 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_PRINT: case TRACE_BRANCH: case TRACE_GRAPH_ENT: + case TRACE_GRAPH_RETADDR_ENT: case TRACE_GRAPH_RET: return 1; } @@ -773,7 +774,8 @@ struct fgraph_fixture { }; static __init int store_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { struct fgraph_fixture *fixture = container_of(gops, struct fgraph_fixture, gops); const char *type = fixture->store_type_name; @@ -1024,7 +1026,8 @@ static unsigned int graph_hang_thresh; /* Wrap the real function entry probe to avoid possible hanging */ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras 
*extras) { /* This is harmlessly racy, we want to approximately detect a hang */ if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) { @@ -1038,7 +1041,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace, return 0; } - return trace_graph_entry(trace, gops); + return trace_graph_entry(trace, gops, NULL); } static struct fgraph_ops fgraph_ops __initdata = { diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc index e34c0bdef3ed..e8e46378b88d 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc @@ -29,7 +29,7 @@ set -e : "Test printing the error code in signed decimal format" echo 0 > options/funcgraph-retval-hex -count=`cat trace | grep 'proc_reg_write' | grep '= -5' | wc -l` +count=`cat trace | grep 'proc_reg_write' | grep '=-5' | wc -l` if [ $count -eq 0 ]; then fail "Return value can not be printed in signed decimal format" fi -- cgit v1.2.3 From 4436df478860bb5da1864df2cd20f281a210f139 Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Fri, 7 Jun 2024 18:19:12 +0200 Subject: batman-adv: Add flex array to struct batadv_tvlv_tt_data The "struct batadv_tvlv_tt_data" uses a dynamically sized set of trailing elements. Specifically, it uses an array of structures of type "batadv_tvlv_tt_vlan_data". So, use the preferred way in the kernel declaring a flexible array [1]. At the same time, prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). In this case, it is important to note that the attribute used is specifically __counted_by_be since variable "num_vlan" is of type __be16. The following change to the "batadv_tt_tvlv_ogm_handler_v1" function: - tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1); - tt_change = (struct batadv_tvlv_tt_change *)(tt_vlan + num_vlan); + tt_change = (struct batadv_tvlv_tt_change *)((void *)tt_data + + flex_size); is intended to prevent the compiler from generating an "out-of-bounds" notification due to the __counted_by attribute. The compiler can do a pointer calculation using the vlan_data flexible array memory, or in other words, this may be calculated as an array offset, since it is the same as: &tt_data->vlan_data[num_vlan] Therefore, we go past the end of the array. In other "multiple trailing flexible array" situations, this has been solved by addressing from the base pointer, since the compiler either knows the full allocation size or it knows nothing about it (this case, since it came from a "void *" function argument). The order in which the structure batadv_tvlv_tt_data and the structure batadv_tvlv_tt_vlan_data are defined must be swap to avoid an incomplete type error. Also, avoid the open-coded arithmetic in memory allocator functions [2] using the "struct_size" macro and use the "flex_array_size" helper to clarify some calculations, when possible. Moreover, the new structure member also allow us to avoid the open-coded arithmetic on pointers in some situations. Take advantage of this. This code was detected with the help of Coccinelle, and audited and modified manually. 
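As a compact illustration of the pattern being applied (all names below are invented; only __counted_by_be(), struct_size() and the declaration-ordering requirement mirror the actual change), a trailing flexible array counted by a big-endian field looks like this:

  #include <linux/overflow.h>
  #include <linux/slab.h>
  #include <linux/stddef.h>
  #include <linux/types.h>
  #include <asm/byteorder.h>

  struct example_item {
          __be16 id;
          __be32 crc;
  };

  /* The element type above must be fully defined before it is used as a
   * flexible array member -- the same ordering constraint mentioned above. */
  struct example_hdr {
          __u8   flags;
          __u8   version;
          __be16 num_items;
          struct example_item items[] __counted_by_be(num_items);
  };

  static struct example_hdr *example_alloc(u16 n)
  {
          /* struct_size() replaces open-coded "sizeof(*hdr) + n * sizeof(item)". */
          struct example_hdr *hdr = kzalloc(struct_size(hdr, items, n), GFP_KERNEL);

          if (hdr)
                  hdr->num_items = cpu_to_be16(n);        /* counter is big endian */
          return hdr;
  }
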
Link: https://www.kernel.org/doc/html/next/process/deprecated.html#zero-length-and-one-element-arrays [1] Link: https://www.kernel.org/doc/html/next/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [2] Reviewed-by: Kees Cook Signed-off-by: Erick Archer Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batadv_packet.h | 29 ++++++++++++---------- net/batman-adv/translation-table.c | 49 ++++++++++++++++---------------------- 2 files changed, 36 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 6e25753015df..439132a819ea 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -9,6 +9,7 @@ #include #include +#include #include /** @@ -592,19 +593,6 @@ struct batadv_tvlv_gateway_data { __be32 bandwidth_up; }; -/** - * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container - * @flags: translation table flags (see batadv_tt_data_flags) - * @ttvn: translation table version number - * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by - * one batadv_tvlv_tt_vlan_data object per announced vlan - */ -struct batadv_tvlv_tt_data { - __u8 flags; - __u8 ttvn; - __be16 num_vlan; -}; - /** * struct batadv_tvlv_tt_vlan_data - vlan specific tt data propagated through * the tt tvlv container @@ -618,6 +606,21 @@ struct batadv_tvlv_tt_vlan_data { __u16 reserved; }; +/** + * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container + * @flags: translation table flags (see batadv_tt_data_flags) + * @ttvn: translation table version number + * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by + * one batadv_tvlv_tt_vlan_data object per announced vlan + * @vlan_data: array of batadv_tvlv_tt_vlan_data objects + */ +struct batadv_tvlv_tt_data { + __u8 flags; + __u8 ttvn; + __be16 num_vlan; + struct batadv_tvlv_tt_vlan_data vlan_data[] __counted_by_be(num_vlan); +}; + /** * struct batadv_tvlv_tt_change - translation table diff data * @flags: status indicators concerning the non-mesh client (see diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 2243cec18ecc..6815d1262feb 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -856,8 +857,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, num_entries += atomic_read(&vlan->tt.num_entries); } - change_offset = sizeof(**tt_data); - change_offset += num_vlan * sizeof(*tt_vlan); + change_offset = struct_size(*tt_data, vlan_data, num_vlan); /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) @@ -876,7 +876,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, (*tt_data)->ttvn = atomic_read(&orig_node->last_ttvn); (*tt_data)->num_vlan = htons(num_vlan); - tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); + tt_vlan = (*tt_data)->vlan_data; hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); @@ -936,8 +936,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, total_entries += vlan_entries; } - change_offset = sizeof(**tt_data); - change_offset += num_vlan * sizeof(*tt_vlan); + change_offset = struct_size(*tt_data, vlan_data, num_vlan); /* if tt_len is negative, 
allocate the space needed by the full table */ if (*tt_len < 0) @@ -956,7 +955,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, (*tt_data)->ttvn = atomic_read(&bat_priv->tt.vn); (*tt_data)->num_vlan = htons(num_vlan); - tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); + tt_vlan = (*tt_data)->vlan_data; hlist_for_each_entry(vlan, &bat_priv->softif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); if (vlan_entries < 1) @@ -2916,7 +2915,6 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv, { struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; struct batadv_tt_req_node *tt_req_node = NULL; - struct batadv_tvlv_tt_vlan_data *tt_vlan_req; struct batadv_hard_iface *primary_if; bool ret = false; int i, size; @@ -2932,7 +2930,7 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv, if (!tt_req_node) goto out; - size = sizeof(*tvlv_tt_data) + sizeof(*tt_vlan_req) * num_vlan; + size = struct_size(tvlv_tt_data, vlan_data, num_vlan); tvlv_tt_data = kzalloc(size, GFP_ATOMIC); if (!tvlv_tt_data) goto out; @@ -2944,12 +2942,10 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv, /* send all the CRCs within the request. This is needed by intermediate * nodes to ensure they have the correct table before replying */ - tt_vlan_req = (struct batadv_tvlv_tt_vlan_data *)(tvlv_tt_data + 1); for (i = 0; i < num_vlan; i++) { - tt_vlan_req->vid = tt_vlan->vid; - tt_vlan_req->crc = tt_vlan->crc; + tvlv_tt_data->vlan_data[i].vid = tt_vlan->vid; + tvlv_tt_data->vlan_data[i].crc = tt_vlan->crc; - tt_vlan_req++; tt_vlan++; } @@ -3001,7 +2997,6 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, struct batadv_orig_node *res_dst_orig_node = NULL; struct batadv_tvlv_tt_change *tt_change; struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; - struct batadv_tvlv_tt_vlan_data *tt_vlan; bool ret = false, full_table; u8 orig_ttvn, req_ttvn; u16 tvlv_len; @@ -3024,10 +3019,9 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, orig_ttvn = (u8)atomic_read(&req_dst_orig_node->last_ttvn); req_ttvn = tt_data->ttvn; - tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1); /* this node doesn't have the requested data */ if (orig_ttvn != req_ttvn || - !batadv_tt_global_check_crc(req_dst_orig_node, tt_vlan, + !batadv_tt_global_check_crc(req_dst_orig_node, tt_data->vlan_data, ntohs(tt_data->num_vlan))) goto out; @@ -3370,7 +3364,6 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node = NULL; struct batadv_tvlv_tt_change *tt_change; u8 *tvlv_ptr = (u8 *)tt_data; - u16 change_offset; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n", @@ -3383,10 +3376,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, spin_lock_bh(&orig_node->tt_lock); - change_offset = sizeof(struct batadv_tvlv_tt_vlan_data); - change_offset *= ntohs(tt_data->num_vlan); - change_offset += sizeof(*tt_data); - tvlv_ptr += change_offset; + tvlv_ptr += struct_size(tt_data, vlan_data, ntohs(tt_data->num_vlan)); tt_change = (struct batadv_tvlv_tt_change *)tvlv_ptr; if (tt_data->flags & BATADV_TT_FULL_TABLE) { @@ -3985,10 +3975,10 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, u8 flags, void *tvlv_value, u16 tvlv_value_len) { - struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_tvlv_tt_change *tt_change; struct batadv_tvlv_tt_data *tt_data; u16 num_entries, num_vlan; + size_t flex_size; if 
(tvlv_value_len < sizeof(*tt_data)) return; @@ -3998,17 +3988,18 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, num_vlan = ntohs(tt_data->num_vlan); - if (tvlv_value_len < sizeof(*tt_vlan) * num_vlan) + flex_size = flex_array_size(tt_data, vlan_data, num_vlan); + if (tvlv_value_len < flex_size) return; - tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1); - tt_change = (struct batadv_tvlv_tt_change *)(tt_vlan + num_vlan); - tvlv_value_len -= sizeof(*tt_vlan) * num_vlan; + tt_change = (struct batadv_tvlv_tt_change *)((void *)tt_data + + flex_size); + tvlv_value_len -= flex_size; num_entries = batadv_tt_entries(tvlv_value_len); - batadv_tt_update_orig(bat_priv, orig, tt_vlan, num_vlan, tt_change, - num_entries, tt_data->ttvn); + batadv_tt_update_orig(bat_priv, orig, tt_data->vlan_data, num_vlan, + tt_change, num_entries, tt_data->ttvn); } /** @@ -4039,8 +4030,8 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, tt_data = tvlv_value; tvlv_value_len -= sizeof(*tt_data); - tt_vlan_len = sizeof(struct batadv_tvlv_tt_vlan_data); - tt_vlan_len *= ntohs(tt_data->num_vlan); + tt_vlan_len = flex_array_size(tt_data, vlan_data, + ntohs(tt_data->num_vlan)); if (tvlv_value_len < tt_vlan_len) return NET_RX_SUCCESS; -- cgit v1.2.3 From 61b17d072d811df5733a1570889b8c6fa6834bf8 Mon Sep 17 00:00:00 2001 From: Danila Tikhonov Date: Sun, 18 Aug 2024 23:43:39 +0300 Subject: dt-bindings: clock: qcom,gcc-sm8450: Add SM8475 GCC bindings Add new entry to the SM8450 dt-bindings and add SM8475-specific clocks to SM8450 GCC header file. Signed-off-by: Danila Tikhonov Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240818204348.197788-2-danila@jiaxyga.com Signed-off-by: Bjorn Andersson --- Documentation/devicetree/bindings/clock/qcom,gcc-sm8450.yaml | 4 +++- include/dt-bindings/clock/qcom,gcc-sm8450.h | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm8450.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm8450.yaml index d848361beeb3..77273aee5d52 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-sm8450.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm8450.yaml @@ -17,7 +17,9 @@ description: | properties: compatible: - const: qcom,gcc-sm8450 + enum: + - qcom,gcc-sm8450 + - qcom,sm8475-gcc clocks: items: diff --git a/include/dt-bindings/clock/qcom,gcc-sm8450.h b/include/dt-bindings/clock/qcom,gcc-sm8450.h index 9679410843a0..7320e63c3a2f 100644 --- a/include/dt-bindings/clock/qcom,gcc-sm8450.h +++ b/include/dt-bindings/clock/qcom,gcc-sm8450.h @@ -194,6 +194,9 @@ #define GCC_VIDEO_AXI0_CLK 182 #define GCC_VIDEO_AXI1_CLK 183 #define GCC_VIDEO_XO_CLK 184 +/* Additional SM8475-specific clocks */ +#define SM8475_GCC_GPLL2 185 +#define SM8475_GCC_GPLL3 186 /* GCC resets */ #define GCC_CAMERA_BCR 0 -- cgit v1.2.3 From 0a97195d2181caced187acd7454464b8e37021d7 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Tue, 3 Sep 2024 15:45:10 +0530 Subject: EDAC/qcom: Make irq configuration optional On most modern qualcomm SoCs, the configuration necessary to enable the Tag/Data RAM related irqs being propagated to the SoC irq controller is already done in firmware (in DSF or 'DDR System Firmware') On some like the x1e80100, these registers aren't even accesible to the kernel causing a crash when edac device is probed. 
Hence, make the irq configuration optional in the driver and mark x1e80100 as the SoC on which this should be avoided. Fixes: af16b00578a7 ("arm64: dts: qcom: Add base X1E80100 dtsi and the QCP dts") Reported-by: Bjorn Andersson Signed-off-by: Rajendra Nayak Reviewed-by: Manivannan Sadhasivam Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240903101510.3452734-1-quic_rjendra@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/edac/qcom_edac.c | 8 +++++--- drivers/soc/qcom/llcc-qcom.c | 3 +++ include/linux/soc/qcom/llcc-qcom.h | 2 ++ 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c index d3cd4cc54ace..a9a8ba067007 100644 --- a/drivers/edac/qcom_edac.c +++ b/drivers/edac/qcom_edac.c @@ -342,9 +342,11 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev) int ecc_irq; int rc; - rc = qcom_llcc_core_setup(llcc_driv_data, llcc_driv_data->bcast_regmap); - if (rc) - return rc; + if (!llcc_driv_data->ecc_irq_configured) { + rc = qcom_llcc_core_setup(llcc_driv_data, llcc_driv_data->bcast_regmap); + if (rc) + return rc; + } /* Allocate edac control info */ edev_ctl = edac_device_alloc_ctl_info(0, "qcom-llcc", 1, "bank", diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c index 8fa4ffd3a9b5..28bcc65e91be 100644 --- a/drivers/soc/qcom/llcc-qcom.c +++ b/drivers/soc/qcom/llcc-qcom.c @@ -139,6 +139,7 @@ struct qcom_llcc_config { int size; bool need_llcc_cfg; bool no_edac; + bool irq_configured; }; struct qcom_sct_config { @@ -718,6 +719,7 @@ static const struct qcom_llcc_config x1e80100_cfg[] = { .need_llcc_cfg = true, .reg_offset = llcc_v2_1_reg_offset, .edac_reg_offset = &llcc_v2_1_edac_reg_offset, + .irq_configured = true, }, }; @@ -1345,6 +1347,7 @@ static int qcom_llcc_probe(struct platform_device *pdev) drv_data->cfg = llcc_cfg; drv_data->cfg_size = sz; drv_data->edac_reg_offset = cfg->edac_reg_offset; + drv_data->ecc_irq_configured = cfg->irq_configured; mutex_init(&drv_data->lock); platform_set_drvdata(pdev, drv_data); diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 9e9f528b1370..2f20281d4ad4 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -125,6 +125,7 @@ struct llcc_edac_reg_offset { * @num_banks: Number of llcc banks * @bitmap: Bit map to track the active slice ids * @ecc_irq: interrupt for llcc cache error detection and reporting + * @ecc_irq_configured: 'True' if firmware has already configured the irq propagation * @version: Indicates the LLCC version */ struct llcc_drv_data { @@ -139,6 +140,7 @@ struct llcc_drv_data { u32 num_banks; unsigned long *bitmap; int ecc_irq; + bool ecc_irq_configured; u32 version; }; -- cgit v1.2.3 From 421d2251fbeac8ab4308ab6653a9252dc4efa6c9 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Thu, 3 Oct 2024 13:46:39 +0200 Subject: dt-bindings: iio: adc: Add the GE HealthCare PMC ADC The GE HealthCare PMC Analog to Digital Converter (ADC) is a 16-Channel (voltage and current), 16-Bit ADC with an I2C Interface. 
Signed-off-by: Herve Codina Tested-by: Ian Ray Reviewed-by: Conor Dooley Link: https://patch.msgid.link/20241003114641.672086-3-herve.codina@bootlin.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/adc/gehc,pmc-adc.yaml | 86 ++++++++++++++++++++++ include/dt-bindings/iio/adc/gehc,pmc-adc.h | 10 +++ 2 files changed, 96 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/adc/gehc,pmc-adc.yaml create mode 100644 include/dt-bindings/iio/adc/gehc,pmc-adc.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/iio/adc/gehc,pmc-adc.yaml b/Documentation/devicetree/bindings/iio/adc/gehc,pmc-adc.yaml new file mode 100644 index 000000000000..2cea7c104a26 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/gehc,pmc-adc.yaml @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/adc/gehc,pmc-adc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: GE HealthCare PMC Analog to Digital Converter (ADC) + +maintainers: + - Herve Codina + +description: + The GE HealthCare PMC ADC is a 16-Channel (voltage and current), 16-Bit ADC + with an I2C Interface. + +properties: + compatible: + const: gehc,pmc-adc + + reg: + maxItems: 1 + + vdd-supply: + description: + Regulator for the VDD power supply. + + vdda-supply: + description: + Regulator for the VDD analog (VDDA) power supply. + + vddio-supply: + description: + Regulator for the VDD IO (VDDIO) power supply. + + vref-supply: + description: + Regulator for the voltage reference power supply. + + clocks: + maxItems: 1 + description: + The component uses an external oscillator (osc) if an external oscillator + is connected to its clock pins. Otherwise, it uses an internal reference + clock. + + clock-names: + items: + - const: osc + + "#io-channel-cells": + const: 2 + description: | + The first cell is the channel type (dt-bindings/iio/adc/gehc,pmc-adc.h + defines these values): + - 0: voltage + - 1: current + The second cell is the channel number from 0 to 15. + +required: + - compatible + - reg + - vdd-supply + - vdda-supply + - vddio-supply + - vref-supply + - '#io-channel-cells' + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + adc@14 { + compatible = "gehc,pmc-adc"; + reg = <0x14>; + vdd-supply = <®_vdd>; + vdda-supply = <®_vdda>; + vddio-supply = <®_vddio>; + vref-supply = <®_vref>; + #io-channel-cells = <2>; + }; + }; +... diff --git a/include/dt-bindings/iio/adc/gehc,pmc-adc.h b/include/dt-bindings/iio/adc/gehc,pmc-adc.h new file mode 100644 index 000000000000..2f291e3c76ae --- /dev/null +++ b/include/dt-bindings/iio/adc/gehc,pmc-adc.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ + +#ifndef _DT_BINDINGS_IIO_ADC_GEHC_PMC_ADC_H +#define _DT_BINDINGS_IIO_ADC_GEHC_PMC_ADC_H + +/* ADC channel type */ +#define GEHC_PMC_ADC_VOLTAGE 0 +#define GEHC_PMC_ADC_CURRENT 1 + +#endif -- cgit v1.2.3 From 70c8fd00a9bd0509bbf7bccd9baea8bbd5ddc756 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:16 -0400 Subject: timekeeping: Add interfaces for handling timestamps with a floor value Multigrain timestamps allow the kernel to use fine-grained timestamps when an inode's attributes is being actively observed via ->getattr(). With this support, it's possible for a file to get a fine-grained timestamp, and another modified after it to get a coarse-grained stamp that is earlier than the fine-grained time. 
If this happens then the files can appear to have been modified in reverse order, which breaks VFS ordering guarantees [1]. To prevent this, maintain a floor value for multigrain timestamps. Whenever a fine-grained timestamp is handed out, record it, and when later coarse-grained stamps are handed out, ensure they are not earlier than that value. If the coarse-grained timestamp is earlier than the fine-grained floor, return the floor value instead. Add a static singleton atomic64_t into timekeeper.c that is used to keep track of the latest fine-grained time ever handed out. This is tracked as a monotonic ktime_t value to ensure that it isn't affected by clock jumps. Because it is updated at different times than the rest of the timekeeper object, the floor value is managed independently of the timekeeper via a cmpxchg() operation, and sits on its own cacheline. Add two new public interfaces: - ktime_get_coarse_real_ts64_mg() fills a timespec64 with the later of the coarse-grained clock and the floor time - ktime_get_real_ts64_mg() gets the fine-grained clock value, and tries to swap it into the floor. A timespec64 is filled with the result. The floor value is global and updated via a single try_cmpxchg(). If that fails then the operation raced with a concurrent update. Any concurrent update must be later than the existing floor value, so any racing tasks can accept any resulting floor value without retrying. [1]: POSIX requires that files be stamped with realtime clock values, and makes no provision for dealing with backward clock jumps. If a backward realtime clock jump occurs, then files can appear to have been modified in reverse order. Signed-off-by: Jeff Layton Signed-off-by: Thomas Gleixner Tested-by: Randy Dunlap # documentation bits Acked-by: John Stultz Link: https://lore.kernel.org/all/20241002-mgtime-v10-1-d1c4717f5284@kernel.org --- include/linux/timekeeping.h | 4 ++ kernel/time/timekeeping.c | 104 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index fc12a9ba2c88..7aa85246c183 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -45,6 +45,10 @@ extern void ktime_get_real_ts64(struct timespec64 *tv); extern void ktime_get_coarse_ts64(struct timespec64 *ts); extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); +/* Multigrain timestamp interfaces */ +extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); +extern void ktime_get_real_ts64_mg(struct timespec64 *ts); + void getboottime64(struct timespec64 *ts); /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7e6f409bf311..441792c907fa 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -114,6 +114,23 @@ static struct tk_fast tk_fast_raw ____cacheline_aligned = { .base[1] = FAST_TK_INIT, }; +/* + * Multigrain timestamps require tracking the latest fine-grained timestamp + * that has been issued, and never returning a coarse-grained timestamp that is + * earlier than that value. + * + * mg_floor represents the latest fine-grained time that has been handed out as + * a file timestamp on the system. This is tracked as a monotonic ktime_t, and + * converted to a realtime clock value on an as-needed basis. + * + * Maintaining mg_floor ensures the multigrain interfaces never issue a + * timestamp earlier than one that has been previously issued. 
+ * + * The exception to this rule is when there is a backward realtime clock jump. If + * such an event occurs, a timestamp can appear to be earlier than a previous one. + */ +static __cacheline_aligned_in_smp atomic64_t mg_floor; + static inline void tk_normalize_xtime(struct timekeeper *tk) { while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { @@ -2394,6 +2411,93 @@ void ktime_get_coarse_real_ts64(struct timespec64 *ts) } EXPORT_SYMBOL(ktime_get_coarse_real_ts64); +/** + * ktime_get_coarse_real_ts64_mg - return latter of coarse grained time or floor + * @ts: timespec64 to be filled + * + * Fetch the global mg_floor value, convert it to realtime and compare it + * to the current coarse-grained time. Fill @ts with whichever is + * latest. Note that this is a filesystem-specific interface and should be + * avoided outside of that context. + */ +void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts) +{ + struct timekeeper *tk = &tk_core.timekeeper; + u64 floor = atomic64_read(&mg_floor); + ktime_t f_real, offset, coarse; + unsigned int seq; + + do { + seq = read_seqcount_begin(&tk_core.seq); + *ts = tk_xtime(tk); + offset = tk_core.timekeeper.offs_real; + } while (read_seqcount_retry(&tk_core.seq, seq)); + + coarse = timespec64_to_ktime(*ts); + f_real = ktime_add(floor, offset); + if (ktime_after(f_real, coarse)) + *ts = ktime_to_timespec64(f_real); +} + +/** + * ktime_get_real_ts64_mg - attempt to update floor value and return result + * @ts: pointer to the timespec to be set + * + * Get a monotonic fine-grained time value and attempt to swap it into + * mg_floor. If that succeeds then accept the new floor value. If it fails + * then another task raced in during the interim time and updated the + * floor. Since any update to the floor must be later than the previous + * floor, either outcome is acceptable. + * + * Typically this will be called after calling ktime_get_coarse_real_ts64_mg(), + * and determining that the resulting coarse-grained timestamp did not effect + * a change in ctime. Any more recent floor value would effect a change to + * ctime, so there is no need to retry the atomic64_try_cmpxchg() on failure. + * + * @ts will be filled with the latest floor value, regardless of the outcome of + * the cmpxchg. Note that this is a filesystem specific interface and should be + * avoided outside of that context. + */ +void ktime_get_real_ts64_mg(struct timespec64 *ts) +{ + struct timekeeper *tk = &tk_core.timekeeper; + ktime_t old = atomic64_read(&mg_floor); + ktime_t offset, mono; + unsigned int seq; + u64 nsecs; + + do { + seq = read_seqcount_begin(&tk_core.seq); + + ts->tv_sec = tk->xtime_sec; + mono = tk->tkr_mono.base; + nsecs = timekeeping_get_ns(&tk->tkr_mono); + offset = tk_core.timekeeper.offs_real; + } while (read_seqcount_retry(&tk_core.seq, seq)); + + mono = ktime_add_ns(mono, nsecs); + + /* + * Attempt to update the floor with the new time value. As any + * update must be later then the existing floor, and would effect + * a change to ctime from the perspective of the current task, + * accept the resulting floor value regardless of the outcome of + * the swap. + */ + if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) { + ts->tv_nsec = 0; + timespec64_add_ns(ts, nsecs); + } else { + /* + * Another task changed mg_floor since "old" was fetched. + * "old" has been updated with the latest value of "mg_floor". + * That value is newer than the previous floor value, which + * is enough to effect a change to ctime. Accept it. 
+ */ + *ts = ktime_to_timespec64(ktime_add(old, offset)); + } +} + void ktime_get_coarse_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; -- cgit v1.2.3 From 96f9a366ec8abe027326d7aab84d64370019f0f1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:17 -0400 Subject: timekeeping: Add percpu counter for tracking floor swap events The mgtime_floor value is a global variable for tracking the latest fine-grained timestamp handed out. Because it's a global, track the number of times that a new floor value is assigned. Add a new percpu counter to the timekeeping code to track the number of floor swap events that have occurred. A later patch will add a debugfs file to display this counter alongside other stats involving multigrain timestamps. Signed-off-by: Jeff Layton Signed-off-by: Thomas Gleixner Tested-by: Randy Dunlap # documentation bits Link: https://lore.kernel.org/all/20241002-mgtime-v10-2-d1c4717f5284@kernel.org --- include/linux/timekeeping.h | 1 + kernel/time/timekeeping.c | 1 + kernel/time/timekeeping_debug.c | 13 +++++++++++++ kernel/time/timekeeping_internal.h | 15 +++++++++++++++ 4 files changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7aa85246c183..84a035e86ac8 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -48,6 +48,7 @@ extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); /* Multigrain timestamp interfaces */ extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); extern void ktime_get_real_ts64_mg(struct timespec64 *ts); +extern unsigned long timekeeping_get_mg_floor_swaps(void); void getboottime64(struct timespec64 *ts); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 441792c907fa..962b2a31f015 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2487,6 +2487,7 @@ void ktime_get_real_ts64_mg(struct timespec64 *ts) if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) { ts->tv_nsec = 0; timespec64_add_ns(ts, nsecs); + timekeeping_inc_mg_floor_swaps(); } else { /* * Another task changed mg_floor since "old" was fetched. 
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index b73e8850e58d..badeb222eab9 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -17,6 +17,9 @@ #define NUM_BINS 32 +/* Incremented every time mg_floor is updated */ +DEFINE_PER_CPU(unsigned long, timekeeping_mg_floor_swaps); + static unsigned int sleep_time_bin[NUM_BINS] = {0}; static int tk_debug_sleep_time_show(struct seq_file *s, void *data) @@ -53,3 +56,13 @@ void tk_debug_account_sleep_time(const struct timespec64 *t) (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC); } +unsigned long timekeeping_get_mg_floor_swaps(void) +{ + unsigned long sum = 0; + int cpu; + + for_each_possible_cpu(cpu) + sum += data_race(per_cpu(timekeeping_mg_floor_swaps, cpu)); + + return sum; +} diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h index 4ca2787d1642..0bbae825bc02 100644 --- a/kernel/time/timekeeping_internal.h +++ b/kernel/time/timekeeping_internal.h @@ -10,9 +10,24 @@ * timekeeping debug functions */ #ifdef CONFIG_DEBUG_FS + +DECLARE_PER_CPU(unsigned long, timekeeping_mg_floor_swaps); + +static inline void timekeeping_inc_mg_floor_swaps(void) +{ + this_cpu_inc(timekeeping_mg_floor_swaps); +} + extern void tk_debug_account_sleep_time(const struct timespec64 *t); + #else + #define tk_debug_account_sleep_time(x) + +static inline void timekeeping_inc_mg_floor_swaps(void) +{ +} + #endif #ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE -- cgit v1.2.3 From 51976c6cd786151b6a1bdf8b8b3334beac0ba99c Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Fri, 27 Sep 2024 18:33:22 +0800 Subject: RDMA/core: Provide rdma_user_mmap_disassociate() to disassociate mmap pages Provide a new api rdma_user_mmap_disassociate() for drivers to disassociate mmap pages for a device. Since drivers can now disassociate mmaps by calling this api, introduce a new disassociation_lock to specifically prevent races between this disassociation process and new mmaps. And thus the old hw_destroy_rwsem is not needed in this api. 
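As a usage illustration of the new API (only rdma_user_mmap_disassociate() comes from this patch; the driver, structure and callback names below are hypothetical), a driver about to reset its hardware could revoke existing user mappings like this:

/* Hypothetical driver hook, shown as a sketch only. */
static void example_rdma_drv_prepare_reset(struct example_rdma_dev *edev)
{
	/*
	 * Zap all userspace mmaps of the device. New mmaps racing with
	 * this call are serialized by the new disassociation_lock.
	 */
	rdma_user_mmap_disassociate(&edev->ib_dev);

	/* ... proceed with the hardware reset ... */
}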
Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20240927103323.1897094-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs.h | 2 ++ drivers/infiniband/core/uverbs_main.c | 43 +++++++++++++++++++++++++++++++++-- include/rdma/ib_verbs.h | 8 +++++++ 3 files changed, 51 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 821d93c8f712..dfd2e5a86e6f 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -160,6 +160,8 @@ struct ib_uverbs_file { struct page *disassociate_page; struct xarray idr; + + struct mutex disassociation_lock; }; struct ib_uverbs_event { diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 94454186ed81..85cfc790a7bb 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -76,6 +76,7 @@ static dev_t dynamic_uverbs_dev; static DEFINE_IDA(uverbs_ida); static int ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); +static struct ib_client uverbs_client; static char *uverbs_devnode(const struct device *dev, umode_t *mode) { @@ -217,6 +218,7 @@ void ib_uverbs_release_file(struct kref *ref) if (file->disassociate_page) __free_pages(file->disassociate_page, 0); + mutex_destroy(&file->disassociation_lock); mutex_destroy(&file->umap_lock); mutex_destroy(&file->ucontext_lock); kfree(file); @@ -698,8 +700,13 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) ret = PTR_ERR(ucontext); goto out; } + + mutex_lock(&file->disassociation_lock); + vma->vm_ops = &rdma_umap_ops; ret = ucontext->device->ops.mmap(ucontext, vma); + + mutex_unlock(&file->disassociation_lock); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; @@ -721,6 +728,8 @@ static void rdma_umap_open(struct vm_area_struct *vma) /* We are racing with disassociation */ if (!down_read_trylock(&ufile->hw_destroy_rwsem)) goto out_zap; + mutex_lock(&ufile->disassociation_lock); + /* * Disassociation already completed, the VMA should already be zapped. */ @@ -732,10 +741,12 @@ static void rdma_umap_open(struct vm_area_struct *vma) goto out_unlock; rdma_umap_priv_init(priv, vma, opriv->entry); + mutex_unlock(&ufile->disassociation_lock); up_read(&ufile->hw_destroy_rwsem); return; out_unlock: + mutex_unlock(&ufile->disassociation_lock); up_read(&ufile->hw_destroy_rwsem); out_zap: /* @@ -819,7 +830,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) { struct rdma_umap_priv *priv, *next_priv; - lockdep_assert_held(&ufile->hw_destroy_rwsem); + mutex_lock(&ufile->disassociation_lock); while (1) { struct mm_struct *mm = NULL; @@ -845,8 +856,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) break; } mutex_unlock(&ufile->umap_lock); - if (!mm) + if (!mm) { + mutex_unlock(&ufile->disassociation_lock); return; + } /* * The umap_lock is nested under mmap_lock since it used within @@ -876,7 +889,31 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) mmap_read_unlock(mm); mmput(mm); } + + mutex_unlock(&ufile->disassociation_lock); +} + +/** + * rdma_user_mmap_disassociate() - Revoke mmaps for a device + * @device: device to revoke + * + * This function should be called by drivers that need to disable mmaps for the + * device, for instance because it is going to be reset. 
+ */ +void rdma_user_mmap_disassociate(struct ib_device *device) +{ + struct ib_uverbs_device *uverbs_dev = + ib_get_client_data(device, &uverbs_client); + struct ib_uverbs_file *ufile; + + mutex_lock(&uverbs_dev->lists_mutex); + list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) { + if (ufile->ucontext) + uverbs_user_mmap_disassociate(ufile); + } + mutex_unlock(&uverbs_dev->lists_mutex); } +EXPORT_SYMBOL(rdma_user_mmap_disassociate); /* * ib_uverbs_open() does not need the BKL: @@ -947,6 +984,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) mutex_init(&file->umap_lock); INIT_LIST_HEAD(&file->umaps); + mutex_init(&file->disassociation_lock); + filp->private_data = file; list_add_tail(&file->list, &dev->uverbs_file_list); mutex_unlock(&dev->lists_mutex); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index aa8ede439905..9cb8b5fe7eee 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2948,6 +2948,14 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, size_t length, u32 min_pgoff, u32 max_pgoff); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +void rdma_user_mmap_disassociate(struct ib_device *device); +#else +static inline void rdma_user_mmap_disassociate(struct ib_device *device) +{ +} +#endif + static inline int rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext, struct rdma_user_mmap_entry *entry, -- cgit v1.2.3 From 2382d68d7d43873ba856baf567cab0d5c523f23b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Sep 2024 15:31:38 +1000 Subject: sched: change wake_up_bit() and related function to expect unsigned long * wake_up_bit() currently allows a "void *". While this isn't strictly a problem as the address is never dereferenced, it is inconsistent with the corresponding wait_on_bit() which requires "unsigned long *" and does dereference the pointer. Any code that needs to wait for a change in something other than an unsigned long would be better served by wake_up_var()/wait_var_event(). This patch changes all related "void *" to "unsigned long *". 
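A minimal caller-side sketch of the new prototypes (the structure and bit names are hypothetical): the word handed to wait_on_bit()/wake_up_bit() is now an unsigned long, typically a flags word or a DECLARE_BITMAP() entry.

#include <linux/wait_bit.h>
#include <linux/sched.h>

struct example_obj {
	unsigned long flags;		/* bit 0: EXAMPLE_BUSY */
};
#define EXAMPLE_BUSY	0

static int example_wait_idle(struct example_obj *obj)
{
	/* sleeps until EXAMPLE_BUSY is cleared and a wake up is sent */
	return wait_on_bit(&obj->flags, EXAMPLE_BUSY, TASK_UNINTERRUPTIBLE);
}

static void example_mark_idle(struct example_obj *obj)
{
	/* clears the bit with RELEASE semantics and wakes any waiter */
	clear_and_wake_up_bit(EXAMPLE_BUSY, &obj->flags);
}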
Reported-by: Linus Torvalds Signed-off-by: NeilBrown Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240925053405.3960701-2-neilb@suse.de --- include/linux/wait_bit.h | 16 ++++++++-------- kernel/sched/wait_bit.c | 12 ++++++------ 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 7725b7579b78..48e123839892 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -8,7 +8,7 @@ #include struct wait_bit_key { - void *flags; + unsigned long *flags; int bit_nr; unsigned long timeout; }; @@ -23,14 +23,14 @@ struct wait_bit_queue_entry { typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); -void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); +void __wake_up_bit(struct wait_queue_head *wq_head, unsigned long *word, int bit); int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); -void wake_up_bit(void *word, int bit); -int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); -int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); -int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); -struct wait_queue_head *bit_waitqueue(void *word, int bit); +void wake_up_bit(unsigned long *word, int bit); +int out_of_line_wait_on_bit(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode); +int out_of_line_wait_on_bit_timeout(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); +int out_of_line_wait_on_bit_lock(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode); +struct wait_queue_head *bit_waitqueue(unsigned long *word, int bit); extern void __init wait_bit_init(void); int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); @@ -327,7 +327,7 @@ do { \ * You can use this helper if bitflags are manipulated atomically rather than * non-atomically under a lock. */ -static inline void clear_and_wake_up_bit(int bit, void *word) +static inline void clear_and_wake_up_bit(int bit, unsigned long *word) { clear_bit_unlock(bit, word); /* See wake_up_bit() for which memory barrier you need to use. */ diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c index 134d7112ef71..058b0e18727e 100644 --- a/kernel/sched/wait_bit.c +++ b/kernel/sched/wait_bit.c @@ -9,7 +9,7 @@ static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned; -wait_queue_head_t *bit_waitqueue(void *word, int bit) +wait_queue_head_t *bit_waitqueue(unsigned long *word, int bit) { const int shift = BITS_PER_LONG == 32 ? 
5 : 6; unsigned long val = (unsigned long)word << shift | bit; @@ -55,7 +55,7 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_ } EXPORT_SYMBOL(__wait_on_bit); -int __sched out_of_line_wait_on_bit(void *word, int bit, +int __sched out_of_line_wait_on_bit(unsigned long *word, int bit, wait_bit_action_f *action, unsigned mode) { struct wait_queue_head *wq_head = bit_waitqueue(word, bit); @@ -66,7 +66,7 @@ int __sched out_of_line_wait_on_bit(void *word, int bit, EXPORT_SYMBOL(out_of_line_wait_on_bit); int __sched out_of_line_wait_on_bit_timeout( - void *word, int bit, wait_bit_action_f *action, + unsigned long *word, int bit, wait_bit_action_f *action, unsigned mode, unsigned long timeout) { struct wait_queue_head *wq_head = bit_waitqueue(word, bit); @@ -108,7 +108,7 @@ __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry } EXPORT_SYMBOL(__wait_on_bit_lock); -int __sched out_of_line_wait_on_bit_lock(void *word, int bit, +int __sched out_of_line_wait_on_bit_lock(unsigned long *word, int bit, wait_bit_action_f *action, unsigned mode) { struct wait_queue_head *wq_head = bit_waitqueue(word, bit); @@ -118,7 +118,7 @@ int __sched out_of_line_wait_on_bit_lock(void *word, int bit, } EXPORT_SYMBOL(out_of_line_wait_on_bit_lock); -void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit) +void __wake_up_bit(struct wait_queue_head *wq_head, unsigned long *word, int bit) { struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit); @@ -144,7 +144,7 @@ EXPORT_SYMBOL(__wake_up_bit); * may need to use a less regular barrier, such fs/inode.c's smp_mb(), * because spin_unlock() does not guarantee a memory barrier. */ -void wake_up_bit(void *word, int bit) +void wake_up_bit(unsigned long *word, int bit) { __wake_up_bit(bit_waitqueue(word, bit), word, bit); } -- cgit v1.2.3 From 3cdee6b359f134da22f7fd4606e0338413cfd79e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Sep 2024 15:31:39 +1000 Subject: sched: Improve documentation for wake_up_bit/wait_on_bit family of functions This patch revises the documention for wake_up_bit(), clear_and_wake_up_bit(), and all the wait_on_bit() family of functions. The new documentation places less emphasis on the pool of waitqueues used (an implementation detail) and focuses instead on details of how the functions behave. The barriers included in the wait functions and clear_and_wake_up_bit() and those required for wake_up_bit() are spelled out more clearly. The error statuses returned are given explicitly. The fact that the wait_on_bit_lock() function sets the bit is made more obvious. 
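A brief sketch of the lock-like semantics the revised text spells out (bit number and function names hypothetical): wait_on_bit_lock() returns with the bit set, and the owner later releases it with clear_and_wake_up_bit().

#define EXAMPLE_LOCKED	1	/* hypothetical bit number */

static int example_bit_lock(unsigned long *word)
{
	/* 0: bit was clear and is now set; -EINTR: fatal signal received */
	return wait_on_bit_lock(word, EXAMPLE_LOCKED, TASK_KILLABLE);
}

static void example_bit_unlock(unsigned long *word)
{
	/* clear with RELEASE semantics and wake tasks in wait_on_bit_lock() */
	clear_and_wake_up_bit(EXAMPLE_LOCKED, word);
}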
Signed-off-by: NeilBrown Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240925053405.3960701-3-neilb@suse.de --- include/linux/wait_bit.h | 159 +++++++++++++++++++++++++---------------------- kernel/sched/wait_bit.c | 34 ++++++---- 2 files changed, 107 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 48e123839892..723e7bf35747 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -53,19 +53,21 @@ extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode); /** * wait_on_bit - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * @word: the address containing the bit being waited on + * @bit: the bit at that address being waited on * @mode: the task state to sleep in * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that waits on a bit. - * For instance, if one were to have waiters on a bitflag, one would - * call wait_on_bit() in threads waiting for the bit to clear. - * One uses wait_on_bit() where one is waiting for the bit to clear, - * but has no intention of setting it. - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. + * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP()) + * to be cleared. The clearing of the bit must be signalled with + * wake_up_bit(), often as clear_and_wake_up_bit(). + * + * The process will wait on a waitqueue selected by hash from a shared + * pool. It will only be woken on a wake_up for the target bit, even + * if other processes on the same queue are waiting for other bits. + * + * Returned value will be zero if the bit was cleared in which case the + * call has ACQUIRE semantics, or %-EINTR if the process received a + * signal and the mode permitted wake up on that signal. */ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) @@ -80,17 +82,20 @@ wait_on_bit(unsigned long *word, int bit, unsigned mode) /** * wait_on_bit_io - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * @word: the address containing the bit being waited on + * @bit: the bit at that address being waited on * @mode: the task state to sleep in * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared. This is similar to wait_on_bit(), but calls - * io_schedule() instead of schedule() for the actual waiting. + * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP()) + * to be cleared. The clearing of the bit must be signalled with + * wake_up_bit(), often as clear_and_wake_up_bit(). + * + * This is similar to wait_on_bit(), but calls io_schedule() instead of + * schedule() for the actual waiting. * - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. + * Returned value will be zero if the bit was cleared in which case the + * call has ACQUIRE semantics, or %-EINTR if the process received a + * signal and the mode permitted wake up on that signal. 
*/ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) @@ -104,19 +109,24 @@ wait_on_bit_io(unsigned long *word, int bit, unsigned mode) } /** - * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * wait_on_bit_timeout - wait for a bit to be cleared or a timeout to elapse + * @word: the address containing the bit being waited on + * @bit: the bit at that address being waited on * @mode: the task state to sleep in * @timeout: timeout, in jiffies * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared. This is similar to wait_on_bit(), except also takes a - * timeout parameter. + * Wait for the given bit in an unsigned long or bitmap (see + * DECLARE_BITMAP()) to be cleared, or for a timeout to expire. The + * clearing of the bit must be signalled with wake_up_bit(), often as + * clear_and_wake_up_bit(). * - * Returned value will be zero if the bit was cleared before the - * @timeout elapsed, or non-zero if the @timeout elapsed or process - * received a signal and the mode permitted wakeup on that signal. + * This is similar to wait_on_bit(), except it also takes a timeout + * parameter. + * + * Returned value will be zero if the bit was cleared in which case the + * call has ACQUIRE semantics, or %-EINTR if the process received a + * signal and the mode permitted wake up on that signal, or %-EAGAIN if the + * timeout elapsed. */ static inline int wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, @@ -132,19 +142,21 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, /** * wait_on_bit_action - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * @word: the address containing the bit waited on + * @bit: the bit at that address being waited on * @action: the function used to sleep, which may take special actions * @mode: the task state to sleep in * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared, and allow the waiting action to be specified. - * This is like wait_on_bit() but allows fine control of how the waiting - * is done. + * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP()) + * to be cleared. The clearing of the bit must be signalled with + * wake_up_bit(), often as clear_and_wake_up_bit(). + * + * This is similar to wait_on_bit(), but calls @action() instead of + * schedule() for the actual waiting. * - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. + * Returned value will be zero if the bit was cleared in which case the + * call has ACQUIRE semantics, or the error code returned by @action if + * that call returned non-zero. 
*/ static inline int wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, @@ -157,23 +169,22 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, } /** - * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * wait_on_bit_lock - wait for a bit to be cleared, then set it + * @word: the address containing the bit being waited on + * @bit: the bit of the word being waited on and set * @mode: the task state to sleep in * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that waits on a bit - * when one intends to set it, for instance, trying to lock bitflags. - * For instance, if one were to have waiters trying to set bitflag - * and waiting for it to clear before setting it, one would call - * wait_on_bit() in threads waiting to be able to set the bit. - * One uses wait_on_bit_lock() where one is waiting for the bit to - * clear with the intention of setting it, and when done, clearing it. + * Wait for the given bit in an unsigned long or bitmap (see + * DECLARE_BITMAP()) to be cleared. The clearing of the bit must be + * signalled with wake_up_bit(), often as clear_and_wake_up_bit(). As + * soon as it is clear, atomically set it and return. * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. + * This is similar to wait_on_bit(), but sets the bit before returning. + * + * Returned value will be zero if the bit was successfully set in which + * case the call has the same memory sequencing semantics as + * test_and_clear_bit(), or %-EINTR if the process received a signal and + * the mode permitted wake up on that signal. */ static inline int wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) @@ -185,15 +196,18 @@ wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) } /** - * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * wait_on_bit_lock_io - wait for a bit to be cleared, then set it + * @word: the address containing the bit being waited on + * @bit: the bit of the word being waited on and set * @mode: the task state to sleep in * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared and then to atomically set it. This is similar - * to wait_on_bit(), but calls io_schedule() instead of schedule() - * for the actual waiting. + * Wait for the given bit in an unsigned long or bitmap (see + * DECLARE_BITMAP()) to be cleared. The clearing of the bit must be + * signalled with wake_up_bit(), often as clear_and_wake_up_bit(). As + * soon as it is clear, atomically set it and return. + * + * This is similar to wait_on_bit_lock(), but calls io_schedule() instead + * of schedule(). * * Returns zero if the bit was (eventually) found to be clear and was * set. 
Returns non-zero if a signal was delivered to the process and @@ -209,21 +223,19 @@ wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode) } /** - * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * wait_on_bit_lock_action - wait for a bit to be cleared, then set it + * @word: the address containing the bit being waited on + * @bit: the bit of the word being waited on and set * @action: the function used to sleep, which may take special actions * @mode: the task state to sleep in * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared and then to set it, and allow the waiting action - * to be specified. - * This is like wait_on_bit() but allows fine control of how the waiting - * is done. + * This is similar to wait_on_bit_lock(), but calls @action() instead of + * schedule() for the actual waiting. * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. + * Returned value will be zero if the bit was successfully set in which + * case the call has the same memory sequencing semantics as + * test_and_clear_bit(), or the error code returned by @action if that + * call returned non-zero. */ static inline int wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, @@ -320,12 +332,13 @@ do { \ /** * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit - * * @bit: the bit of the word being waited on - * @word: the word being waited on, a kernel virtual address + * @word: the address containing the bit being waited on * - * You can use this helper if bitflags are manipulated atomically rather than - * non-atomically under a lock. + * The designated bit is cleared and any tasks waiting in wait_on_bit() + * or similar will be woken. This call has RELEASE semantics so that + * any changes to memory made before this call are guaranteed to be visible + * after the corresponding wait_on_bit() completes. */ static inline void clear_and_wake_up_bit(int bit, unsigned long *word) { diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c index 058b0e18727e..bd2fc750fb1f 100644 --- a/kernel/sched/wait_bit.c +++ b/kernel/sched/wait_bit.c @@ -128,21 +128,29 @@ void __wake_up_bit(struct wait_queue_head *wq_head, unsigned long *word, int bit EXPORT_SYMBOL(__wake_up_bit); /** - * wake_up_bit - wake up a waiter on a bit - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * wake_up_bit - wake up waiters on a bit + * @word: the address containing the bit being waited on + * @bit: the bit at that address being waited on * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hash-table's accessor API that wakes up waiters - * on a bit. For instance, if one were to have waiters on a bitflag, - * one would call wake_up_bit() after clearing the bit. + * Wake up any process waiting in wait_on_bit() or similar for the + * given bit to be cleared. * - * In order for this to function properly, as it uses waitqueue_active() - * internally, some kind of memory barrier must be done prior to calling - * this. 
Typically, this will be smp_mb__after_atomic(), but in some - * cases where bitflags are manipulated non-atomically under a lock, one - * may need to use a less regular barrier, such fs/inode.c's smp_mb(), - * because spin_unlock() does not guarantee a memory barrier. + * The wake-up is sent to tasks in a waitqueue selected by hash from a + * shared pool. Only those tasks on that queue which have requested + * wake_up on this specific address and bit will be woken, and only if the + * bit is clear. + * + * In order for this to function properly there must be a full memory + * barrier after the bit is cleared and before this function is called. + * If the bit was cleared atomically, such as a by clear_bit() then + * smb_mb__after_atomic() can be used, othwewise smb_mb() is needed. + * If the bit was cleared with a fully-ordered operation, no further + * barrier is required. + * + * Normally the bit should be cleared by an operation with RELEASE + * semantics so that any changes to memory made before the bit is + * cleared are guaranteed to be visible after the matching wait_on_bit() + * completes. */ void wake_up_bit(unsigned long *word, int bit) { -- cgit v1.2.3 From bf39882edc798279765ca31751f6e679b50b97ef Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Sep 2024 15:31:40 +1000 Subject: sched: Document wait_var_event() family of functions and wake_up_var() wake_up_var(), wait_var_event() and related interfaces are not documented but have important ordering requirements. This patch adds documentation and makes these requirements explicit. The return values for those wait_var_event_* functions which return a value are documented. Note that these are, perhaps surprisingly, sometimes different from comparable wait_on_bit() functions. Signed-off-by: NeilBrown Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240925053405.3960701-4-neilb@suse.de --- include/linux/wait_bit.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/sched/wait_bit.c | 30 ++++++++++++++++++++ 2 files changed, 101 insertions(+) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 723e7bf35747..06ec99b90bf3 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -282,6 +282,22 @@ __out: __ret; \ ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ schedule()) +/** + * wait_var_event - wait for a variable to be updated and notified + * @var: the address of variable being waited on + * @condition: the condition to wait for + * + * Wait for a @condition to be true, only re-checking when a wake up is + * received for the given @var (an arbitrary kernel address which need + * not be directly related to the given condition, but usually is). + * + * The process will wait on a waitqueue selected by hash from a shared + * pool. It will only be woken on a wake_up for the given address. + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. 
+ */ #define wait_var_event(var, condition) \ do { \ might_sleep(); \ @@ -294,6 +310,24 @@ do { \ ___wait_var_event(var, condition, TASK_KILLABLE, 0, 0, \ schedule()) +/** + * wait_var_event_killable - wait for a variable to be updated and notified + * @var: the address of variable being waited on + * @condition: the condition to wait for + * + * Wait for a @condition to be true or a fatal signal to be received, + * only re-checking the condition when a wake up is received for the given + * @var (an arbitrary kernel address which need not be directly related + * to the given condition, but usually is). + * + * This is similar to wait_var_event() but returns a value which is + * 0 if the condition became true, or %-ERESTARTSYS if a fatal signal + * was received. + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. + */ #define wait_var_event_killable(var, condition) \ ({ \ int __ret = 0; \ @@ -308,6 +342,26 @@ do { \ TASK_UNINTERRUPTIBLE, 0, timeout, \ __ret = schedule_timeout(__ret)) +/** + * wait_var_event_timeout - wait for a variable to be updated or a timeout to expire + * @var: the address of variable being waited on + * @condition: the condition to wait for + * @timeout: maximum time to wait in jiffies + * + * Wait for a @condition to be true or a timeout to expire, only + * re-checking the condition when a wake up is received for the given + * @var (an arbitrary kernel address which need not be directly related + * to the given condition, but usually is). + * + * This is similar to wait_var_event() but returns a value which is 0 if + * the timeout expired and the condition was still false, or the + * remaining time left in the timeout (but at least 1) if the condition + * was found to be true. + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. + */ #define wait_var_event_timeout(var, condition, timeout) \ ({ \ long __ret = timeout; \ @@ -321,6 +375,23 @@ do { \ ___wait_var_event(var, condition, TASK_INTERRUPTIBLE, 0, 0, \ schedule()) +/** + * wait_var_event_killable - wait for a variable to be updated and notified + * @var: the address of variable being waited on + * @condition: the condition to wait for + * + * Wait for a @condition to be true or a signal to be received, only + * re-checking the condition when a wake up is received for the given + * @var (an arbitrary kernel address which need not be directly related + * to the given condition, but usually is). + * + * This is similar to wait_var_event() but returns a value which is 0 if + * the condition became true, or %-ERESTARTSYS if a signal was received. + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. 
+ */ #define wait_var_event_interruptible(var, condition) \ ({ \ int __ret = 0; \ diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c index bd2fc750fb1f..22ec270f5ab5 100644 --- a/kernel/sched/wait_bit.c +++ b/kernel/sched/wait_bit.c @@ -196,6 +196,36 @@ void init_wait_var_entry(struct wait_bit_queue_entry *wbq_entry, void *var, int } EXPORT_SYMBOL(init_wait_var_entry); +/** + * wake_up_var - wake up waiters on a variable (kernel address) + * @var: the address of the variable being waited on + * + * Wake up any process waiting in wait_var_event() or similar for the + * given variable to change. wait_var_event() can be waiting for an + * arbitrary condition to be true and associates that condition with an + * address. Calling wake_up_var() suggests that the condition has been + * made true, but does not strictly require the condition to use the + * address given. + * + * The wake-up is sent to tasks in a waitqueue selected by hash from a + * shared pool. Only those tasks on that queue which have requested + * wake_up on this specific address will be woken. + * + * In order for this to function properly there must be a full memory + * barrier after the variable is updated (or more accurately, after the + * condition waited on has been made to be true) and before this function + * is called. If the variable was updated atomically, such as by + * atomic_dec(), then smp_mb__after_atomic() can be used. If the + * variable was updated by a fully ordered operation such as + * atomic_dec_and_test() then no extra barrier is required. Otherwise + * smp_mb() is needed. + * + * Normally the variable should be updated (the condition should be made + * to be true) by an operation with RELEASE semantics such as + * smp_store_release() so that any changes to memory made before the + * variable was updated are guaranteed to be visible after the matching + * wait_var_event() completes. + */ void wake_up_var(void *var) { __wake_up_bit(__var_waitqueue(var), var, -1); -- cgit v1.2.3 From 52d633def56c10fe3e82a2c5d88c3ecb3f4e4852 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Sep 2024 15:31:41 +1000 Subject: sched: Add test_and_clear_wake_up_bit() and atomic_dec_and_wake_up() There are common patterns in the kernel of using test_and_clear_bit() before wake_up_bit(), and atomic_dec_and_test() before wake_up_var(). These combinations don't need extra barriers but sometimes include them unnecessarily. To help avoid the unnecessary barriers and to help discourage the general use of wake_up_bit/var (which is a fragile interface) introduce two combined functions which implement these patterns. Also add store_release_wake_up() which supports the task of simply setting a non-atomic variable and sending a wakeup. This pattern requires barriers which are often omitted.
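As an illustration of how these new helpers are meant to be used, here is a minimal sketch against a hypothetical driver object; struct foo, its fields and the foo_* functions below are invented for the example and are not part of the patch:

#include <linux/atomic.h>
#include <linux/wait_bit.h>

/* Hypothetical object: a refcount, a "busy" flag bit and a plain state word. */
struct foo {
	atomic_t	users;
	unsigned long	flags;	/* bit 0 used as the busy flag */
	int		state;
};

/* Drop a reference; if it was the last one, wake waiters in wait_var_event(&f->users, ...). */
static void foo_put(struct foo *f)
{
	atomic_dec_and_wake_up(&f->users);	/* replaces atomic_dec_and_test() + wake_up_var() */
}

/* Clear the busy bit and wake anyone sleeping in wait_on_bit(&f->flags, 0, ...). */
static void foo_clear_busy(struct foo *f)
{
	test_and_clear_wake_up_bit(0, &f->flags);	/* replaces test_and_clear_bit() + wake_up_bit() */
}

/* Publish a new state value, with the barriers needed by wait_var_event(&f->state, ...). */
static void foo_set_state(struct foo *f, int new_state)
{
	store_release_wake_up(&f->state, new_state);
}

Each helper bundles the wake-up with an operation that already provides the required ordering, so no open-coded smp_mb() is needed at the call sites.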
Signed-off-by: NeilBrown Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240925053405.3960701-5-neilb@suse.de --- include/linux/wait_bit.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 06ec99b90bf3..0272629b590a 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -419,4 +419,64 @@ static inline void clear_and_wake_up_bit(int bit, unsigned long *word) wake_up_bit(word, bit); } +/** + * test_and_clear_wake_up_bit - clear a bit if it was set: wake up anyone waiting on that bit + * @bit: the bit of the word being waited on + * @word: the address of memory containing that bit + * + * If the bit is set and can be atomically cleared, any tasks waiting in + * wait_on_bit() or similar will be woken. This call has the same + * complete ordering semantics as test_and_clear_bit(). Any changes to + * memory made before this call are guaranteed to be visible after the + * corresponding wait_on_bit() completes. + * + * Returns %true if the bit was set and successfully cleared, and the wake up was sent. + */ +static inline bool test_and_clear_wake_up_bit(int bit, unsigned long *word) +{ + if (!test_and_clear_bit(bit, word)) + return false; + /* no extra barrier required */ + wake_up_bit(word, bit); + return true; +} + +/** + * atomic_dec_and_wake_up - decrement an atomic_t and if zero, wake up waiters + * @var: the variable to dec and test + * + * Decrements the atomic variable and, if it reaches zero, sends a wake_up to any + * processes waiting on the variable. + * + * This function has the same complete ordering semantics as atomic_dec_and_test(). + * + * Returns %true if the variable reached zero and the wake up was sent. + */ + +static inline bool atomic_dec_and_wake_up(atomic_t *var) +{ + if (!atomic_dec_and_test(var)) + return false; + /* No extra barrier required */ + wake_up_var(var); + return true; +} + +/** + * store_release_wake_up - update a variable and send a wake_up + * @var: the address of the variable to be updated and woken + * @val: the value to store in the variable. + * + * Store the given value in the variable and send a wake up to any tasks + * waiting on the variable. All necessary barriers are included to ensure + * the task calling wait_var_event() sees the new value and all values + * written to memory before this call. + */ +#define store_release_wake_up(var, val) \ +do { \ + smp_store_release(var, val); \ + smp_mb(); \ + wake_up_var(var); \ +} while (0) + #endif /* _LINUX_WAIT_BIT_H */ -- cgit v1.2.3 From cc2e1c82d7e474753681a38b07b63034e107e369 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Sep 2024 15:31:42 +1000 Subject: sched: Add wait/wake interface for variable updated under a lock. Sometimes we need to wait for a condition to be true which must be tested while holding a lock. Correspondingly the condition is made true while holding the lock and the wake up is sent under the lock. This patch provides wake and wait interfaces which can be used for this situation when the lock is a mutex or a spinlock, or any other lock for which there are foo_lock() and foo_unlock() functions.
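As an illustration of the pattern these interfaces are aimed at, here is a minimal sketch; struct bar, its fields and the bar_* functions are invented for the example and are not part of the patch:

#include <linux/spinlock.h>
#include <linux/wait_bit.h>

/* Hypothetical object whose pending count is protected by a spinlock. */
struct bar {
	spinlock_t	lock;		/* initialised elsewhere with spin_lock_init() */
	unsigned int	pending;
};

/* Waiter: sleep until no work is pending; the lock is dropped around schedule(). */
static void bar_wait_idle(struct bar *b)
{
	spin_lock(&b->lock);
	wait_var_event_spinlock(&b->pending, b->pending == 0, &b->lock);
	/* returns with the lock held again and b->pending == 0 */
	spin_unlock(&b->lock);
}

/* Completer: update the variable and send the wake-up while still holding the lock. */
static void bar_complete_one(struct bar *b)
{
	spin_lock(&b->lock);
	if (--b->pending == 0)
		wake_up_var_locked(&b->pending, &b->lock);
	spin_unlock(&b->lock);
}

Because both sides hold the same lock, the locking itself provides the ordering and no extra memory barrier is required.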
Signed-off-by: NeilBrown Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240925053405.3960701-6-neilb@suse.de --- include/linux/wait_bit.h | 106 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 0272629b590a..6aea10efca3d 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -401,6 +401,112 @@ do { \ __ret; \ }) +/** + * wait_var_event_any_lock - wait for a variable to be updated under a lock + * @var: the address of the variable being waited on + * @condition: condition to wait for + * @lock: the object that is locked to protect updates to the variable + * @type: prefix on lock and unlock operations + * @state: waiting state, %TASK_UNINTERRUPTIBLE etc. + * + * Wait for a condition which can only be reliably tested while holding + * a lock. The variables assessed in the condition will normally be updated + * under the same lock, and the wake up should be signalled with + * wake_up_var_locked() under the same lock. + * + * This is similar to wait_var_event(), but assumes a lock is held + * while calling this function and while updating the variable. + * + * This must be called while the given lock is held and the lock will be + * dropped when schedule() is called to wait for a wake up, and will be + * reclaimed before testing the condition again. The functions used to + * unlock and lock the object are constructed by appending _unlock and _lock + * to @type. + * + * Return %-ERESTARTSYS if a signal arrives which is allowed to interrupt + * the wait according to @state. + */ +#define wait_var_event_any_lock(var, condition, lock, type, state) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __ret = ___wait_var_event(var, condition, state, 0, 0, \ + type ## _unlock(lock); \ + schedule(); \ + type ## _lock(lock)); \ + __ret; \ +}) + +/** + * wait_var_event_spinlock - wait for a variable to be updated under a spinlock + * @var: the address of the variable being waited on + * @condition: condition to wait for + * @lock: the spinlock which protects updates to the variable + * + * Wait for a condition which can only be reliably tested while holding + * a spinlock. The variables assessed in the condition will normally be updated + * under the same spinlock, and the wake up should be signalled with + * wake_up_var_locked() under the same spinlock. + * + * This is similar to wait_var_event(), but assumes a spinlock is held + * while calling this function and while updating the variable. + * + * This must be called while the given lock is held and the lock will be + * dropped when schedule() is called to wait for a wake up, and will be + * reclaimed before testing the condition again. + */ +#define wait_var_event_spinlock(var, condition, lock) \ + wait_var_event_any_lock(var, condition, lock, spin, TASK_UNINTERRUPTIBLE) + +/** + * wait_var_event_mutex - wait for a variable to be updated under a mutex + * @var: the address of the variable being waited on + * @condition: condition to wait for + * @mutex: the mutex which protects updates to the variable + * + * Wait for a condition which can only be reliably tested while holding + * a mutex. The variables assessed in the condition will normally be + * updated under the same mutex, and the wake up should be signalled + * with wake_up_var_locked() under the same mutex.
+ * + * This is similar to wait_var_event(), but assumes a mutex is held + * while calling this function and while updating the variable. + * + * This must be called while the given mutex is held and the mutex will be + * dropped when schedule() is called to wait for a wake up, and will be + * reclaimed before testing the condition again. + */ +#define wait_var_event_mutex(var, condition, lock) \ + wait_var_event_any_lock(var, condition, lock, mutex, TASK_UNINTERRUPTIBLE) + +/** + * wake_up_var_protected - wake up waiters for a variable asserting that it is safe + * @var: the address of the variable being waited on + * @cond: the condition which affirms this is safe + * + * When waking waiters which use wait_var_event_any_lock() the waker must be + * holding the relevant lock to avoid races. This version of wake_up_var() + * asserts that the relevant lock is held and so no barrier is needed. + * The @cond is only tested when CONFIG_LOCKDEP is enabled. + */ +#define wake_up_var_protected(var, cond) \ +do { \ + lockdep_assert(cond); \ + wake_up_var(var); \ +} while (0) + +/** + * wake_up_var_locked - wake up waiters for a variable while holding a spinlock or mutex + * @var: the address of the variable being waited on + * @lock: The spinlock or mutex which protects the variable + * + * Send a wake up for the given variable which should be waited for with + * wait_var_event_spinlock() or wait_var_event_mutex(). Unlike wake_up_var(), + * no extra barriers are needed as the locking provides sufficient sequencing. + */ +#define wake_up_var_locked(var, lock) \ + wake_up_var_protected(var, lockdep_is_held(lock)) + /** * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit * @bit: the bit of the word being waited on -- cgit v1.2.3 From 80681c04c5e8e4297b9ebf201ca3ce6242aa16c3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Sep 2024 15:31:43 +1000 Subject: sched: add wait_var_event_io() It is not currently possible to wait with wait_var_event() for an io_schedule() style wait. This patch adds wait_var_event_io() for that purpose. Signed-off-by: NeilBrown Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240925053405.3960701-7-neilb@suse.de --- include/linux/wait_bit.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 6aea10efca3d..6346e26fbfd1 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -281,6 +281,9 @@ __out: __ret; \ #define __wait_var_event(var, condition) \ ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ schedule()) +#define __wait_var_event_io(var, condition) \ + ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + io_schedule()) /** * wait_var_event - wait for a variable to be updated and notified @@ -306,6 +309,34 @@ do { \ __wait_var_event(var, condition); \ } while (0) +/** + * wait_var_event_io - wait for a variable to be updated and notified + * @var: the address of variable being waited on + * @condition: the condition to wait for + * + * Wait for an IO related @condition to be true, only re-checking when a + * wake up is received for the given @var (an arbitrary kernel address + * which need not be directly related to the given condition, but + * usually is). + * + * The process will wait on a waitqueue selected by hash from a shared + * pool. It will only be woken on a wake_up for the given address.
+ * + * This is similar to wait_var_event(), but calls io_schedule() instead + * of schedule(). + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. + */ +#define wait_var_event_io(var, condition) \ +do { \ + might_sleep(); \ + if (condition) \ + break; \ + __wait_var_event_io(var, condition); \ +} while (0) + #define __wait_var_event_killable(var, condition) \ ___wait_var_event(var, condition, TASK_KILLABLE, 0, 0, \ schedule()) -- cgit v1.2.3 From 5e9f0c4819deb9459f32f12c4fd2b47993b8c395 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Mon, 30 Sep 2024 05:09:46 +0000 Subject: sched: remove unused __HAVE_THREAD_FUNCTIONS hook support __HAVE_THREAD_FUNCTIONS could be defined by architectures wishing to provide their own task_thread_info(), task_stack_page(), setup_thread_stack() and end_of_stack() hooks. Commit cf8e8658100d ("arch: Remove Itanium (IA-64) architecture") removed the last upstream consumer of __HAVE_THREAD_FUNCTIONS, so change the remaining !CONFIG_THREAD_INFO_IN_TASK && !__HAVE_THREAD_FUNCTIONS conditionals to only check for the former case. Signed-off-by: David Disseldorp Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ard Biesheuvel Link: https://lkml.kernel.org/r/20240930050945.30304-2-ddiss@suse.de --- include/linux/sched.h | 2 +- include/linux/sched/task_stack.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index e6ee4258169a..abf26f1e1447 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1898,7 +1898,7 @@ extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)]; #ifdef CONFIG_THREAD_INFO_IN_TASK # define task_thread_info(task) (&(task)->thread_info) -#elif !defined(__HAVE_THREAD_FUNCTIONS) +#else # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index bf10bdb487dd..2e52cc421bce 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -33,7 +33,7 @@ static __always_inline unsigned long *end_of_stack(const struct task_struct *tas #endif } -#elif !defined(__HAVE_THREAD_FUNCTIONS) +#else #define task_stack_page(task) ((void *)(task)->stack) -- cgit v1.2.3 From 0ac8f14ef22a1592b44dc90272aab35e43b0106a Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 2 Oct 2024 00:40:16 +0100 Subject: sched/wait: Remove unused bit_wait_io_timeout bit_wait_io_timeout has been unused since 2016's commit 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") Remove it. Signed-off-by: "Dr. 
David Alan Gilbert" Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Tim Chen Link: https://lore.kernel.org/r/20241001234016.231696-1-linux@treblig.org --- include/linux/wait_bit.h | 1 - kernel/sched/wait_bit.c | 14 -------------- 2 files changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 6346e26fbfd1..9e29d79fc790 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -49,7 +49,6 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync extern int bit_wait(struct wait_bit_key *key, int mode); extern int bit_wait_io(struct wait_bit_key *key, int mode); extern int bit_wait_timeout(struct wait_bit_key *key, int mode); -extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode); /** * wait_on_bit - wait for a bit to be cleared diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c index 22ec270f5ab5..b410b61cec95 100644 --- a/kernel/sched/wait_bit.c +++ b/kernel/sched/wait_bit.c @@ -266,20 +266,6 @@ __sched int bit_wait_timeout(struct wait_bit_key *word, int mode) } EXPORT_SYMBOL_GPL(bit_wait_timeout); -__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode) -{ - unsigned long now = READ_ONCE(jiffies); - - if (time_after_eq(now, word->timeout)) - return -EAGAIN; - io_schedule_timeout(word->timeout - now); - if (signal_pending_state(mode, current)) - return -EINTR; - - return 0; -} -EXPORT_SYMBOL_GPL(bit_wait_io_timeout); - void __init wait_bit_init(void) { int i; -- cgit v1.2.3 From 066c779b094b63754e0742ad8675d72d6c0a46f6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 3 Oct 2024 18:48:19 +0300 Subject: platform/x86: intel_scu_ipc: Don't use "proxy" headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update header inclusions to follow IWYU (Include What You Use) principle. Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Link: https://lore.kernel.org/r/20241003154819.1075141-1-andriy.shevchenko@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/intel_scu_ipc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/intel_scu_ipc.h b/include/linux/platform_data/x86/intel_scu_ipc.h index 0ca9962e97f2..b287627759f7 100644 --- a/include/linux/platform_data/x86/intel_scu_ipc.h +++ b/include/linux/platform_data/x86/intel_scu_ipc.h @@ -2,9 +2,13 @@ #ifndef __PLATFORM_X86_INTEL_SCU_IPC_H_ #define __PLATFORM_X86_INTEL_SCU_IPC_H_ +#include #include +#include struct device; +struct module; + struct intel_scu_ipc_dev; /** -- cgit v1.2.3 From 4e40eff0b5737c0de39e1ae5812509efbc0b986e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:18 -0400 Subject: fs: add infrastructure for multigrain timestamps The VFS has always used coarse-grained timestamps when updating the ctime and mtime after a change. This has the benefit of allowing filesystems to optimize away a lot metadata updates, down to around 1 per jiffy, even when a file is under heavy writes. Unfortunately, this has always been an issue when we're exporting via NFSv3, which relies on timestamps to validate caches. A lot of changes can happen in a jiffy, so timestamps aren't sufficient to help the client decide when to invalidate the cache. Even with NFSv4, a lot of exported filesystems don't properly support a change attribute and are subject to the same problems with timestamp granularity. 
Other applications have similar issues with timestamps (e.g backup applications). If fine-grained timestamps were always used, that would improve the situation, but that becomes rather expensive, as the underlying filesystem would have to log a lot more metadata updates. What is needed is a way to only use fine-grained timestamps when they are being actively queried. Use the (unused) top bit in inode->i_ctime_nsec as a flag that indicates whether the current timestamps have been queried via stat() or the like. When it's set, allow the update to use a fine-grained timestamp iff it's necessary to make the ctime show a different value. If it has been queried, then first see whether the current coarse time is later than the existing ctime. If it is, accept that value. If it isn't, then get a fine-grained timestamp and attempt to stamp the inode ctime with that value. If that races with another concurrent stamp, then abandon the update and take the new value without retrying. Filesystems can opt into this by setting the FS_MGTIME fstype flag. Others should be unaffected (other than being subject to the same floor value as multigrain filesystems). Tested-by: Randy Dunlap # documentation bits Reviewed-by: Jan Kara Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20241002-mgtime-v10-3-d1c4717f5284@kernel.org Signed-off-by: Christian Brauner --- fs/inode.c | 139 +++++++++++++++++++++++++++++++++++++++++++---------- fs/stat.c | 43 ++++++++++++++++- include/linux/fs.h | 34 ++++++++++--- 3 files changed, 181 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index 10c4619faeef..53f56f6e1ff2 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2172,19 +2172,58 @@ int file_remove_privs(struct file *file) } EXPORT_SYMBOL(file_remove_privs); +/** + * current_time - Return FS time (possibly fine-grained) + * @inode: inode. + * + * Return the current time truncated to the time granularity supported by + * the fs, as suitable for a ctime/mtime change. If the ctime is flagged + * as having been QUERIED, get a fine-grained timestamp, but don't update + * the floor. + * + * For a multigrain inode, this is effectively an estimate of the timestamp + * that a file would receive. An actual update must go through + * inode_set_ctime_current(). + */ +struct timespec64 current_time(struct inode *inode) +{ + struct timespec64 now; + u32 cns; + + ktime_get_coarse_real_ts64_mg(&now); + + if (!is_mgtime(inode)) + goto out; + + /* If nothing has queried it, then coarse time is fine */ + cns = smp_load_acquire(&inode->i_ctime_nsec); + if (cns & I_CTIME_QUERIED) { + /* + * If there is no apparent change, then get a fine-grained + * timestamp. 
+ */ + if (now.tv_nsec == (cns & ~I_CTIME_QUERIED)) + ktime_get_real_ts64(&now); + } +out: + return timestamp_truncate(now, inode); +} +EXPORT_SYMBOL(current_time); + static int inode_needs_update_time(struct inode *inode) { + struct timespec64 now, ts; int sync_it = 0; - struct timespec64 now = current_time(inode); - struct timespec64 ts; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) return 0; + now = current_time(inode); + ts = inode_get_mtime(inode); if (!timespec64_equal(&ts, &now)) - sync_it = S_MTIME; + sync_it |= S_MTIME; ts = inode_get_ctime(inode); if (!timespec64_equal(&ts, &now)) @@ -2562,6 +2601,15 @@ void inode_nohighmem(struct inode *inode) } EXPORT_SYMBOL(inode_nohighmem); +struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) +{ + set_normalized_timespec64(&ts, ts.tv_sec, ts.tv_nsec); + inode->i_ctime_sec = ts.tv_sec; + inode->i_ctime_nsec = ts.tv_nsec; + return ts; +} +EXPORT_SYMBOL(inode_set_ctime_to_ts); + /** * timestamp_truncate - Truncate timespec to a granularity * @t: Timespec @@ -2594,36 +2642,77 @@ struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode) EXPORT_SYMBOL(timestamp_truncate); /** - * current_time - Return FS time - * @inode: inode. + * inode_set_ctime_current - set the ctime to current_time + * @inode: inode * - * Return the current time truncated to the time granularity supported by - * the fs. + * Set the inode's ctime to the current value for the inode. Returns the + * current value that was assigned. If this is not a multigrain inode, then we + * set it to the later of the coarse time and floor value. * - * Note that inode and inode->sb cannot be NULL. - * Otherwise, the function warns and returns time without truncation. + * If it is multigrain, then we first see if the coarse-grained timestamp is + * distinct from what is already there. If so, then use that. Otherwise, get a + * fine-grained timestamp. + * + * After that, try to swap the new value into i_ctime_nsec. Accept the + * resulting ctime, regardless of the outcome of the swap. If it has + * already been replaced, then that timestamp is later than the earlier + * unacceptable one, and is thus acceptable. */ -struct timespec64 current_time(struct inode *inode) +struct timespec64 inode_set_ctime_current(struct inode *inode) { struct timespec64 now; + u32 cns, cur; - ktime_get_coarse_real_ts64(&now); - return timestamp_truncate(now, inode); -} -EXPORT_SYMBOL(current_time); + ktime_get_coarse_real_ts64_mg(&now); + now = timestamp_truncate(now, inode); -/** - * inode_set_ctime_current - set the ctime to current_time - * @inode: inode - * - * Set the inode->i_ctime to the current value for the inode. Returns - * the current value that was assigned to i_ctime. - */ -struct timespec64 inode_set_ctime_current(struct inode *inode) -{ - struct timespec64 now = current_time(inode); + /* Just return that if this is not a multigrain fs */ + if (!is_mgtime(inode)) { + inode_set_ctime_to_ts(inode, now); + goto out; + } - inode_set_ctime_to_ts(inode, now); + /* + * A fine-grained time is only needed if someone has queried + * for timestamps, and the current coarse grained time isn't + * later than what's already there. 
+ */ + cns = smp_load_acquire(&inode->i_ctime_nsec); + if (cns & I_CTIME_QUERIED) { + struct timespec64 ctime = { .tv_sec = inode->i_ctime_sec, + .tv_nsec = cns & ~I_CTIME_QUERIED }; + + if (timespec64_compare(&now, &ctime) <= 0) { + ktime_get_real_ts64_mg(&now); + now = timestamp_truncate(now, inode); + } + } + + /* No need to cmpxchg if it's exactly the same */ + if (cns == now.tv_nsec && inode->i_ctime_sec == now.tv_sec) + goto out; + cur = cns; +retry: + /* Try to swap the nsec value into place. */ + if (try_cmpxchg(&inode->i_ctime_nsec, &cur, now.tv_nsec)) { + /* If swap occurred, then we're (mostly) done */ + inode->i_ctime_sec = now.tv_sec; + } else { + /* + * Was the change due to someone marking the old ctime QUERIED? + * If so then retry the swap. This can only happen once since + * the only way to clear I_CTIME_QUERIED is to stamp the inode + * with a new ctime. + */ + if (!(cns & I_CTIME_QUERIED) && (cns | I_CTIME_QUERIED) == cur) { + cns = cur; + goto retry; + } + /* Otherwise, keep the existing ctime */ + now.tv_sec = inode->i_ctime_sec; + now.tv_nsec = cur & ~I_CTIME_QUERIED; + } +out: return now; } EXPORT_SYMBOL(inode_set_ctime_current); diff --git a/fs/stat.c b/fs/stat.c index 89ce1be56310..dd480bf51a2a 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -26,6 +26,39 @@ #include "internal.h" #include "mount.h" +/** + * fill_mg_cmtime - Fill in the mtime and ctime and flag ctime as QUERIED + * @stat: where to store the resulting values + * @request_mask: STATX_* values requested + * @inode: inode from which to grab the c/mtime + * + * Given @inode, grab the ctime and mtime out if it and store the result + * in @stat. When fetching the value, flag it as QUERIED (if not already) + * so the next write will record a distinct timestamp. + * + * NB: The QUERIED flag is tracked in the ctime, but we set it there even + * if only the mtime was requested, as that ensures that the next mtime + * change will be distinct. 
+ */ +void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode) +{ + atomic_t *pcn = (atomic_t *)&inode->i_ctime_nsec; + + /* If neither time was requested, then don't report them */ + if (!(request_mask & (STATX_CTIME|STATX_MTIME))) { + stat->result_mask &= ~(STATX_CTIME|STATX_MTIME); + return; + } + + stat->mtime = inode_get_mtime(inode); + stat->ctime.tv_sec = inode->i_ctime_sec; + stat->ctime.tv_nsec = (u32)atomic_read(pcn); + if (!(stat->ctime.tv_nsec & I_CTIME_QUERIED)) + stat->ctime.tv_nsec = ((u32)atomic_fetch_or(I_CTIME_QUERIED, pcn)); + stat->ctime.tv_nsec &= ~I_CTIME_QUERIED; +} +EXPORT_SYMBOL(fill_mg_cmtime); + /** * generic_fillattr - Fill in the basic attributes from the inode struct * @idmap: idmap of the mount the inode was found from @@ -58,8 +91,14 @@ void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask, stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); stat->atime = inode_get_atime(inode); - stat->mtime = inode_get_mtime(inode); - stat->ctime = inode_get_ctime(inode); + + if (is_mgtime(inode)) { + fill_mg_cmtime(stat, request_mask, inode); + } else { + stat->ctime = inode_get_ctime(inode); + stat->mtime = inode_get_mtime(inode); + } + stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ca11e241a24..eff688e75f2f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1613,6 +1613,17 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode, return inode_set_mtime_to_ts(inode, ts); } +/* + * Multigrain timestamps + * + * Conditionally use fine-grained ctime and mtime timestamps when there + * are users actively observing them via getattr. The primary use-case + * for this is NFS clients that use the ctime to distinguish between + * different states of the file, and that are often fooled by multiple + * operations that occur in the same coarse-grained timer tick. + */ +#define I_CTIME_QUERIED ((u32)BIT(31)) + static inline time64_t inode_get_ctime_sec(const struct inode *inode) { return inode->i_ctime_sec; @@ -1620,7 +1631,7 @@ static inline time64_t inode_get_ctime_sec(const struct inode *inode) static inline long inode_get_ctime_nsec(const struct inode *inode) { - return inode->i_ctime_nsec; + return inode->i_ctime_nsec & ~I_CTIME_QUERIED; } static inline struct timespec64 inode_get_ctime(const struct inode *inode) @@ -1631,13 +1642,7 @@ static inline struct timespec64 inode_get_ctime(const struct inode *inode) return ts; } -static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, - struct timespec64 ts) -{ - inode->i_ctime_sec = ts.tv_sec; - inode->i_ctime_nsec = ts.tv_nsec; - return ts; -} +struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts); /** * inode_set_ctime - set the ctime in the inode @@ -2500,6 +2505,7 @@ struct file_system_type { #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ +#define FS_MGTIME 64 /* FS uses multigrain timestamps */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. 
*/ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; @@ -2523,6 +2529,17 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) +/** + * is_mgtime: is this inode using multigrain timestamps + * @inode: inode to test for multigrain timestamps + * + * Return true if the inode uses multigrain timestamps, false otherwise. + */ +static inline bool is_mgtime(const struct inode *inode) +{ + return inode->i_sb->s_type->fs_flags & FS_MGTIME; +} + extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -3262,6 +3279,7 @@ extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); +void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode); void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); void generic_fill_statx_atomic_writes(struct kstat *stat, -- cgit v1.2.3 From fcd4904e2f6908d5c255fa5818bcf8ad32a6f0e8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 5 Oct 2024 19:23:03 +0100 Subject: netfs: Remove call to folio_index() Calling folio_index() is pointless overhead; directly dereferencing folio->index is fine. Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20241005182307.3190401-2-willy@infradead.org Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 1d7c52821e55..72a208fd4496 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -451,7 +451,7 @@ TRACE_EVENT(netfs_folio, struct address_space *__m = READ_ONCE(folio->mapping); __entry->ino = __m ? 
__m->host->i_ino : 0; __entry->why = why; - __entry->index = folio_index(folio); + __entry->index = folio->index; __entry->nr = folio_nr_pages(folio); ), -- cgit v1.2.3 From 10488630e1072cc5feebf25d1be04505fafb59a5 Mon Sep 17 00:00:00 2001 From: Naveen Manohar Date: Mon, 7 Oct 2024 15:59:54 +0800 Subject: ASoC: intel/sdw_utils: refactor RT multifunction sdca speaker codecs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge spk_rtd_init for multifunction sdca codecs:rt712/rt722 Signed-off-by: Naveen Manohar Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Signed-off-by: Bard Liao Link: https://patch.msgid.link/20241007075955.12575-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 3 +- sound/soc/intel/common/soc-acpi-intel-ptl-match.c | 18 ++--- sound/soc/sdw_utils/Makefile | 3 +- sound/soc/sdw_utils/soc_sdw_rt712_sdca.c | 48 ------------- sound/soc/sdw_utils/soc_sdw_rt722_sdca.c | 41 ----------- sound/soc/sdw_utils/soc_sdw_rt_mf_sdca.c | 85 +++++++++++++++++++++++ sound/soc/sdw_utils/soc_sdw_utils.c | 4 +- 7 files changed, 98 insertions(+), 104 deletions(-) delete mode 100644 sound/soc/sdw_utils/soc_sdw_rt712_sdca.c delete mode 100644 sound/soc/sdw_utils/soc_sdw_rt722_sdca.c create mode 100644 sound/soc/sdw_utils/soc_sdw_rt_mf_sdca.c (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index f68c1f193b3b..2374e6df4e58 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -234,8 +234,7 @@ int asoc_sdw_rt_sdca_jack_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_s int asoc_sdw_rt_amp_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt700_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt711_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); -int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); -int asoc_sdw_rt722_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_rt_mf_sdca_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_rt5682_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_cs42l42_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_cs42l43_hs_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index 7107f0151030..5ed905440e9d 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -36,10 +36,10 @@ static const struct snd_soc_acpi_endpoint single_endpoint = { }; /* - * RT722 is a multi-function codec, three endpoints are created for - * its headset, amp and dmic functions. + * Multi-function codecs with three endpoints created for + * headset, amp and dmic functions. 
*/ -static const struct snd_soc_acpi_endpoint rt722_endpoints[] = { +static const struct snd_soc_acpi_endpoint rt_mf_endpoints[] = { { .num = 0, .aggregated = 0, @@ -72,8 +72,8 @@ static const struct snd_soc_acpi_adr_device rt711_sdca_0_adr[] = { static const struct snd_soc_acpi_adr_device rt722_0_single_adr[] = { { .adr = 0x000030025d072201ull, - .num_endpoints = ARRAY_SIZE(rt722_endpoints), - .endpoints = rt722_endpoints, + .num_endpoints = ARRAY_SIZE(rt_mf_endpoints), + .endpoints = rt_mf_endpoints, .name_prefix = "rt722" } }; @@ -81,8 +81,8 @@ static const struct snd_soc_acpi_adr_device rt722_0_single_adr[] = { static const struct snd_soc_acpi_adr_device rt722_1_single_adr[] = { { .adr = 0x000130025d072201ull, - .num_endpoints = ARRAY_SIZE(rt722_endpoints), - .endpoints = rt722_endpoints, + .num_endpoints = ARRAY_SIZE(rt_mf_endpoints), + .endpoints = rt_mf_endpoints, .name_prefix = "rt722" } }; @@ -90,8 +90,8 @@ static const struct snd_soc_acpi_adr_device rt722_1_single_adr[] = { static const struct snd_soc_acpi_adr_device rt722_3_single_adr[] = { { .adr = 0x000330025d072201ull, - .num_endpoints = ARRAY_SIZE(rt722_endpoints), - .endpoints = rt722_endpoints, + .num_endpoints = ARRAY_SIZE(rt_mf_endpoints), + .endpoints = rt_mf_endpoints, .name_prefix = "rt722" } }; diff --git a/sound/soc/sdw_utils/Makefile b/sound/soc/sdw_utils/Makefile index 28229ed96ffb..daf019113553 100644 --- a/sound/soc/sdw_utils/Makefile +++ b/sound/soc/sdw_utils/Makefile @@ -1,9 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only snd-soc-sdw-utils-y := soc_sdw_utils.o soc_sdw_dmic.o soc_sdw_rt_dmic.o \ soc_sdw_rt700.o soc_sdw_rt711.o \ - soc_sdw_rt712_sdca.o soc_sdw_rt722_sdca.o \ soc_sdw_rt5682.o soc_sdw_rt_sdca_jack_common.o \ - soc_sdw_rt_amp.o \ + soc_sdw_rt_amp.o soc_sdw_rt_mf_sdca.o \ soc_sdw_bridge_cs35l56.o \ soc_sdw_cs42l42.o soc_sdw_cs42l43.o \ soc_sdw_cs_amp.o \ diff --git a/sound/soc/sdw_utils/soc_sdw_rt712_sdca.c b/sound/soc/sdw_utils/soc_sdw_rt712_sdca.c deleted file mode 100644 index 5127210b9a03..000000000000 --- a/sound/soc/sdw_utils/soc_sdw_rt712_sdca.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -// This file incorporates work covered by the following copyright notice: -// Copyright (c) 2023 Intel Corporation -// Copyright (c) 2024 Advanced Micro Devices, Inc. - -/* - * soc_sdw_rt712_sdca - Helpers to handle RT712-SDCA from generic machine driver - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * dapm routes for rt712 spk will be registered dynamically according - * to the number of rt712 spk used. The first two entries will be registered - * for one codec case, and the last two entries are also registered - * if two rt712s are used. 
- */ -static const struct snd_soc_dapm_route rt712_spk_map[] = { - { "Speaker", NULL, "rt712 SPOL" }, - { "Speaker", NULL, "rt712 SPOR" }, -}; - -int asoc_sdw_rt712_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai) -{ - struct snd_soc_card *card = rtd->card; - int ret; - - card->components = devm_kasprintf(card->dev, GFP_KERNEL, - "%s spk:rt712", - card->components); - if (!card->components) - return -ENOMEM; - - ret = snd_soc_dapm_add_routes(&card->dapm, rt712_spk_map, ARRAY_SIZE(rt712_spk_map)); - if (ret) - dev_err(rtd->dev, "failed to add SPK map: %d\n", ret); - - return ret; -} -EXPORT_SYMBOL_NS(asoc_sdw_rt712_spk_rtd_init, SND_SOC_SDW_UTILS); diff --git a/sound/soc/sdw_utils/soc_sdw_rt722_sdca.c b/sound/soc/sdw_utils/soc_sdw_rt722_sdca.c deleted file mode 100644 index 6a402172289f..000000000000 --- a/sound/soc/sdw_utils/soc_sdw_rt722_sdca.c +++ /dev/null @@ -1,41 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -// This file incorporates work covered by the following copyright notice: -// Copyright (c) 2023 Intel Corporation -// Copyright (c) 2024 Advanced Micro Devices, Inc. - -/* - * soc_sdw_rt722_sdca - Helpers to handle RT722-SDCA from generic machine driver - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static const struct snd_soc_dapm_route rt722_spk_map[] = { - { "Speaker", NULL, "rt722 SPK" }, -}; - -int asoc_sdw_rt722_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai) -{ - struct snd_soc_card *card = rtd->card; - int ret; - - card->components = devm_kasprintf(card->dev, GFP_KERNEL, - "%s spk:rt722", - card->components); - if (!card->components) - return -ENOMEM; - - ret = snd_soc_dapm_add_routes(&card->dapm, rt722_spk_map, ARRAY_SIZE(rt722_spk_map)); - if (ret) - dev_err(rtd->dev, "failed to add rt722 spk map: %d\n", ret); - - return ret; -} -EXPORT_SYMBOL_NS(asoc_sdw_rt722_spk_rtd_init, SND_SOC_SDW_UTILS); diff --git a/sound/soc/sdw_utils/soc_sdw_rt_mf_sdca.c b/sound/soc/sdw_utils/soc_sdw_rt_mf_sdca.c new file mode 100644 index 000000000000..8143d59ad10f --- /dev/null +++ b/sound/soc/sdw_utils/soc_sdw_rt_mf_sdca.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-only +// This file incorporates work covered by the following copyright notice: +// Copyright (c) 2024 Intel Corporation. 
+ +/* + * soc_sdw_rt_mf_sdca + * - Helpers to handle RT Multifunction Codec from generic machine driver + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CODEC_NAME_SIZE 6 + +/* dapm routes for RT-SPK will be registered dynamically */ +static const struct snd_soc_dapm_route rt712_spk_map[] = { + { "Speaker", NULL, "rt712 SPOL" }, + { "Speaker", NULL, "rt712 SPOR" }, +}; + +static const struct snd_soc_dapm_route rt722_spk_map[] = { + { "Speaker", NULL, "rt722 SPK" }, +}; + +/* Structure to map codec names to respective route arrays and sizes */ +struct codec_route_map { + const char *codec_name; + const struct snd_soc_dapm_route *route_map; + size_t route_size; +}; + +/* Codec route maps array */ +static const struct codec_route_map codec_routes[] = { + { "rt712", rt712_spk_map, ARRAY_SIZE(rt712_spk_map) }, + { "rt722", rt722_spk_map, ARRAY_SIZE(rt722_spk_map) }, +}; + +static const struct codec_route_map *get_codec_route_map(const char *codec_name) +{ + for (size_t i = 0; i < ARRAY_SIZE(codec_routes); i++) { + if (strcmp(codec_routes[i].codec_name, codec_name) == 0) + return &codec_routes[i]; + } + return NULL; +} + +int asoc_sdw_rt_mf_sdca_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai) +{ + struct snd_soc_card *card = rtd->card; + char codec_name[CODEC_NAME_SIZE]; + int ret; + + /* acquire codec name */ + snprintf(codec_name, CODEC_NAME_SIZE, "%s", dai->name); + + /* acquire corresponding route map and size */ + const struct codec_route_map *route_map = get_codec_route_map(codec_name); + + if (!route_map) { + dev_err(rtd->dev, "failed to get codec name and route map\n"); + return -EINVAL; + } + + /* Update card components */ + card->components = devm_kasprintf(card->dev, GFP_KERNEL, + "%s spk:%s", + card->components, codec_name); + if (!card->components) + return -ENOMEM; + + /* Add routes */ + ret = snd_soc_dapm_add_routes(&card->dapm, route_map->route_map, route_map->route_size); + if (ret) + dev_err(rtd->dev, "failed to add rt sdca spk map: %d\n", ret); + + return ret; +} +EXPORT_SYMBOL_NS(asoc_sdw_rt_mf_sdca_spk_rtd_init, SND_SOC_SDW_UTILS); diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index a6070f822eb9..3b1af6c81e83 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -138,7 +138,7 @@ struct asoc_sdw_codec_info codec_info_list[] = { .dailink = {SOC_SDW_AMP_OUT_DAI_ID, SOC_SDW_UNUSED_DAI_ID}, .init = asoc_sdw_rt_amp_init, .exit = asoc_sdw_rt_amp_exit, - .rtd_init = asoc_sdw_rt712_spk_rtd_init, + .rtd_init = asoc_sdw_rt_mf_sdca_spk_rtd_init, .controls = generic_spk_controls, .num_controls = ARRAY_SIZE(generic_spk_controls), .widgets = generic_spk_widgets, @@ -358,7 +358,7 @@ struct asoc_sdw_codec_info codec_info_list[] = { .dailink = {SOC_SDW_AMP_OUT_DAI_ID, SOC_SDW_UNUSED_DAI_ID}, .init = asoc_sdw_rt_amp_init, .exit = asoc_sdw_rt_amp_exit, - .rtd_init = asoc_sdw_rt722_spk_rtd_init, + .rtd_init = asoc_sdw_rt_mf_sdca_spk_rtd_init, .controls = generic_spk_controls, .num_controls = ARRAY_SIZE(generic_spk_controls), .widgets = generic_spk_widgets, -- cgit v1.2.3 From 8fd3395ec9051a52828fcca2328cb50a69dea8ef Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 31 Jul 2024 11:49:04 -0400 Subject: get rid of ...lookup...fdget_rcu() family Once upon a time, predecessors of those used to do file lookup without bumping a refcount, provided that caller held rcu_read_lock() across the lookup and whatever it wanted to read from the struct 
file found. When struct file allocation switched to SLAB_TYPESAFE_BY_RCU, that stopped being feasible and these primitives started to bump the file refcount for lookup result, requiring the caller to call fput() afterwards. But that turned them pointless - e.g. rcu_read_lock(); file = lookup_fdget_rcu(fd); rcu_read_unlock(); is equivalent to file = fget_raw(fd); and all callers of lookup_fdget_rcu() are of that form. Similarly, task_lookup_fdget_rcu() calls can be replaced with calling fget_task(). task_lookup_next_fdget_rcu() doesn't have direct counterparts, but its callers would be happier if we replaced it with an analogue that deals with RCU internally. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/coredump.c | 4 +--- fs/file.c | 28 ++++------------------------ fs/gfs2/glock.c | 12 ++---------- fs/notify/dnotify/dnotify.c | 5 +---- fs/proc/fd.c | 12 +++--------- include/linux/fdtable.h | 4 ---- include/linux/file.h | 1 + kernel/bpf/task_iter.c | 6 +----- kernel/kcmp.c | 4 +--- 9 files changed, 14 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 18daafbe2e65..301ee7d8b7df 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -73,9 +73,7 @@ static struct spu_context *coredump_next_context(int *fd) return NULL; *fd = n - 1; - rcu_read_lock(); - file = lookup_fdget_rcu(*fd); - rcu_read_unlock(); + file = fget_raw(*fd); if (file) { ctx = SPUFS_I(file_inode(file))->i_ctx; get_spu_context(ctx); diff --git a/fs/file.c b/fs/file.c index eb093e736972..991860ee7848 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1037,29 +1037,7 @@ struct file *fget_task(struct task_struct *task, unsigned int fd) return file; } -struct file *lookup_fdget_rcu(unsigned int fd) -{ - return __fget_files_rcu(current->files, fd, 0); - -} -EXPORT_SYMBOL_GPL(lookup_fdget_rcu); - -struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd) -{ - /* Must be called with rcu_read_lock held */ - struct files_struct *files; - struct file *file = NULL; - - task_lock(task); - files = task->files; - if (files) - file = __fget_files_rcu(files, fd, 0); - task_unlock(task); - - return file; -} - -struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *ret_fd) +struct file *fget_task_next(struct task_struct *task, unsigned int *ret_fd) { /* Must be called with rcu_read_lock held */ struct files_struct *files; @@ -1069,17 +1047,19 @@ struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int * task_lock(task); files = task->files; if (files) { + rcu_read_lock(); for (; fd < files_fdtable(files)->max_fds; fd++) { file = __fget_files_rcu(files, fd, 0); if (file) break; } + rcu_read_unlock(); } task_unlock(task); *ret_fd = fd; return file; } -EXPORT_SYMBOL(task_lookup_next_fdget_rcu); +EXPORT_SYMBOL(fget_task_next); /* * Lightweight file lookup - no refcnt increment if fd table isn't shared. 
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 269c3bc7fced..4701c4aafbf4 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include "gfs2.h" @@ -2768,25 +2767,18 @@ static struct file *gfs2_glockfd_next_file(struct gfs2_glockfd_iter *i) i->file = NULL; } - rcu_read_lock(); for(;; i->fd++) { - struct inode *inode; - - i->file = task_lookup_next_fdget_rcu(i->task, &i->fd); + i->file = fget_task_next(i->task, &i->fd); if (!i->file) { i->fd = 0; break; } - inode = file_inode(i->file); - if (inode->i_sb == i->sb) + if (file_inode(i->file)->i_sb == i->sb) break; - rcu_read_unlock(); fput(i->file); - rcu_read_lock(); } - rcu_read_unlock(); return i->file; } diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index d5dbef7f5c95..6004dfdfdf0f 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -16,7 +16,6 @@ #include #include #include -#include #include static int dir_notify_enable __read_mostly = 1; @@ -347,9 +346,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) new_fsn_mark = NULL; } - rcu_read_lock(); - f = lookup_fdget_rcu(fd); - rcu_read_unlock(); + f = fget_raw(fd); /* if (f != filp) means that we lost a race and another task/thread * actually closed the fd we are still playing with before we grabbed diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 1f54a54bfb91..18d0dddc8e2f 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -116,9 +116,7 @@ static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode) { struct file *file; - rcu_read_lock(); - file = task_lookup_fdget_rcu(task, fd); - rcu_read_unlock(); + file = fget_task(task, fd); if (file) { *mode = file->f_mode; fput(file); @@ -258,19 +256,17 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, if (!dir_emit_dots(file, ctx)) goto out; - rcu_read_lock(); for (fd = ctx->pos - 2;; fd++) { struct file *f; struct fd_data data; char name[10 + 1]; unsigned int len; - f = task_lookup_next_fdget_rcu(p, &fd); + f = fget_task_next(p, &fd); ctx->pos = fd + 2LL; if (!f) break; data.mode = f->f_mode; - rcu_read_unlock(); fput(f); data.fd = fd; @@ -278,11 +274,9 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, if (!proc_fill_cache(file, ctx, name, len, instantiate, p, &data)) - goto out; + break; cond_resched(); - rcu_read_lock(); } - rcu_read_unlock(); out: put_task_struct(p); return 0; diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index b1c5722f2b3c..e25e2cb65d30 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -92,10 +92,6 @@ static inline struct file *files_lookup_fd_locked(struct files_struct *files, un return files_lookup_fd_raw(files, fd); } -struct file *lookup_fdget_rcu(unsigned int fd); -struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd); -struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *fd); - static inline bool close_on_exec(unsigned int fd, const struct files_struct *files) { return test_bit(fd, files_fdtable(files)->close_on_exec); diff --git a/include/linux/file.h b/include/linux/file.h index f98de143245a..ec4ad5e6a061 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -72,6 +72,7 @@ static inline void fdput(struct fd fd) extern struct file *fget(unsigned int fd); extern struct file *fget_raw(unsigned int fd); extern struct file *fget_task(struct task_struct *task, unsigned int fd); +extern struct file *fget_task_next(struct 
task_struct *task, unsigned int *fd); extern void __f_unlock_pos(struct file *); struct fd fdget(unsigned int fd); diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 02aa9db8d796..7fe602ca74a0 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -286,17 +285,14 @@ again: curr_fd = 0; } - rcu_read_lock(); - f = task_lookup_next_fdget_rcu(curr_task, &curr_fd); + f = fget_task_next(curr_task, &curr_fd); if (f) { /* set info->fd */ info->fd = curr_fd; - rcu_read_unlock(); return f; } /* the current task is done, go to the next task */ - rcu_read_unlock(); put_task_struct(curr_task); if (info->common.type == BPF_TASK_ITER_TID) { diff --git a/kernel/kcmp.c b/kernel/kcmp.c index b0639f21041f..2c596851f8a9 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -63,9 +63,7 @@ get_file_raw_ptr(struct task_struct *task, unsigned int idx) { struct file *file; - rcu_read_lock(); - file = task_lookup_fdget_rcu(task, idx); - rcu_read_unlock(); + file = fget_task(task, idx); if (file) fput(file); -- cgit v1.2.3 From cab0515211f483e392d6862021ed008f49058561 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Jun 2024 17:48:36 -0400 Subject: move close_range(2) into fs/file.c, fold __close_range() into it We never had callers for __close_range() except for close_range(2) itself. Nothing of that sort has appeared in four years and if any users do show up, we can always separate those suckers again. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/file.c | 6 ++++-- fs/open.c | 17 ----------------- include/linux/fdtable.h | 1 - 3 files changed, 4 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/file.c b/fs/file.c index 8770010170c5..8e8f504782bf 100644 --- a/fs/file.c +++ b/fs/file.c @@ -713,7 +713,7 @@ static inline void __range_close(struct files_struct *files, unsigned int fd, } /** - * __close_range() - Close all file descriptors in a given range. + * sys_close_range() - Close all file descriptors in a given range. * * @fd: starting file descriptor to close * @max_fd: last file descriptor to close @@ -721,8 +721,10 @@ static inline void __range_close(struct files_struct *files, unsigned int fd, * * This closes a range of file descriptors. All file descriptors * from @fd up to and including @max_fd are closed. + * Currently, errors to close a given file descriptor are ignored. */ -int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) +SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd, + unsigned int, flags) { struct task_struct *me = current; struct files_struct *cur_fds = me->files, *fds = NULL; diff --git a/fs/open.c b/fs/open.c index acaeb3e25c88..62dd1383d6f9 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1574,23 +1574,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd) return retval; } -/** - * sys_close_range() - Close all file descriptors in a given range. - * - * @fd: starting file descriptor to close - * @max_fd: last file descriptor to close - * @flags: reserved for future extensions - * - * This closes a range of file descriptors. All file descriptors - * from @fd up to and including @max_fd are closed. - * Currently, errors to close a given file descriptor are ignored. - */ -SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd, - unsigned int, flags) -{ - return __close_range(fd, max_fd, flags); -} - /* * This routine simulates a hangup on the tty, to arrange that users * are given clean terminals at login time. 
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index e25e2cb65d30..c45306a9f007 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -111,7 +111,6 @@ int iterate_fd(struct files_struct *, unsigned, const void *); extern int close_fd(unsigned int fd); -extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); extern struct file *file_close_fd(unsigned int fd); extern struct kmem_cache *files_cachep; -- cgit v1.2.3 From 539770616521e5b046ca7612eb79ba11b53edb1d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 3 Oct 2024 12:52:17 +0100 Subject: net: dsa: remove obsolete phylink dsa_switch operations No driver now uses the DSA switch phylink members, so we can now remove the method pointers, but we need to leave empty shim functions to allow those drivers that do not provide phylink MAC operations structure to continue functioning. Signed-off-by: Russell King (oracle) Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean # sja1105, felix, dsa_loop Link: https://patch.msgid.link/E1swKNV-0060oN-1b@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 15 --------------- net/dsa/dsa.c | 8 -------- net/dsa/port.c | 34 +--------------------------------- 3 files changed, 1 insertion(+), 56 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index d7a6c2930277..72ae65e7246a 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -885,21 +885,6 @@ struct dsa_switch_ops { */ void (*phylink_get_caps)(struct dsa_switch *ds, int port, struct phylink_config *config); - struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds, - int port, - phy_interface_t iface); - void (*phylink_mac_config)(struct dsa_switch *ds, int port, - unsigned int mode, - const struct phylink_link_state *state); - void (*phylink_mac_link_down)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface); - void (*phylink_mac_link_up)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface, - struct phy_device *phydev, - int speed, int duplex, - bool tx_pause, bool rx_pause); void (*phylink_fixed_state)(struct dsa_switch *ds, int port, struct phylink_link_state *state); /* diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 1664547deffd..5a7c0e565a89 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1505,14 +1505,6 @@ static int dsa_switch_probe(struct dsa_switch *ds) if (!ds->num_ports) return -EINVAL; - if (ds->phylink_mac_ops) { - if (ds->ops->phylink_mac_select_pcs || - ds->ops->phylink_mac_config || - ds->ops->phylink_mac_link_down || - ds->ops->phylink_mac_link_up) - return -EINVAL; - } - if (np) { err = dsa_switch_parse_of(ds, np); if (err) diff --git a/net/dsa/port.c b/net/dsa/port.c index 25258b33e59e..f1e96706a701 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1579,40 +1579,19 @@ static struct phylink_pcs * dsa_port_phylink_mac_select_pcs(struct phylink_config *config, phy_interface_t interface) { - struct dsa_port *dp = dsa_phylink_to_port(config); - struct phylink_pcs *pcs = ERR_PTR(-EOPNOTSUPP); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->phylink_mac_select_pcs) - pcs = ds->ops->phylink_mac_select_pcs(ds, dp->index, interface); - - return pcs; + return ERR_PTR(-EOPNOTSUPP); } static void dsa_port_phylink_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { - struct dsa_port *dp = dsa_phylink_to_port(config); - struct dsa_switch *ds = dp->ds; - - if 
(!ds->ops->phylink_mac_config) - return; - - ds->ops->phylink_mac_config(ds, dp->index, mode, state); } static void dsa_port_phylink_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface) { - struct dsa_port *dp = dsa_phylink_to_port(config); - struct dsa_switch *ds = dp->ds; - - if (!ds->ops->phylink_mac_link_down) - return; - - ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface); } static void dsa_port_phylink_mac_link_up(struct phylink_config *config, @@ -1622,14 +1601,6 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config, int speed, int duplex, bool tx_pause, bool rx_pause) { - struct dsa_port *dp = dsa_phylink_to_port(config); - struct dsa_switch *ds = dp->ds; - - if (!ds->ops->phylink_mac_link_up) - return; - - ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev, - speed, duplex, tx_pause, rx_pause); } static const struct phylink_mac_ops dsa_port_phylink_mac_ops = { @@ -1871,9 +1842,6 @@ static void dsa_shared_port_link_down(struct dsa_port *dp) if (ds->phylink_mac_ops && ds->phylink_mac_ops->mac_link_down) ds->phylink_mac_ops->mac_link_down(&dp->pl_config, MLO_AN_FIXED, PHY_INTERFACE_MODE_NA); - else if (ds->ops->phylink_mac_link_down) - ds->ops->phylink_mac_link_down(ds, dp->index, MLO_AN_FIXED, - PHY_INTERFACE_MODE_NA); } int dsa_shared_port_link_register_of(struct dsa_port *dp) -- cgit v1.2.3 From a3f5f4c2f9b6bc2aa6f5a3e8e23b7519e4f2e3e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Oct 2024 13:47:20 +0000 Subject: ipv4: remove fib_info_devhash[] Upcoming per-netns RTNL conversion needs to get rid of shared hash tables. fib_info_devhash[] is one of them. It is unclear why we used a hash table, because a single hlist_head per net device was cheaper and scalable. 
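To make the data-structure change concrete, here is a rough sketch (stand-in types, not the kernel's struct net_device or struct fib_nh) of what moving from a shared hash table to a per-device list head buys: the chain is reached by a direct field access, and it can never contain entries belonging to another device or namespace, so walkers no longer need to re-check the device on every entry.

	/* Illustrative only -- simplified stand-ins for the kernel types. */

	struct nexthop {
		struct nexthop *next;		/* chained per device */
		/* ... route data ... */
	};

	/*
	 * Before: a shared bucket array indexed by hash(netns, ifindex).
	 * Unrelated devices can land in the same chain, so every walker
	 * must check that an entry really belongs to the device at hand.
	 */
	#define DEVHASH_SIZE 256
	static struct nexthop *shared_devhash[DEVHASH_SIZE];

	/* After: the chain head is embedded in the device itself. */
	struct netdev {
		int ifindex;
		struct nexthop *nh_head;
	};

	static struct nexthop *nexthops_of(struct netdev *dev)
	{
		return dev->nh_head;	/* no hashing, no cross-device collisions */
	}

This is also why the hunks below can replace the open-coded nh->fib_nh_dev != dev checks with debug-only DEBUG_NET_WARN_ON_ONCE() assertions: every entry on the per-device chain belongs to that device by construction.
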
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: David Ahern Link: https://patch.msgid.link/20241004134720.579244-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- .../networking/net_cachelines/net_device.rst | 1 + include/linux/netdevice.h | 3 ++ net/ipv4/fib_semantics.c | 35 ++++++++++------------ 3 files changed, 19 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 49f03cb78c6e..556711c4d3cf 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -83,6 +83,7 @@ unsigned_int allmulti bool uc_promisc unsigned_char nested_level struct_in_device* ip_ptr read_mostly read_mostly __in_dev_get +struct hlist_head fib_nh_head struct_inet6_dev* ip6_ptr read_mostly read_mostly __in6_dev_get struct_vlan_info* vlan_info struct_dsa_port* dsa_ptr diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 49a7e7db0883..3baf8e539b6f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2211,6 +2211,9 @@ struct net_device { /* Protocol-specific pointers */ struct in_device __rcu *ip_ptr; + /** @fib_nh_head: nexthops associated with this netdev */ + struct hlist_head fib_nh_head; + #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; #endif diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ece779bfb8f6..d2cee5c314f5 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -56,10 +56,6 @@ static unsigned int fib_info_hash_size; static unsigned int fib_info_hash_bits; static unsigned int fib_info_cnt; -#define DEVINDEX_HASHBITS 8 -#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) -static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; - /* for_nexthops and change_nexthops only used when nexthop object * is not set in a fib_info. The logic within can reference fib_nh. 
*/ @@ -319,12 +315,9 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) return 0; } -static struct hlist_head * -fib_info_devhash_bucket(const struct net_device *dev) +static struct hlist_head *fib_nh_head(struct net_device *dev) { - u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex; - - return &fib_info_devhash[hash_32(val, DEVINDEX_HASHBITS)]; + return &dev->fib_nh_head; } static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, @@ -435,11 +428,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) struct hlist_head *head; struct fib_nh *nh; - head = fib_info_devhash_bucket(dev); + head = fib_nh_head(dev); hlist_for_each_entry_rcu(nh, head, nh_hash) { - if (nh->fib_nh_dev == dev && - nh->fib_nh_gw4 == gw && + DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev); + if (nh->fib_nh_gw4 == gw && !(nh->fib_nh_flags & RTNH_F_DEAD)) { return 0; } @@ -1595,7 +1588,7 @@ link_it: if (!nexthop_nh->fib_nh_dev) continue; - head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev); + head = fib_nh_head(nexthop_nh->fib_nh_dev); hlist_add_head_rcu(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) } @@ -1948,12 +1941,12 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) { - struct hlist_head *head = fib_info_devhash_bucket(dev); + struct hlist_head *head = fib_nh_head(dev); struct fib_nh *nh; hlist_for_each_entry(nh, head, nh_hash) { - if (nh->fib_nh_dev == dev) - fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu); + DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev); + fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu); } } @@ -1967,7 +1960,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) */ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { - struct hlist_head *head = fib_info_devhash_bucket(dev); + struct hlist_head *head = fib_nh_head(dev); struct fib_info *prev_fi = NULL; int scope = RT_SCOPE_NOWHERE; struct fib_nh *nh; @@ -1981,7 +1974,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) int dead; BUG_ON(!fi->fib_nhs); - if (nh->fib_nh_dev != dev || fi == prev_fi) + DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev); + if (fi == prev_fi) continue; prev_fi = fi; dead = 0; @@ -2131,7 +2125,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) } prev_fi = NULL; - head = fib_info_devhash_bucket(dev); + head = fib_nh_head(dev); ret = 0; hlist_for_each_entry(nh, head, nh_hash) { @@ -2139,7 +2133,8 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) int alive; BUG_ON(!fi->fib_nhs); - if (nh->fib_nh_dev != dev || fi == prev_fi) + DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev); + if (fi == prev_fi) continue; prev_fi = fi; -- cgit v1.2.3 From 83134ef4609388f6b9ca31a384f531155196c2a7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Oct 2024 12:13:31 +0200 Subject: netkit: Add option for scrubbing skb meta data Jordan reported that when running Cilium with netkit in per-endpoint-routes mode, network policy misclassifies traffic. In this direct routing mode of Cilium which is used in case of GKE/EKS/AKS, the Pod's BPF program to enforce policy sits on the netkit primary device's egress side. The issue here is that in case of netkit's netkit_prep_forward(), it will clear meta data such as skb->mark and skb->priority before executing the BPF program. Thus, identity data stored in there from earlier BPF programs (e.g. 
from tcx ingress on the physical device) gets cleared instead of being made available for the primary's program to process. While for traffic egressing the Pod via the peer device this might be desired, this is different for the primary one where compared to tcx egress on the host veth this information would be available. To address this, add a new parameter for the device orchestration to allow control of skb->mark and skb->priority scrubbing, to make the two accessible from BPF (and eventually leave it up to the program to scrub). By default, the current behavior is retained. For netkit peer this also enables the use case where applications could cooperate/signal intent to the BPF program. Note that struct netkit has a 4 byte hole between policy and bundle which is used here, in other words, struct netkit's first cacheline content used in fast-path does not get moved around. Fixes: 35dfaad7188c ("netkit, bpf: Add bpf programmable net device") Reported-by: Jordan Rife Signed-off-by: Daniel Borkmann Cc: Nikolay Aleksandrov Link: https://github.com/cilium/cilium/issues/34042 Acked-by: Jakub Kicinski Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20241004101335.117711-1-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/netkit.c | 68 +++++++++++++++++++++++++++++++++++--------- include/uapi/linux/if_link.h | 15 ++++++++++ 2 files changed, 70 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 059269557d92..fba2c734f0ec 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -20,6 +20,7 @@ struct netkit { struct net_device __rcu *peer; struct bpf_mprog_entry __rcu *active; enum netkit_action policy; + enum netkit_scrub scrub; struct bpf_mprog_bundle bundle; /* Needed in slow-path */ @@ -50,12 +51,24 @@ netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb, return ret; } -static void netkit_prep_forward(struct sk_buff *skb, bool xnet) +static void netkit_xnet(struct sk_buff *skb) { - skb_scrub_packet(skb, xnet); skb->priority = 0; + skb->mark = 0; +} + +static void netkit_prep_forward(struct sk_buff *skb, + bool xnet, bool xnet_scrub) +{ + skb_scrub_packet(skb, false); nf_skip_egress(skb, true); skb_reset_mac_header(skb); + if (!xnet) + return; + ipvs_reset(skb); + skb_clear_tstamp(skb); + if (xnet_scrub) + netkit_xnet(skb); } static struct netkit *netkit_priv(const struct net_device *dev) @@ -80,7 +93,8 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) !pskb_may_pull(skb, ETH_HLEN) || skb_orphan_frags(skb, GFP_ATOMIC))) goto drop; - netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer))); + netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)), + nk->scrub); eth_skb_pkt_type(skb, peer); skb->dev = peer; entry = rcu_dereference(nk->active); @@ -332,8 +346,10 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev, struct netlink_ext_ack *extack) { struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb, *attr; - enum netkit_action default_prim = NETKIT_PASS; - enum netkit_action default_peer = NETKIT_PASS; + enum netkit_action policy_prim = NETKIT_PASS; + enum netkit_action policy_peer = NETKIT_PASS; + enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT; + enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT; enum netkit_mode mode = NETKIT_L3; unsigned char ifname_assign_type; struct ifinfomsg *ifmp = NULL; @@ -362,17 +378,21 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev, return err; tbp = 
peer_tb; } + if (data[IFLA_NETKIT_SCRUB]) + scrub_prim = nla_get_u32(data[IFLA_NETKIT_SCRUB]); + if (data[IFLA_NETKIT_PEER_SCRUB]) + scrub_peer = nla_get_u32(data[IFLA_NETKIT_PEER_SCRUB]); if (data[IFLA_NETKIT_POLICY]) { attr = data[IFLA_NETKIT_POLICY]; - default_prim = nla_get_u32(attr); - err = netkit_check_policy(default_prim, attr, extack); + policy_prim = nla_get_u32(attr); + err = netkit_check_policy(policy_prim, attr, extack); if (err < 0) return err; } if (data[IFLA_NETKIT_PEER_POLICY]) { attr = data[IFLA_NETKIT_PEER_POLICY]; - default_peer = nla_get_u32(attr); - err = netkit_check_policy(default_peer, attr, extack); + policy_peer = nla_get_u32(attr); + err = netkit_check_policy(policy_peer, attr, extack); if (err < 0) return err; } @@ -409,7 +429,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev, nk = netkit_priv(peer); nk->primary = false; - nk->policy = default_peer; + nk->policy = policy_peer; + nk->scrub = scrub_peer; nk->mode = mode; bpf_mprog_bundle_init(&nk->bundle); @@ -434,7 +455,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev, nk = netkit_priv(dev); nk->primary = true; - nk->policy = default_prim; + nk->policy = policy_prim; + nk->scrub = scrub_prim; nk->mode = mode; bpf_mprog_bundle_init(&nk->bundle); @@ -874,6 +896,18 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], return -EACCES; } + if (data[IFLA_NETKIT_SCRUB]) { + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_SCRUB], + "netkit scrubbing cannot be changed after device creation"); + return -EACCES; + } + + if (data[IFLA_NETKIT_PEER_SCRUB]) { + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_SCRUB], + "netkit scrubbing cannot be changed after device creation"); + return -EACCES; + } + if (data[IFLA_NETKIT_PEER_INFO]) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO], "netkit peer info cannot be changed after device creation"); @@ -908,8 +942,10 @@ static size_t netkit_get_size(const struct net_device *dev) { return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */ nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_POLICY */ - nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */ + nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_SCRUB */ + nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */ nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */ + nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */ 0; } @@ -924,11 +960,15 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev) return -EMSGSIZE; if (nla_put_u32(skb, IFLA_NETKIT_MODE, nk->mode)) return -EMSGSIZE; + if (nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub)) + return -EMSGSIZE; if (peer) { nk = netkit_priv(peer); if (nla_put_u32(skb, IFLA_NETKIT_PEER_POLICY, nk->policy)) return -EMSGSIZE; + if (nla_put_u32(skb, IFLA_NETKIT_PEER_SCRUB, nk->scrub)) + return -EMSGSIZE; } return 0; @@ -936,9 +976,11 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev) static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = { [IFLA_NETKIT_PEER_INFO] = { .len = sizeof(struct ifinfomsg) }, - [IFLA_NETKIT_POLICY] = { .type = NLA_U32 }, [IFLA_NETKIT_MODE] = { .type = NLA_U32 }, + [IFLA_NETKIT_POLICY] = { .type = NLA_U32 }, [IFLA_NETKIT_PEER_POLICY] = { .type = NLA_U32 }, + [IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), + [IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), [IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT, .reject_message = "Primary attribute is read-only" }, }; diff 
--git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6dc258993b17..2acc7687e017 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1292,6 +1292,19 @@ enum netkit_mode { NETKIT_L3, }; +/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to + * the BPF program if attached. This also means the latter can + * consume the two fields if they were populated earlier. + * + * NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before + * invoking the attached BPF program when the peer device resides + * in a different network namespace. This is the default behavior. + */ +enum netkit_scrub { + NETKIT_SCRUB_NONE, + NETKIT_SCRUB_DEFAULT, +}; + enum { IFLA_NETKIT_UNSPEC, IFLA_NETKIT_PEER_INFO, @@ -1299,6 +1312,8 @@ enum { IFLA_NETKIT_POLICY, IFLA_NETKIT_PEER_POLICY, IFLA_NETKIT_MODE, + IFLA_NETKIT_SCRUB, + IFLA_NETKIT_PEER_SCRUB, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) -- cgit v1.2.3 From 20a4da20e0bd0297bf1ce083362e78b6702f991e Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 4 Oct 2024 11:01:00 +0200 Subject: net: phy: Add support for PHY timing-role configuration via device tree Introduce support for configuring the master/slave role of PHYs based on the `timing-role` property in the device tree. While this functionality is necessary for Single Pair Ethernet (SPE) PHYs (1000/100/10Base-T1) where hardware strap pins may be unavailable or incorrectly set, it works for any PHY type. Signed-off-by: Oleksij Rempel Reviewed-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Reviewed-by: Divya Koppera Signed-off-by: Paolo Abeni --- drivers/net/phy/phy-core.c | 33 +++++++++++++++++++++++++++++++++ drivers/net/phy/phy_device.c | 3 +++ include/linux/phy.h | 1 + 3 files changed, 37 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 1f98b6a96c15..4e8db12d6092 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -412,6 +412,39 @@ void of_set_phy_eee_broken(struct phy_device *phydev) phydev->eee_broken_modes = broken; } +/** + * of_set_phy_timing_role - Set the master/slave mode of the PHY + * + * @phydev: The phy_device struct + * + * Set master/slave configuration of the PHY based on the device tree. 
+ */ +void of_set_phy_timing_role(struct phy_device *phydev) +{ + struct device_node *node = phydev->mdio.dev.of_node; + const char *master; + + if (!IS_ENABLED(CONFIG_OF_MDIO)) + return; + + if (!node) + return; + + if (of_property_read_string(node, "timing-role", &master)) + return; + + if (strcmp(master, "forced-master") == 0) + phydev->master_slave_set = MASTER_SLAVE_CFG_MASTER_FORCE; + else if (strcmp(master, "forced-slave") == 0) + phydev->master_slave_set = MASTER_SLAVE_CFG_SLAVE_FORCE; + else if (strcmp(master, "preferred-master") == 0) + phydev->master_slave_set = MASTER_SLAVE_CFG_MASTER_PREFERRED; + else if (strcmp(master, "preferred-slave") == 0) + phydev->master_slave_set = MASTER_SLAVE_CFG_SLAVE_PREFERRED; + else + phydev_warn(phydev, "Unknown master-slave mode %s\n", master); +} + /** * phy_resolve_aneg_pause - Determine pause autoneg results * diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 560e338b307a..4ccf504a8b2c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3608,6 +3608,9 @@ static int phy_probe(struct device *dev) */ of_set_phy_eee_broken(phydev); + /* Get master/slave strap overrides */ + of_set_phy_timing_role(phydev); + /* The Pause Frame bits indicate that the PHY can support passing * pause frames. During autonegotiation, the PHYs will determine if * they should allow pause frames to pass. The MAC driver should then diff --git a/include/linux/phy.h b/include/linux/phy.h index a98bc91a0cde..ff762a3d8270 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1260,6 +1260,7 @@ size_t phy_speeds(unsigned int *speeds, size_t size, unsigned long *mask); void of_set_phy_supported(struct phy_device *phydev); void of_set_phy_eee_broken(struct phy_device *phydev); +void of_set_phy_timing_role(struct phy_device *phydev); int phy_speed_down_core(struct phy_device *phydev); /** -- cgit v1.2.3 From 95397784be23f66c5d4280b0a4c4e0d1ee74f2ef Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 2 Sep 2024 10:42:31 +0200 Subject: media: staging: drop omap4iss The omap4 camera driver has seen no progress since forever, and now OMAP4 support has also been dropped from u-boot (1). So it is time to retire this driver. 
(1): https://lists.denx.de/pipermail/u-boot/2024-July/558846.html Signed-off-by: Hans Verkuil Acked-by: Laurent Pinchart --- Documentation/admin-guide/media/omap4_camera.rst | 62 - Documentation/admin-guide/media/v4l-drivers.rst | 1 - MAINTAINERS | 8 - drivers/staging/media/Kconfig | 2 - drivers/staging/media/Makefile | 1 - drivers/staging/media/omap4iss/Kconfig | 12 - drivers/staging/media/omap4iss/Makefile | 9 - drivers/staging/media/omap4iss/TODO | 3 - drivers/staging/media/omap4iss/iss.c | 1354 --------------------- drivers/staging/media/omap4iss/iss.h | 247 ---- drivers/staging/media/omap4iss/iss_csi2.c | 1379 ---------------------- drivers/staging/media/omap4iss/iss_csi2.h | 155 --- drivers/staging/media/omap4iss/iss_csiphy.c | 277 ----- drivers/staging/media/omap4iss/iss_csiphy.h | 47 - drivers/staging/media/omap4iss/iss_ipipe.c | 579 --------- drivers/staging/media/omap4iss/iss_ipipe.h | 63 - drivers/staging/media/omap4iss/iss_ipipeif.c | 844 ------------- drivers/staging/media/omap4iss/iss_ipipeif.h | 89 -- drivers/staging/media/omap4iss/iss_regs.h | 899 -------------- drivers/staging/media/omap4iss/iss_resizer.c | 884 -------------- drivers/staging/media/omap4iss/iss_resizer.h | 72 -- drivers/staging/media/omap4iss/iss_video.c | 1274 -------------------- drivers/staging/media/omap4iss/iss_video.h | 203 ---- include/linux/platform_data/media/omap4iss.h | 66 -- 24 files changed, 8530 deletions(-) delete mode 100644 Documentation/admin-guide/media/omap4_camera.rst delete mode 100644 drivers/staging/media/omap4iss/Kconfig delete mode 100644 drivers/staging/media/omap4iss/Makefile delete mode 100644 drivers/staging/media/omap4iss/TODO delete mode 100644 drivers/staging/media/omap4iss/iss.c delete mode 100644 drivers/staging/media/omap4iss/iss.h delete mode 100644 drivers/staging/media/omap4iss/iss_csi2.c delete mode 100644 drivers/staging/media/omap4iss/iss_csi2.h delete mode 100644 drivers/staging/media/omap4iss/iss_csiphy.c delete mode 100644 drivers/staging/media/omap4iss/iss_csiphy.h delete mode 100644 drivers/staging/media/omap4iss/iss_ipipe.c delete mode 100644 drivers/staging/media/omap4iss/iss_ipipe.h delete mode 100644 drivers/staging/media/omap4iss/iss_ipipeif.c delete mode 100644 drivers/staging/media/omap4iss/iss_ipipeif.h delete mode 100644 drivers/staging/media/omap4iss/iss_regs.h delete mode 100644 drivers/staging/media/omap4iss/iss_resizer.c delete mode 100644 drivers/staging/media/omap4iss/iss_resizer.h delete mode 100644 drivers/staging/media/omap4iss/iss_video.c delete mode 100644 drivers/staging/media/omap4iss/iss_video.h delete mode 100644 include/linux/platform_data/media/omap4iss.h (limited to 'include') diff --git a/Documentation/admin-guide/media/omap4_camera.rst b/Documentation/admin-guide/media/omap4_camera.rst deleted file mode 100644 index 2ada9b1e6897..000000000000 --- a/Documentation/admin-guide/media/omap4_camera.rst +++ /dev/null @@ -1,62 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -OMAP4 ISS Driver -================ - -Author: Sergio Aguirre - -Copyright (C) 2012, Texas Instruments - -Introduction ------------- - -The OMAP44XX family of chips contains the Imaging SubSystem (a.k.a. 
ISS), -Which contains several components that can be categorized in 3 big groups: - -- Interfaces (2 Interfaces: CSI2-A & CSI2-B/CCP2) -- ISP (Image Signal Processor) -- SIMCOP (Still Image Coprocessor) - -For more information, please look in [#f1]_ for latest version of: -"OMAP4430 Multimedia Device Silicon Revision 2.x" - -As of Revision AB, the ISS is described in detail in section 8. - -This driver is supporting **only** the CSI2-A/B interfaces for now. - -It makes use of the Media Controller framework [#f2]_, and inherited most of the -code from OMAP3 ISP driver (found under drivers/media/platform/ti/omap3isp/\*), -except that it doesn't need an IOMMU now for ISS buffers memory mapping. - -Supports usage of MMAP buffers only (for now). - -Tested platforms ----------------- - -- OMAP4430SDP, w/ ES2.1 GP & SEVM4430-CAM-V1-0 (Contains IMX060 & OV5640, in - which only the last one is supported, outputting YUV422 frames). - -- TI Blaze MDP, w/ OMAP4430 ES2.2 EMU (Contains 1 IMX060 & 2 OV5650 sensors, in - which only the OV5650 are supported, outputting RAW10 frames). - -- PandaBoard, Rev. A2, w/ OMAP4430 ES2.1 GP & OV adapter board, tested with - following sensors: - * OV5640 - * OV5650 - -- Tested on mainline kernel: - - http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=summary - - Tag: v3.3 (commit c16fa4f2ad19908a47c63d8fa436a1178438c7e7) - -File list ---------- -drivers/staging/media/omap4iss/ -include/linux/platform_data/media/omap4iss.h - -References ----------- - -.. [#f1] http://focus.ti.com/general/docs/wtbu/wtbudocumentcenter.tsp?navigationId=12037&templateId=6123#62 -.. [#f2] http://lwn.net/Articles/420485/ diff --git a/Documentation/admin-guide/media/v4l-drivers.rst b/Documentation/admin-guide/media/v4l-drivers.rst index b6af448b9fe9..7d9029bbf109 100644 --- a/Documentation/admin-guide/media/v4l-drivers.rst +++ b/Documentation/admin-guide/media/v4l-drivers.rst @@ -20,7 +20,6 @@ Video4Linux (V4L) driver-specific documentation ivtv mgb4 omap3isp - omap4_camera philips qcom_camss raspberrypi-pisp-be diff --git a/MAINTAINERS b/MAINTAINERS index c27f3190737f..251ae8a52066 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16932,14 +16932,6 @@ S: Maintained F: Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml F: drivers/i2c/busses/i2c-omap.c -OMAP IMAGING SUBSYSTEM (OMAP3 ISP and OMAP4 ISS) -M: Laurent Pinchart -L: linux-media@vger.kernel.org -S: Maintained -F: Documentation/devicetree/bindings/media/ti,omap3isp.txt -F: drivers/media/platform/ti/omap3isp/ -F: drivers/staging/media/omap4iss/ - OMAP MMC SUPPORT M: Aaro Koskinen L: linux-omap@vger.kernel.org diff --git a/drivers/staging/media/Kconfig b/drivers/staging/media/Kconfig index 554c2e475ce3..b44214854399 100644 --- a/drivers/staging/media/Kconfig +++ b/drivers/staging/media/Kconfig @@ -32,8 +32,6 @@ source "drivers/staging/media/max96712/Kconfig" source "drivers/staging/media/meson/vdec/Kconfig" -source "drivers/staging/media/omap4iss/Kconfig" - source "drivers/staging/media/rkvdec/Kconfig" source "drivers/staging/media/starfive/Kconfig" diff --git a/drivers/staging/media/Makefile b/drivers/staging/media/Makefile index dcaeeca0ee6d..ad4e9619a9e0 100644 --- a/drivers/staging/media/Makefile +++ b/drivers/staging/media/Makefile @@ -4,7 +4,6 @@ obj-$(CONFIG_INTEL_ATOMISP) += atomisp/ obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx/ obj-$(CONFIG_VIDEO_MAX96712) += max96712/ obj-$(CONFIG_VIDEO_MESON_VDEC) += meson/vdec/ -obj-$(CONFIG_VIDEO_OMAP4) += omap4iss/ obj-$(CONFIG_VIDEO_ROCKCHIP_VDEC) += rkvdec/ 
obj-$(CONFIG_VIDEO_STARFIVE_CAMSS) += starfive/ obj-$(CONFIG_VIDEO_SUNXI) += sunxi/ diff --git a/drivers/staging/media/omap4iss/Kconfig b/drivers/staging/media/omap4iss/Kconfig deleted file mode 100644 index 6d1f55b09132..000000000000 --- a/drivers/staging/media/omap4iss/Kconfig +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -config VIDEO_OMAP4 - tristate "OMAP 4 Camera support" - depends on VIDEO_DEV && I2C - depends on ARCH_OMAP4 || COMPILE_TEST - select MEDIA_CONTROLLER - select VIDEO_V4L2_SUBDEV_API - select MFD_SYSCON - select VIDEOBUF2_DMA_CONTIG - help - Driver for an OMAP 4 ISS controller. diff --git a/drivers/staging/media/omap4iss/Makefile b/drivers/staging/media/omap4iss/Makefile deleted file mode 100644 index e64d489a4a76..000000000000 --- a/drivers/staging/media/omap4iss/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for OMAP4 ISS driver -# - -omap4-iss-objs += \ - iss.o iss_csi2.o iss_csiphy.o iss_ipipeif.o iss_ipipe.o iss_resizer.o iss_video.o - -obj-$(CONFIG_VIDEO_OMAP4) += omap4-iss.o diff --git a/drivers/staging/media/omap4iss/TODO b/drivers/staging/media/omap4iss/TODO deleted file mode 100644 index 4d220ef82653..000000000000 --- a/drivers/staging/media/omap4iss/TODO +++ /dev/null @@ -1,3 +0,0 @@ -* Fix FIFO/buffer overflows and underflows -* Replace dummy resizer code with a real implementation -* Fix checkpatch errors and warnings diff --git a/drivers/staging/media/omap4iss/iss.c b/drivers/staging/media/omap4iss/iss.c deleted file mode 100644 index 0c4283bb48ad..000000000000 --- a/drivers/staging/media/omap4iss/iss.c +++ /dev/null @@ -1,1354 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * TI OMAP4 ISS V4L2 Driver - * - * Copyright (C) 2012, Texas Instruments - * - * Author: Sergio Aguirre - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "iss.h" -#include "iss_regs.h" - -#define ISS_PRINT_REGISTER(iss, name)\ - dev_dbg(iss->dev, "###ISS " #name "=0x%08x\n", \ - iss_reg_read(iss, OMAP4_ISS_MEM_TOP, ISS_##name)) - -static void iss_print_status(struct iss_device *iss) -{ - dev_dbg(iss->dev, "-------------ISS HL Register dump-------------\n"); - - ISS_PRINT_REGISTER(iss, HL_REVISION); - ISS_PRINT_REGISTER(iss, HL_SYSCONFIG); - ISS_PRINT_REGISTER(iss, HL_IRQSTATUS(5)); - ISS_PRINT_REGISTER(iss, HL_IRQENABLE_SET(5)); - ISS_PRINT_REGISTER(iss, HL_IRQENABLE_CLR(5)); - ISS_PRINT_REGISTER(iss, CTRL); - ISS_PRINT_REGISTER(iss, CLKCTRL); - ISS_PRINT_REGISTER(iss, CLKSTAT); - - dev_dbg(iss->dev, "-----------------------------------------------\n"); -} - -/* - * omap4iss_flush - Post pending L3 bus writes by doing a register readback - * @iss: OMAP4 ISS device - * - * In order to force posting of pending writes, we need to write and - * readback the same register, in this case the revision register. - * - * See this link for reference: - * https://www.mail-archive.com/linux-omap@vger.kernel.org/msg08149.html - */ -static void omap4iss_flush(struct iss_device *iss) -{ - iss_reg_write(iss, OMAP4_ISS_MEM_TOP, ISS_HL_REVISION, 0); - iss_reg_read(iss, OMAP4_ISS_MEM_TOP, ISS_HL_REVISION); -} - -/* - * iss_isp_enable_interrupts - Enable ISS ISP interrupts. 
- * @iss: OMAP4 ISS device - */ -static void omap4iss_isp_enable_interrupts(struct iss_device *iss) -{ - static const u32 isp_irq = ISP5_IRQ_OCP_ERR | - ISP5_IRQ_RSZ_FIFO_IN_BLK_ERR | - ISP5_IRQ_RSZ_FIFO_OVF | - ISP5_IRQ_RSZ_INT_DMA | - ISP5_IRQ_ISIF_INT(0); - - /* Enable ISP interrupts */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_IRQSTATUS(0), isp_irq); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_IRQENABLE_SET(0), - isp_irq); -} - -/* - * iss_isp_disable_interrupts - Disable ISS interrupts. - * @iss: OMAP4 ISS device - */ -static void omap4iss_isp_disable_interrupts(struct iss_device *iss) -{ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_IRQENABLE_CLR(0), ~0); -} - -/* - * iss_enable_interrupts - Enable ISS interrupts. - * @iss: OMAP4 ISS device - */ -static void iss_enable_interrupts(struct iss_device *iss) -{ - static const u32 hl_irq = ISS_HL_IRQ_CSIA | ISS_HL_IRQ_CSIB - | ISS_HL_IRQ_ISP(0); - - /* Enable HL interrupts */ - iss_reg_write(iss, OMAP4_ISS_MEM_TOP, ISS_HL_IRQSTATUS(5), hl_irq); - iss_reg_write(iss, OMAP4_ISS_MEM_TOP, ISS_HL_IRQENABLE_SET(5), hl_irq); - - if (iss->regs[OMAP4_ISS_MEM_ISP_SYS1]) - omap4iss_isp_enable_interrupts(iss); -} - -/* - * iss_disable_interrupts - Disable ISS interrupts. - * @iss: OMAP4 ISS device - */ -static void iss_disable_interrupts(struct iss_device *iss) -{ - if (iss->regs[OMAP4_ISS_MEM_ISP_SYS1]) - omap4iss_isp_disable_interrupts(iss); - - iss_reg_write(iss, OMAP4_ISS_MEM_TOP, ISS_HL_IRQENABLE_CLR(5), ~0); -} - -int omap4iss_get_external_info(struct iss_pipeline *pipe, - struct media_link *link) -{ - struct iss_device *iss = - container_of(pipe, struct iss_video, pipe)->iss; - struct v4l2_subdev_format fmt; - struct v4l2_ctrl *ctrl; - int ret; - - if (!pipe->external) - return 0; - - if (pipe->external_rate) - return 0; - - memset(&fmt, 0, sizeof(fmt)); - - fmt.pad = link->source->index; - fmt.which = V4L2_SUBDEV_FORMAT_ACTIVE; - ret = v4l2_subdev_call(media_entity_to_v4l2_subdev(link->sink->entity), - pad, get_fmt, NULL, &fmt); - if (ret < 0) - return -EPIPE; - - pipe->external_bpp = omap4iss_video_format_info(fmt.format.code)->bpp; - - ctrl = v4l2_ctrl_find(pipe->external->ctrl_handler, - V4L2_CID_PIXEL_RATE); - if (!ctrl) { - dev_warn(iss->dev, "no pixel rate control in subdev %s\n", - pipe->external->name); - return -EPIPE; - } - - pipe->external_rate = v4l2_ctrl_g_ctrl_int64(ctrl); - - return 0; -} - -/* - * Configure the bridge. Valid inputs are - * - * IPIPEIF_INPUT_CSI2A: CSI2a receiver - * IPIPEIF_INPUT_CSI2B: CSI2b receiver - * - * The bridge and lane shifter are configured according to the selected input - * and the ISP platform data. 
- */ -void omap4iss_configure_bridge(struct iss_device *iss, - enum ipipeif_input_entity input) -{ - u32 issctrl_val; - u32 isp5ctrl_val; - - issctrl_val = iss_reg_read(iss, OMAP4_ISS_MEM_TOP, ISS_CTRL); - issctrl_val &= ~ISS_CTRL_INPUT_SEL_MASK; - issctrl_val &= ~ISS_CTRL_CLK_DIV_MASK; - - isp5ctrl_val = iss_reg_read(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_CTRL); - - switch (input) { - case IPIPEIF_INPUT_CSI2A: - issctrl_val |= ISS_CTRL_INPUT_SEL_CSI2A; - break; - - case IPIPEIF_INPUT_CSI2B: - issctrl_val |= ISS_CTRL_INPUT_SEL_CSI2B; - break; - - default: - return; - } - - issctrl_val |= ISS_CTRL_SYNC_DETECT_VS_RAISING; - - isp5ctrl_val |= ISP5_CTRL_VD_PULSE_EXT | ISP5_CTRL_PSYNC_CLK_SEL | - ISP5_CTRL_SYNC_ENABLE; - - iss_reg_write(iss, OMAP4_ISS_MEM_TOP, ISS_CTRL, issctrl_val); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_CTRL, isp5ctrl_val); -} - -#ifdef ISS_ISR_DEBUG -static void iss_isr_dbg(struct iss_device *iss, u32 irqstatus) -{ - static const char * const name[] = { - "ISP_0", - "ISP_1", - "ISP_2", - "ISP_3", - "CSIA", - "CSIB", - "CCP2_0", - "CCP2_1", - "CCP2_2", - "CCP2_3", - "CBUFF", - "BTE", - "SIMCOP_0", - "SIMCOP_1", - "SIMCOP_2", - "SIMCOP_3", - "CCP2_8", - "HS_VS", - "18", - "19", - "20", - "21", - "22", - "23", - "24", - "25", - "26", - "27", - "28", - "29", - "30", - "31", - }; - unsigned int i; - - dev_dbg(iss->dev, "ISS IRQ: "); - - for (i = 0; i < ARRAY_SIZE(name); i++) { - if ((1 << i) & irqstatus) - pr_cont("%s ", name[i]); - } - pr_cont("\n"); -} - -static void iss_isp_isr_dbg(struct iss_device *iss, u32 irqstatus) -{ - static const char * const name[] = { - "ISIF_0", - "ISIF_1", - "ISIF_2", - "ISIF_3", - "IPIPEREQ", - "IPIPELAST_PIX", - "IPIPEDMA", - "IPIPEBSC", - "IPIPEHST", - "IPIPEIF", - "AEW", - "AF", - "H3A", - "RSZ_REG", - "RSZ_LAST_PIX", - "RSZ_DMA", - "RSZ_CYC_RZA", - "RSZ_CYC_RZB", - "RSZ_FIFO_OVF", - "RSZ_FIFO_IN_BLK_ERR", - "20", - "21", - "RSZ_EOF0", - "RSZ_EOF1", - "H3A_EOF", - "IPIPE_EOF", - "26", - "IPIPE_DPC_INI", - "IPIPE_DPC_RNEW0", - "IPIPE_DPC_RNEW1", - "30", - "OCP_ERR", - }; - unsigned int i; - - dev_dbg(iss->dev, "ISP IRQ: "); - - for (i = 0; i < ARRAY_SIZE(name); i++) { - if ((1 << i) & irqstatus) - pr_cont("%s ", name[i]); - } - pr_cont("\n"); -} -#endif - -/* - * iss_isr - Interrupt Service Routine for ISS module. - * @irq: Not used currently. - * @_iss: Pointer to the OMAP4 ISS device - * - * Handles the corresponding callback if plugged in. - * - * Returns IRQ_HANDLED when IRQ was correctly handled, or IRQ_NONE when the - * IRQ wasn't handled. 
- */ -static irqreturn_t iss_isr(int irq, void *_iss) -{ - static const u32 ipipeif_events = ISP5_IRQ_IPIPEIF_IRQ | - ISP5_IRQ_ISIF_INT(0); - static const u32 resizer_events = ISP5_IRQ_RSZ_FIFO_IN_BLK_ERR | - ISP5_IRQ_RSZ_FIFO_OVF | - ISP5_IRQ_RSZ_INT_DMA; - struct iss_device *iss = _iss; - u32 irqstatus; - - irqstatus = iss_reg_read(iss, OMAP4_ISS_MEM_TOP, ISS_HL_IRQSTATUS(5)); - iss_reg_write(iss, OMAP4_ISS_MEM_TOP, ISS_HL_IRQSTATUS(5), irqstatus); - - if (irqstatus & ISS_HL_IRQ_CSIA) - omap4iss_csi2_isr(&iss->csi2a); - - if (irqstatus & ISS_HL_IRQ_CSIB) - omap4iss_csi2_isr(&iss->csi2b); - - if (irqstatus & ISS_HL_IRQ_ISP(0)) { - u32 isp_irqstatus = iss_reg_read(iss, OMAP4_ISS_MEM_ISP_SYS1, - ISP5_IRQSTATUS(0)); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_IRQSTATUS(0), - isp_irqstatus); - - if (isp_irqstatus & ISP5_IRQ_OCP_ERR) - dev_dbg(iss->dev, "ISP5 OCP Error!\n"); - - if (isp_irqstatus & ipipeif_events) { - omap4iss_ipipeif_isr(&iss->ipipeif, - isp_irqstatus & ipipeif_events); - } - - if (isp_irqstatus & resizer_events) - omap4iss_resizer_isr(&iss->resizer, - isp_irqstatus & resizer_events); - -#ifdef ISS_ISR_DEBUG - iss_isp_isr_dbg(iss, isp_irqstatus); -#endif - } - - omap4iss_flush(iss); - -#ifdef ISS_ISR_DEBUG - iss_isr_dbg(iss, irqstatus); -#endif - - return IRQ_HANDLED; -} - -static const struct media_device_ops iss_media_ops = { - .link_notify = v4l2_pipeline_link_notify, -}; - -/* ----------------------------------------------------------------------------- - * Pipeline stream management - */ - -/* - * iss_pipeline_disable - Disable streaming on a pipeline - * @pipe: ISS pipeline - * @until: entity at which to stop pipeline walk - * - * Walk the entities chain starting at the pipeline output video node and stop - * all modules in the chain. Wait synchronously for the modules to be stopped if - * necessary. - * - * If the until argument isn't NULL, stop the pipeline walk when reaching the - * until entity. This is used to disable a partially started pipeline due to a - * subdev start error. - */ -static int iss_pipeline_disable(struct iss_pipeline *pipe, - struct media_entity *until) -{ - struct iss_device *iss = pipe->output->iss; - struct media_entity *entity; - struct media_pad *pad; - struct v4l2_subdev *subdev; - int failure = 0; - int ret; - - entity = &pipe->output->video.entity; - while (1) { - pad = &entity->pads[0]; - if (!(pad->flags & MEDIA_PAD_FL_SINK)) - break; - - pad = media_pad_remote_pad_first(pad); - if (!pad || !is_media_entity_v4l2_subdev(pad->entity)) - break; - - entity = pad->entity; - if (entity == until) - break; - - subdev = media_entity_to_v4l2_subdev(entity); - ret = v4l2_subdev_call(subdev, video, s_stream, 0); - if (ret < 0) { - dev_warn(iss->dev, "%s: module stop timeout.\n", - subdev->name); - /* If the entity failed to stopped, assume it has - * crashed. Mark it as such, the ISS will be reset when - * applications will release it. - */ - media_entity_enum_set(&iss->crashed, &subdev->entity); - failure = -ETIMEDOUT; - } - } - - return failure; -} - -/* - * iss_pipeline_enable - Enable streaming on a pipeline - * @pipe: ISS pipeline - * @mode: Stream mode (single shot or continuous) - * - * Walk the entities chain starting at the pipeline output video node and start - * all modules in the chain in the given mode. - * - * Return 0 if successful, or the return value of the failed video::s_stream - * operation otherwise. 
- */ -static int iss_pipeline_enable(struct iss_pipeline *pipe, - enum iss_pipeline_stream_state mode) -{ - struct iss_device *iss = pipe->output->iss; - struct media_entity *entity; - struct media_pad *pad; - struct v4l2_subdev *subdev; - unsigned long flags; - int ret; - - /* If one of the entities in the pipeline has crashed it will not work - * properly. Refuse to start streaming in that case. This check must be - * performed before the loop below to avoid starting entities if the - * pipeline won't start anyway (those entities would then likely fail to - * stop, making the problem worse). - */ - if (media_entity_enum_intersects(&pipe->ent_enum, &iss->crashed)) - return -EIO; - - spin_lock_irqsave(&pipe->lock, flags); - pipe->state &= ~(ISS_PIPELINE_IDLE_INPUT | ISS_PIPELINE_IDLE_OUTPUT); - spin_unlock_irqrestore(&pipe->lock, flags); - - pipe->do_propagation = false; - - mutex_lock(&iss->media_dev.graph_mutex); - - entity = &pipe->output->video.entity; - while (1) { - pad = &entity->pads[0]; - if (!(pad->flags & MEDIA_PAD_FL_SINK)) - break; - - pad = media_pad_remote_pad_first(pad); - if (!pad || !is_media_entity_v4l2_subdev(pad->entity)) - break; - - entity = pad->entity; - subdev = media_entity_to_v4l2_subdev(entity); - - ret = v4l2_subdev_call(subdev, video, s_stream, mode); - if (ret < 0 && ret != -ENOIOCTLCMD) { - iss_pipeline_disable(pipe, entity); - mutex_unlock(&iss->media_dev.graph_mutex); - return ret; - } - - if (subdev == &iss->csi2a.subdev || - subdev == &iss->csi2b.subdev) - pipe->do_propagation = true; - } - - mutex_unlock(&iss->media_dev.graph_mutex); - iss_print_status(pipe->output->iss); - - return 0; -} - -/* - * omap4iss_pipeline_set_stream - Enable/disable streaming on a pipeline - * @pipe: ISS pipeline - * @state: Stream state (stopped, single shot or continuous) - * - * Set the pipeline to the given stream state. Pipelines can be started in - * single-shot or continuous mode. - * - * Return 0 if successful, or the return value of the failed video::s_stream - * operation otherwise. The pipeline state is not updated when the operation - * fails, except when stopping the pipeline. - */ -int omap4iss_pipeline_set_stream(struct iss_pipeline *pipe, - enum iss_pipeline_stream_state state) -{ - int ret; - - if (state == ISS_PIPELINE_STREAM_STOPPED) - ret = iss_pipeline_disable(pipe, NULL); - else - ret = iss_pipeline_enable(pipe, state); - - if (ret == 0 || state == ISS_PIPELINE_STREAM_STOPPED) - pipe->stream_state = state; - - return ret; -} - -/* - * omap4iss_pipeline_cancel_stream - Cancel stream on a pipeline - * @pipe: ISS pipeline - * - * Cancelling a stream mark all buffers on all video nodes in the pipeline as - * erroneous and makes sure no new buffer can be queued. This function is called - * when a fatal error that prevents any further operation on the pipeline - * occurs. - */ -void omap4iss_pipeline_cancel_stream(struct iss_pipeline *pipe) -{ - if (pipe->input) - omap4iss_video_cancel_stream(pipe->input); - if (pipe->output) - omap4iss_video_cancel_stream(pipe->output); -} - -/* - * iss_pipeline_is_last - Verify if entity has an enabled link to the output - * video node - * @me: ISS module's media entity - * - * Returns 1 if the entity has an enabled link to the output video node or 0 - * otherwise. It's true only while pipeline can have no more than one output - * node. 
- */ -static int iss_pipeline_is_last(struct media_entity *me) -{ - struct iss_pipeline *pipe; - struct media_pad *pad; - - pipe = to_iss_pipeline(me); - if (!pipe || pipe->stream_state == ISS_PIPELINE_STREAM_STOPPED) - return 0; - pad = media_pad_remote_pad_first(&pipe->output->pad); - return pad->entity == me; -} - -static int iss_reset(struct iss_device *iss) -{ - unsigned int timeout; - - iss_reg_set(iss, OMAP4_ISS_MEM_TOP, ISS_HL_SYSCONFIG, - ISS_HL_SYSCONFIG_SOFTRESET); - - timeout = iss_poll_condition_timeout( - !(iss_reg_read(iss, OMAP4_ISS_MEM_TOP, ISS_HL_SYSCONFIG) & - ISS_HL_SYSCONFIG_SOFTRESET), 1000, 10, 100); - if (timeout) { - dev_err(iss->dev, "ISS reset timeout\n"); - return -ETIMEDOUT; - } - - media_entity_enum_zero(&iss->crashed); - - return 0; -} - -static int iss_isp_reset(struct iss_device *iss) -{ - unsigned int timeout; - - /* Fist, ensure that the ISP is IDLE (no transactions happening) */ - iss_reg_update(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_SYSCONFIG, - ISP5_SYSCONFIG_STANDBYMODE_MASK, - ISP5_SYSCONFIG_STANDBYMODE_SMART); - - iss_reg_set(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_CTRL, ISP5_CTRL_MSTANDBY); - - timeout = iss_poll_condition_timeout( - iss_reg_read(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_CTRL) & - ISP5_CTRL_MSTANDBY_WAIT, 1000000, 1000, 1500); - if (timeout) { - dev_err(iss->dev, "ISP5 standby timeout\n"); - return -ETIMEDOUT; - } - - /* Now finally, do the reset */ - iss_reg_set(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_SYSCONFIG, - ISP5_SYSCONFIG_SOFTRESET); - - timeout = iss_poll_condition_timeout( - !(iss_reg_read(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_SYSCONFIG) & - ISP5_SYSCONFIG_SOFTRESET), 1000000, 1000, 1500); - if (timeout) { - dev_err(iss->dev, "ISP5 reset timeout\n"); - return -ETIMEDOUT; - } - - return 0; -} - -/* - * iss_module_sync_idle - Helper to sync module with its idle state - * @me: ISS submodule's media entity - * @wait: ISS submodule's wait queue for streamoff/interrupt synchronization - * @stopping: flag which tells module wants to stop - * - * This function checks if ISS submodule needs to wait for next interrupt. If - * yes, makes the caller to sleep while waiting for such event. - */ -int omap4iss_module_sync_idle(struct media_entity *me, wait_queue_head_t *wait, - atomic_t *stopping) -{ - struct iss_pipeline *pipe = to_iss_pipeline(me); - struct iss_video *video = pipe->output; - unsigned long flags; - - if (pipe->stream_state == ISS_PIPELINE_STREAM_STOPPED || - (pipe->stream_state == ISS_PIPELINE_STREAM_SINGLESHOT && - !iss_pipeline_ready(pipe))) - return 0; - - /* - * atomic_set() doesn't include memory barrier on ARM platform for SMP - * scenario. We'll call it here to avoid race conditions. - */ - atomic_set(stopping, 1); - smp_wmb(); - - /* - * If module is the last one, it's writing to memory. In this case, - * it's necessary to check if the module is already paused due to - * DMA queue underrun or if it has to wait for next interrupt to be - * idle. - * If it isn't the last one, the function won't sleep but *stopping - * will still be set to warn next submodule caller's interrupt the - * module wants to be idle. 
- */ - if (!iss_pipeline_is_last(me)) - return 0; - - spin_lock_irqsave(&video->qlock, flags); - if (video->dmaqueue_flags & ISS_VIDEO_DMAQUEUE_UNDERRUN) { - spin_unlock_irqrestore(&video->qlock, flags); - atomic_set(stopping, 0); - smp_wmb(); - return 0; - } - spin_unlock_irqrestore(&video->qlock, flags); - if (!wait_event_timeout(*wait, !atomic_read(stopping), - msecs_to_jiffies(1000))) { - atomic_set(stopping, 0); - smp_wmb(); - return -ETIMEDOUT; - } - - return 0; -} - -/* - * omap4iss_module_sync_is_stopped - Helper to verify if module was stopping - * @wait: ISS submodule's wait queue for streamoff/interrupt synchronization - * @stopping: flag which tells module wants to stop - * - * This function checks if ISS submodule was stopping. In case of yes, it - * notices the caller by setting stopping to 0 and waking up the wait queue. - * Returns 1 if it was stopping or 0 otherwise. - */ -int omap4iss_module_sync_is_stopping(wait_queue_head_t *wait, - atomic_t *stopping) -{ - if (atomic_cmpxchg(stopping, 1, 0)) { - wake_up(wait); - return 1; - } - - return 0; -} - -/* -------------------------------------------------------------------------- - * Clock management - */ - -#define ISS_CLKCTRL_MASK (ISS_CLKCTRL_CSI2_A |\ - ISS_CLKCTRL_CSI2_B |\ - ISS_CLKCTRL_ISP) - -static int __iss_subclk_update(struct iss_device *iss) -{ - u32 clk = 0; - int ret = 0, timeout = 1000; - - if (iss->subclk_resources & OMAP4_ISS_SUBCLK_CSI2_A) - clk |= ISS_CLKCTRL_CSI2_A; - - if (iss->subclk_resources & OMAP4_ISS_SUBCLK_CSI2_B) - clk |= ISS_CLKCTRL_CSI2_B; - - if (iss->subclk_resources & OMAP4_ISS_SUBCLK_ISP) - clk |= ISS_CLKCTRL_ISP; - - iss_reg_update(iss, OMAP4_ISS_MEM_TOP, ISS_CLKCTRL, - ISS_CLKCTRL_MASK, clk); - - /* Wait for HW assertion */ - while (--timeout > 0) { - udelay(1); - if ((iss_reg_read(iss, OMAP4_ISS_MEM_TOP, ISS_CLKSTAT) & - ISS_CLKCTRL_MASK) == clk) - break; - } - - if (!timeout) - ret = -EBUSY; - - return ret; -} - -int omap4iss_subclk_enable(struct iss_device *iss, - enum iss_subclk_resource res) -{ - iss->subclk_resources |= res; - - return __iss_subclk_update(iss); -} - -int omap4iss_subclk_disable(struct iss_device *iss, - enum iss_subclk_resource res) -{ - iss->subclk_resources &= ~res; - - return __iss_subclk_update(iss); -} - -#define ISS_ISP5_CLKCTRL_MASK (ISP5_CTRL_BL_CLK_ENABLE |\ - ISP5_CTRL_ISIF_CLK_ENABLE |\ - ISP5_CTRL_H3A_CLK_ENABLE |\ - ISP5_CTRL_RSZ_CLK_ENABLE |\ - ISP5_CTRL_IPIPE_CLK_ENABLE |\ - ISP5_CTRL_IPIPEIF_CLK_ENABLE) - -static void __iss_isp_subclk_update(struct iss_device *iss) -{ - u32 clk = 0; - - if (iss->isp_subclk_resources & OMAP4_ISS_ISP_SUBCLK_ISIF) - clk |= ISP5_CTRL_ISIF_CLK_ENABLE; - - if (iss->isp_subclk_resources & OMAP4_ISS_ISP_SUBCLK_H3A) - clk |= ISP5_CTRL_H3A_CLK_ENABLE; - - if (iss->isp_subclk_resources & OMAP4_ISS_ISP_SUBCLK_RSZ) - clk |= ISP5_CTRL_RSZ_CLK_ENABLE; - - if (iss->isp_subclk_resources & OMAP4_ISS_ISP_SUBCLK_IPIPE) - clk |= ISP5_CTRL_IPIPE_CLK_ENABLE; - - if (iss->isp_subclk_resources & OMAP4_ISS_ISP_SUBCLK_IPIPEIF) - clk |= ISP5_CTRL_IPIPEIF_CLK_ENABLE; - - if (clk) - clk |= ISP5_CTRL_BL_CLK_ENABLE; - - iss_reg_update(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_CTRL, - ISS_ISP5_CLKCTRL_MASK, clk); -} - -void omap4iss_isp_subclk_enable(struct iss_device *iss, - enum iss_isp_subclk_resource res) -{ - iss->isp_subclk_resources |= res; - - __iss_isp_subclk_update(iss); -} - -void omap4iss_isp_subclk_disable(struct iss_device *iss, - enum iss_isp_subclk_resource res) -{ - iss->isp_subclk_resources &= ~res; - - __iss_isp_subclk_update(iss); -} - 
-/* - * iss_enable_clocks - Enable ISS clocks - * @iss: OMAP4 ISS device - * - * Return 0 if successful, or clk_enable return value if any of tthem fails. - */ -static int iss_enable_clocks(struct iss_device *iss) -{ - int ret; - - ret = clk_enable(iss->iss_fck); - if (ret) { - dev_err(iss->dev, "clk_enable iss_fck failed\n"); - return ret; - } - - ret = clk_enable(iss->iss_ctrlclk); - if (ret) { - dev_err(iss->dev, "clk_enable iss_ctrlclk failed\n"); - clk_disable(iss->iss_fck); - return ret; - } - - return 0; -} - -/* - * iss_disable_clocks - Disable ISS clocks - * @iss: OMAP4 ISS device - */ -static void iss_disable_clocks(struct iss_device *iss) -{ - clk_disable(iss->iss_ctrlclk); - clk_disable(iss->iss_fck); -} - -static int iss_get_clocks(struct iss_device *iss) -{ - iss->iss_fck = devm_clk_get(iss->dev, "iss_fck"); - if (IS_ERR(iss->iss_fck)) { - dev_err(iss->dev, "Unable to get iss_fck clock info\n"); - return PTR_ERR(iss->iss_fck); - } - - iss->iss_ctrlclk = devm_clk_get(iss->dev, "iss_ctrlclk"); - if (IS_ERR(iss->iss_ctrlclk)) { - dev_err(iss->dev, "Unable to get iss_ctrlclk clock info\n"); - return PTR_ERR(iss->iss_ctrlclk); - } - - return 0; -} - -/* - * omap4iss_get - Acquire the ISS resource. - * - * Initializes the clocks for the first acquire. - * - * Increment the reference count on the ISS. If the first reference is taken, - * enable clocks and power-up all submodules. - * - * Return a pointer to the ISS device structure, or NULL if an error occurred. - */ -struct iss_device *omap4iss_get(struct iss_device *iss) -{ - struct iss_device *__iss = iss; - - if (!iss) - return NULL; - - mutex_lock(&iss->iss_mutex); - if (iss->ref_count > 0) - goto out; - - if (iss_enable_clocks(iss) < 0) { - __iss = NULL; - goto out; - } - - iss_enable_interrupts(iss); - -out: - if (__iss) - iss->ref_count++; - mutex_unlock(&iss->iss_mutex); - - return __iss; -} - -/* - * omap4iss_put - Release the ISS - * - * Decrement the reference count on the ISS. If the last reference is released, - * power-down all submodules, disable clocks and free temporary buffers. - */ -void omap4iss_put(struct iss_device *iss) -{ - if (!iss) - return; - - mutex_lock(&iss->iss_mutex); - WARN_ON(iss->ref_count == 0); - if (--iss->ref_count == 0) { - iss_disable_interrupts(iss); - /* Reset the ISS if an entity has failed to stop. This is the - * only way to recover from such conditions, although it would - * be worth investigating whether resetting the ISP only can't - * fix the problem in some cases. - */ - if (!media_entity_enum_empty(&iss->crashed)) - iss_reset(iss); - iss_disable_clocks(iss); - } - mutex_unlock(&iss->iss_mutex); -} - -static int iss_map_mem_resource(struct platform_device *pdev, - struct iss_device *iss, - enum iss_mem_resources res) -{ - iss->regs[res] = devm_platform_ioremap_resource(pdev, res); - - return PTR_ERR_OR_ZERO(iss->regs[res]); -} - -static void iss_unregister_entities(struct iss_device *iss) -{ - omap4iss_resizer_unregister_entities(&iss->resizer); - omap4iss_ipipe_unregister_entities(&iss->ipipe); - omap4iss_ipipeif_unregister_entities(&iss->ipipeif); - omap4iss_csi2_unregister_entities(&iss->csi2a); - omap4iss_csi2_unregister_entities(&iss->csi2b); - - v4l2_device_unregister(&iss->v4l2_dev); - media_device_unregister(&iss->media_dev); -} - -/* - * iss_register_subdev_group - Register a group of subdevices - * @iss: OMAP4 ISS device - * @board_info: I2C subdevs board information array - * - * Register all I2C subdevices in the board_info array. 
The array must be - * terminated by a NULL entry, and the first entry must be the sensor. - * - * Return a pointer to the sensor media entity if it has been successfully - * registered, or NULL otherwise. - */ -static struct v4l2_subdev * -iss_register_subdev_group(struct iss_device *iss, - struct iss_subdev_i2c_board_info *board_info) -{ - struct v4l2_subdev *sensor = NULL; - unsigned int first; - - if (!board_info->board_info) - return NULL; - - for (first = 1; board_info->board_info; ++board_info, first = 0) { - struct v4l2_subdev *subdev; - struct i2c_adapter *adapter; - - adapter = i2c_get_adapter(board_info->i2c_adapter_id); - if (!adapter) { - dev_err(iss->dev, - "%s: Unable to get I2C adapter %d for device %s\n", - __func__, board_info->i2c_adapter_id, - board_info->board_info->type); - continue; - } - - subdev = v4l2_i2c_new_subdev_board(&iss->v4l2_dev, adapter, - board_info->board_info, NULL); - if (!subdev) { - dev_err(iss->dev, "Unable to register subdev %s\n", - board_info->board_info->type); - continue; - } - - if (first) - sensor = subdev; - } - - return sensor; -} - -static int iss_register_entities(struct iss_device *iss) -{ - struct iss_platform_data *pdata = iss->pdata; - struct iss_v4l2_subdevs_group *subdevs; - int ret; - - iss->media_dev.dev = iss->dev; - strscpy(iss->media_dev.model, "TI OMAP4 ISS", - sizeof(iss->media_dev.model)); - iss->media_dev.hw_revision = iss->revision; - iss->media_dev.ops = &iss_media_ops; - ret = media_device_register(&iss->media_dev); - if (ret < 0) { - dev_err(iss->dev, "Media device registration failed (%d)\n", - ret); - return ret; - } - - iss->v4l2_dev.mdev = &iss->media_dev; - ret = v4l2_device_register(iss->dev, &iss->v4l2_dev); - if (ret < 0) { - dev_err(iss->dev, "V4L2 device registration failed (%d)\n", - ret); - goto done; - } - - /* Register internal entities */ - ret = omap4iss_csi2_register_entities(&iss->csi2a, &iss->v4l2_dev); - if (ret < 0) - goto done; - - ret = omap4iss_csi2_register_entities(&iss->csi2b, &iss->v4l2_dev); - if (ret < 0) - goto done; - - ret = omap4iss_ipipeif_register_entities(&iss->ipipeif, &iss->v4l2_dev); - if (ret < 0) - goto done; - - ret = omap4iss_ipipe_register_entities(&iss->ipipe, &iss->v4l2_dev); - if (ret < 0) - goto done; - - ret = omap4iss_resizer_register_entities(&iss->resizer, &iss->v4l2_dev); - if (ret < 0) - goto done; - - /* Register external entities */ - for (subdevs = pdata->subdevs; subdevs && subdevs->subdevs; ++subdevs) { - struct v4l2_subdev *sensor; - struct media_entity *input; - unsigned int flags; - unsigned int pad; - - sensor = iss_register_subdev_group(iss, subdevs->subdevs); - if (!sensor) - continue; - - sensor->host_priv = subdevs; - - /* Connect the sensor to the correct interface module. 
- * CSI2a receiver through CSIPHY1, or - * CSI2b receiver through CSIPHY2 - */ - switch (subdevs->interface) { - case ISS_INTERFACE_CSI2A_PHY1: - input = &iss->csi2a.subdev.entity; - pad = CSI2_PAD_SINK; - flags = MEDIA_LNK_FL_IMMUTABLE - | MEDIA_LNK_FL_ENABLED; - break; - - case ISS_INTERFACE_CSI2B_PHY2: - input = &iss->csi2b.subdev.entity; - pad = CSI2_PAD_SINK; - flags = MEDIA_LNK_FL_IMMUTABLE - | MEDIA_LNK_FL_ENABLED; - break; - - default: - dev_err(iss->dev, "invalid interface type %u\n", - subdevs->interface); - ret = -EINVAL; - goto done; - } - - ret = media_create_pad_link(&sensor->entity, 0, input, pad, - flags); - if (ret < 0) - goto done; - } - - ret = v4l2_device_register_subdev_nodes(&iss->v4l2_dev); - -done: - if (ret < 0) - iss_unregister_entities(iss); - - return ret; -} - -/* - * iss_create_links() - Pads links creation for the subdevices - * @iss : Pointer to ISS device - * - * return negative error code or zero on success - */ -static int iss_create_links(struct iss_device *iss) -{ - int ret; - - ret = omap4iss_csi2_create_links(iss); - if (ret < 0) { - dev_err(iss->dev, "CSI2 pads links creation failed\n"); - return ret; - } - - ret = omap4iss_ipipeif_create_links(iss); - if (ret < 0) { - dev_err(iss->dev, "ISP IPIPEIF pads links creation failed\n"); - return ret; - } - - ret = omap4iss_resizer_create_links(iss); - if (ret < 0) { - dev_err(iss->dev, "ISP RESIZER pads links creation failed\n"); - return ret; - } - - /* Connect the submodules. */ - ret = media_create_pad_link( - &iss->csi2a.subdev.entity, CSI2_PAD_SOURCE, - &iss->ipipeif.subdev.entity, IPIPEIF_PAD_SINK, 0); - if (ret < 0) - return ret; - - ret = media_create_pad_link( - &iss->csi2b.subdev.entity, CSI2_PAD_SOURCE, - &iss->ipipeif.subdev.entity, IPIPEIF_PAD_SINK, 0); - if (ret < 0) - return ret; - - ret = media_create_pad_link( - &iss->ipipeif.subdev.entity, IPIPEIF_PAD_SOURCE_VP, - &iss->resizer.subdev.entity, RESIZER_PAD_SINK, 0); - if (ret < 0) - return ret; - - ret = media_create_pad_link( - &iss->ipipeif.subdev.entity, IPIPEIF_PAD_SOURCE_VP, - &iss->ipipe.subdev.entity, IPIPE_PAD_SINK, 0); - if (ret < 0) - return ret; - - ret = media_create_pad_link( - &iss->ipipe.subdev.entity, IPIPE_PAD_SOURCE_VP, - &iss->resizer.subdev.entity, RESIZER_PAD_SINK, 0); - if (ret < 0) - return ret; - - return 0; -}; - -static void iss_cleanup_modules(struct iss_device *iss) -{ - omap4iss_csi2_cleanup(iss); - omap4iss_ipipeif_cleanup(iss); - omap4iss_ipipe_cleanup(iss); - omap4iss_resizer_cleanup(iss); -} - -static int iss_initialize_modules(struct iss_device *iss) -{ - int ret; - - ret = omap4iss_csiphy_init(iss); - if (ret < 0) { - dev_err(iss->dev, "CSI PHY initialization failed\n"); - goto error_csiphy; - } - - ret = omap4iss_csi2_init(iss); - if (ret < 0) { - dev_err(iss->dev, "CSI2 initialization failed\n"); - goto error_csi2; - } - - ret = omap4iss_ipipeif_init(iss); - if (ret < 0) { - dev_err(iss->dev, "ISP IPIPEIF initialization failed\n"); - goto error_ipipeif; - } - - ret = omap4iss_ipipe_init(iss); - if (ret < 0) { - dev_err(iss->dev, "ISP IPIPE initialization failed\n"); - goto error_ipipe; - } - - ret = omap4iss_resizer_init(iss); - if (ret < 0) { - dev_err(iss->dev, "ISP RESIZER initialization failed\n"); - goto error_resizer; - } - - return 0; - -error_resizer: - omap4iss_ipipe_cleanup(iss); -error_ipipe: - omap4iss_ipipeif_cleanup(iss); -error_ipipeif: - omap4iss_csi2_cleanup(iss); -error_csi2: -error_csiphy: - return ret; -} - -static int iss_probe(struct platform_device *pdev) -{ - struct 
iss_platform_data *pdata = pdev->dev.platform_data; - struct iss_device *iss; - unsigned int i; - int ret; - - if (!pdata) - return -EINVAL; - - iss = devm_kzalloc(&pdev->dev, sizeof(*iss), GFP_KERNEL); - if (!iss) - return -ENOMEM; - - mutex_init(&iss->iss_mutex); - - iss->dev = &pdev->dev; - iss->pdata = pdata; - - iss->raw_dmamask = DMA_BIT_MASK(32); - iss->dev->dma_mask = &iss->raw_dmamask; - iss->dev->coherent_dma_mask = DMA_BIT_MASK(32); - - platform_set_drvdata(pdev, iss); - - /* - * TODO: When implementing DT support switch to syscon regmap lookup by - * phandle. - */ - iss->syscon = syscon_regmap_lookup_by_compatible("syscon"); - if (IS_ERR(iss->syscon)) { - ret = PTR_ERR(iss->syscon); - goto error; - } - - /* Clocks */ - ret = iss_map_mem_resource(pdev, iss, OMAP4_ISS_MEM_TOP); - if (ret < 0) - goto error; - - ret = iss_get_clocks(iss); - if (ret < 0) - goto error; - - if (!omap4iss_get(iss)) { - ret = -EINVAL; - goto error; - } - - ret = iss_reset(iss); - if (ret < 0) - goto error_iss; - - iss->revision = iss_reg_read(iss, OMAP4_ISS_MEM_TOP, ISS_HL_REVISION); - dev_info(iss->dev, "Revision %08x found\n", iss->revision); - - for (i = 1; i < OMAP4_ISS_MEM_LAST; i++) { - ret = iss_map_mem_resource(pdev, iss, i); - if (ret) - goto error_iss; - } - - /* Configure BTE BW_LIMITER field to max recommended value (1 GB) */ - iss_reg_update(iss, OMAP4_ISS_MEM_BTE, BTE_CTRL, - BTE_CTRL_BW_LIMITER_MASK, - 18 << BTE_CTRL_BW_LIMITER_SHIFT); - - /* Perform ISP reset */ - ret = omap4iss_subclk_enable(iss, OMAP4_ISS_SUBCLK_ISP); - if (ret < 0) - goto error_iss; - - ret = iss_isp_reset(iss); - if (ret < 0) - goto error_iss; - - dev_info(iss->dev, "ISP Revision %08x found\n", - iss_reg_read(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_REVISION)); - - /* Interrupt */ - ret = platform_get_irq(pdev, 0); - if (ret <= 0) { - ret = -ENODEV; - goto error_iss; - } - iss->irq_num = ret; - - if (devm_request_irq(iss->dev, iss->irq_num, iss_isr, IRQF_SHARED, - "OMAP4 ISS", iss)) { - dev_err(iss->dev, "Unable to request IRQ\n"); - ret = -EINVAL; - goto error_iss; - } - - /* Entities */ - ret = iss_initialize_modules(iss); - if (ret < 0) - goto error_iss; - - ret = iss_register_entities(iss); - if (ret < 0) - goto error_modules; - - ret = media_entity_enum_init(&iss->crashed, &iss->media_dev); - if (ret) - goto error_entities; - - ret = iss_create_links(iss); - if (ret < 0) - goto error_entities; - - omap4iss_put(iss); - - return 0; - -error_entities: - iss_unregister_entities(iss); - media_entity_enum_cleanup(&iss->crashed); -error_modules: - iss_cleanup_modules(iss); -error_iss: - omap4iss_put(iss); -error: - mutex_destroy(&iss->iss_mutex); - - return ret; -} - -static void iss_remove(struct platform_device *pdev) -{ - struct iss_device *iss = platform_get_drvdata(pdev); - - iss_unregister_entities(iss); - media_entity_enum_cleanup(&iss->crashed); - iss_cleanup_modules(iss); -} - -static const struct platform_device_id omap4iss_id_table[] = { - { "omap4iss", 0 }, - { }, -}; -MODULE_DEVICE_TABLE(platform, omap4iss_id_table); - -static struct platform_driver iss_driver = { - .probe = iss_probe, - .remove_new = iss_remove, - .id_table = omap4iss_id_table, - .driver = { - .name = "omap4iss", - }, -}; - -module_platform_driver(iss_driver); - -MODULE_DESCRIPTION("TI OMAP4 ISS driver"); -MODULE_AUTHOR("Sergio Aguirre "); -MODULE_LICENSE("GPL"); diff --git a/drivers/staging/media/omap4iss/iss.h b/drivers/staging/media/omap4iss/iss.h deleted file mode 100644 index 3f587e000729..000000000000 --- 
a/drivers/staging/media/omap4iss/iss.h +++ /dev/null @@ -1,247 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * TI OMAP4 ISS V4L2 Driver - * - * Copyright (C) 2012 Texas Instruments. - * - * Author: Sergio Aguirre - */ - -#ifndef _OMAP4_ISS_H_ -#define _OMAP4_ISS_H_ - -#include -#include - -#include -#include -#include -#include - -#include - -#include "iss_regs.h" -#include "iss_csiphy.h" -#include "iss_csi2.h" -#include "iss_ipipeif.h" -#include "iss_ipipe.h" -#include "iss_resizer.h" - -struct regmap; - -#define to_iss_device(ptr_module) \ - container_of(ptr_module, struct iss_device, ptr_module) -#define to_device(ptr_module) \ - (to_iss_device(ptr_module)->dev) - -enum iss_mem_resources { - OMAP4_ISS_MEM_TOP, - OMAP4_ISS_MEM_CSI2_A_REGS1, - OMAP4_ISS_MEM_CAMERARX_CORE1, - OMAP4_ISS_MEM_CSI2_B_REGS1, - OMAP4_ISS_MEM_CAMERARX_CORE2, - OMAP4_ISS_MEM_BTE, - OMAP4_ISS_MEM_ISP_SYS1, - OMAP4_ISS_MEM_ISP_RESIZER, - OMAP4_ISS_MEM_ISP_IPIPE, - OMAP4_ISS_MEM_ISP_ISIF, - OMAP4_ISS_MEM_ISP_IPIPEIF, - OMAP4_ISS_MEM_LAST, -}; - -enum iss_subclk_resource { - OMAP4_ISS_SUBCLK_SIMCOP = (1 << 0), - OMAP4_ISS_SUBCLK_ISP = (1 << 1), - OMAP4_ISS_SUBCLK_CSI2_A = (1 << 2), - OMAP4_ISS_SUBCLK_CSI2_B = (1 << 3), - OMAP4_ISS_SUBCLK_CCP2 = (1 << 4), -}; - -enum iss_isp_subclk_resource { - OMAP4_ISS_ISP_SUBCLK_BL = (1 << 0), - OMAP4_ISS_ISP_SUBCLK_ISIF = (1 << 1), - OMAP4_ISS_ISP_SUBCLK_H3A = (1 << 2), - OMAP4_ISS_ISP_SUBCLK_RSZ = (1 << 3), - OMAP4_ISS_ISP_SUBCLK_IPIPE = (1 << 4), - OMAP4_ISS_ISP_SUBCLK_IPIPEIF = (1 << 5), -}; - -/* - * struct iss_reg - Structure for ISS register values. - * @reg: 32-bit Register address. - * @val: 32-bit Register value. - */ -struct iss_reg { - enum iss_mem_resources mmio_range; - u32 reg; - u32 val; -}; - -/* - * struct iss_device - ISS device structure. 
- * @syscon: Regmap for the syscon register space - * @crashed: Crashed entities - */ -struct iss_device { - struct v4l2_device v4l2_dev; - struct media_device media_dev; - struct device *dev; - u32 revision; - - /* platform HW resources */ - struct iss_platform_data *pdata; - unsigned int irq_num; - - struct resource *res[OMAP4_ISS_MEM_LAST]; - void __iomem *regs[OMAP4_ISS_MEM_LAST]; - struct regmap *syscon; - - u64 raw_dmamask; - - struct mutex iss_mutex; /* For handling ref_count field */ - struct media_entity_enum crashed; - int has_context; - int ref_count; - - struct clk *iss_fck; - struct clk *iss_ctrlclk; - - /* ISS modules */ - struct iss_csi2_device csi2a; - struct iss_csi2_device csi2b; - struct iss_csiphy csiphy1; - struct iss_csiphy csiphy2; - struct iss_ipipeif_device ipipeif; - struct iss_ipipe_device ipipe; - struct iss_resizer_device resizer; - - unsigned int subclk_resources; - unsigned int isp_subclk_resources; -}; - -int omap4iss_get_external_info(struct iss_pipeline *pipe, - struct media_link *link); - -int omap4iss_module_sync_idle(struct media_entity *me, wait_queue_head_t *wait, - atomic_t *stopping); - -int omap4iss_module_sync_is_stopping(wait_queue_head_t *wait, - atomic_t *stopping); - -int omap4iss_pipeline_set_stream(struct iss_pipeline *pipe, - enum iss_pipeline_stream_state state); -void omap4iss_pipeline_cancel_stream(struct iss_pipeline *pipe); - -void omap4iss_configure_bridge(struct iss_device *iss, - enum ipipeif_input_entity input); - -struct iss_device *omap4iss_get(struct iss_device *iss); -void omap4iss_put(struct iss_device *iss); -int omap4iss_subclk_enable(struct iss_device *iss, - enum iss_subclk_resource res); -int omap4iss_subclk_disable(struct iss_device *iss, - enum iss_subclk_resource res); -void omap4iss_isp_subclk_enable(struct iss_device *iss, - enum iss_isp_subclk_resource res); -void omap4iss_isp_subclk_disable(struct iss_device *iss, - enum iss_isp_subclk_resource res); - -int omap4iss_register_entities(struct platform_device *pdev, - struct v4l2_device *v4l2_dev); -void omap4iss_unregister_entities(struct platform_device *pdev); - -/* - * iss_reg_read - Read the value of an OMAP4 ISS register - * @iss: the ISS device - * @res: memory resource in which the register is located - * @offset: register offset in the memory resource - * - * Return the register value. 
- */ -static inline -u32 iss_reg_read(struct iss_device *iss, enum iss_mem_resources res, - u32 offset) -{ - return readl(iss->regs[res] + offset); -} - -/* - * iss_reg_write - Write a value to an OMAP4 ISS register - * @iss: the ISS device - * @res: memory resource in which the register is located - * @offset: register offset in the memory resource - * @value: value to be written - */ -static inline -void iss_reg_write(struct iss_device *iss, enum iss_mem_resources res, - u32 offset, u32 value) -{ - writel(value, iss->regs[res] + offset); -} - -/* - * iss_reg_clr - Clear bits in an OMAP4 ISS register - * @iss: the ISS device - * @res: memory resource in which the register is located - * @offset: register offset in the memory resource - * @clr: bit mask to be cleared - */ -static inline -void iss_reg_clr(struct iss_device *iss, enum iss_mem_resources res, - u32 offset, u32 clr) -{ - u32 v = iss_reg_read(iss, res, offset); - - iss_reg_write(iss, res, offset, v & ~clr); -} - -/* - * iss_reg_set - Set bits in an OMAP4 ISS register - * @iss: the ISS device - * @res: memory resource in which the register is located - * @offset: register offset in the memory resource - * @set: bit mask to be set - */ -static inline -void iss_reg_set(struct iss_device *iss, enum iss_mem_resources res, - u32 offset, u32 set) -{ - u32 v = iss_reg_read(iss, res, offset); - - iss_reg_write(iss, res, offset, v | set); -} - -/* - * iss_reg_update - Clear and set bits in an OMAP4 ISS register - * @iss: the ISS device - * @res: memory resource in which the register is located - * @offset: register offset in the memory resource - * @clr: bit mask to be cleared - * @set: bit mask to be set - * - * Clear the clr mask first and then set the set mask. - */ -static inline -void iss_reg_update(struct iss_device *iss, enum iss_mem_resources res, - u32 offset, u32 clr, u32 set) -{ - u32 v = iss_reg_read(iss, res, offset); - - iss_reg_write(iss, res, offset, (v & ~clr) | set); -} - -#define iss_poll_condition_timeout(cond, timeout, min_ival, max_ival) \ -({ \ - unsigned long __timeout = jiffies + usecs_to_jiffies(timeout); \ - unsigned int __min_ival = (min_ival); \ - unsigned int __max_ival = (max_ival); \ - bool __cond; \ - while (!(__cond = (cond))) { \ - if (time_after(jiffies, __timeout)) \ - break; \ - usleep_range(__min_ival, __max_ival); \ - } \ - !__cond; \ -}) - -#endif /* _OMAP4_ISS_H_ */ diff --git a/drivers/staging/media/omap4iss/iss_csi2.c b/drivers/staging/media/omap4iss/iss_csi2.c deleted file mode 100644 index 0e6c5bd81930..000000000000 --- a/drivers/staging/media/omap4iss/iss_csi2.c +++ /dev/null @@ -1,1379 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * TI OMAP4 ISS V4L2 Driver - CSI PHY module - * - * Copyright (C) 2012 Texas Instruments, Inc. - * - * Author: Sergio Aguirre - */ - -#include -#include -#include -#include - -#include "iss.h" -#include "iss_regs.h" -#include "iss_csi2.h" - -/* - * csi2_if_enable - Enable CSI2 Receiver interface. - * @enable: enable flag - * - */ -static void csi2_if_enable(struct iss_csi2_device *csi2, u8 enable) -{ - struct iss_csi2_ctrl_cfg *currctrl = &csi2->ctrl; - - iss_reg_update(csi2->iss, csi2->regs1, CSI2_CTRL, CSI2_CTRL_IF_EN, - enable ? CSI2_CTRL_IF_EN : 0); - - currctrl->if_enable = enable; -} - -/* - * csi2_recv_config - CSI2 receiver module configuration. 
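The helpers above reduce to two idioms that recur throughout the driver: a clear-then-set read-modify-write (iss_reg_update) and a bounded poll on a status bit (iss_poll_condition_timeout, which returns nonzero on timeout). A minimal standalone C sketch of both, using a plain array in place of the memory-mapped register windows; every name and value below is illustrative only, not taken from the driver:

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t fake_regs[16];          /* stand-in for a register window */

    static uint32_t reg_read(unsigned int off)
    {
            return fake_regs[off];
    }

    static void reg_write(unsigned int off, uint32_t val)
    {
            fake_regs[off] = val;
    }

    /* Clear the clr mask first, then apply the set mask, like iss_reg_update(). */
    static void reg_update(unsigned int off, uint32_t clr, uint32_t set)
    {
            reg_write(off, (reg_read(off) & ~clr) | set);
    }

    /* Bounded poll on a status bit; returns true on success (inverse polarity). */
    static bool poll_bit(unsigned int off, uint32_t mask, unsigned int retries)
    {
            while (retries--) {
                    if (reg_read(off) & mask)
                            return true;
                    /* a real caller would sleep here, e.g. usleep_range() */
            }
            return false;
    }

    int main(void)
    {
            reg_write(0, 0xffff0000);
            reg_update(0, 0x00f00000, 0x0000000f); /* clear bits 23:20, set 3:0 */
            printf("reg0 = 0x%08" PRIx32 "\n", reg_read(0)); /* 0xff0f000f */
            printf("done = %d\n", poll_bit(1, 0x1, 5));      /* never set: 0 */
            return 0;
    }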
- * @currctrl: iss_csi2_ctrl_cfg structure - * - */ -static void csi2_recv_config(struct iss_csi2_device *csi2, - struct iss_csi2_ctrl_cfg *currctrl) -{ - u32 reg = 0; - - if (currctrl->frame_mode) - reg |= CSI2_CTRL_FRAME; - else - reg &= ~CSI2_CTRL_FRAME; - - if (currctrl->vp_clk_enable) - reg |= CSI2_CTRL_VP_CLK_EN; - else - reg &= ~CSI2_CTRL_VP_CLK_EN; - - if (currctrl->vp_only_enable) - reg |= CSI2_CTRL_VP_ONLY_EN; - else - reg &= ~CSI2_CTRL_VP_ONLY_EN; - - reg &= ~CSI2_CTRL_VP_OUT_CTRL_MASK; - reg |= currctrl->vp_out_ctrl << CSI2_CTRL_VP_OUT_CTRL_SHIFT; - - if (currctrl->ecc_enable) - reg |= CSI2_CTRL_ECC_EN; - else - reg &= ~CSI2_CTRL_ECC_EN; - - /* - * Set MFlag assertion boundaries to: - * Low: 4/8 of FIFO size - * High: 6/8 of FIFO size - */ - reg &= ~(CSI2_CTRL_MFLAG_LEVH_MASK | CSI2_CTRL_MFLAG_LEVL_MASK); - reg |= (2 << CSI2_CTRL_MFLAG_LEVH_SHIFT) | - (4 << CSI2_CTRL_MFLAG_LEVL_SHIFT); - - /* Generation of 16x64-bit bursts (Recommended) */ - reg |= CSI2_CTRL_BURST_SIZE_EXPAND; - - /* Do Non-Posted writes (Recommended) */ - reg |= CSI2_CTRL_NON_POSTED_WRITE; - - /* - * Enforce Little endian for all formats, including: - * YUV4:2:2 8-bit and YUV4:2:0 Legacy - */ - reg |= CSI2_CTRL_ENDIANNESS; - - iss_reg_write(csi2->iss, csi2->regs1, CSI2_CTRL, reg); -} - -static const unsigned int csi2_input_fmts[] = { - MEDIA_BUS_FMT_SGRBG10_1X10, - MEDIA_BUS_FMT_SGRBG10_DPCM8_1X8, - MEDIA_BUS_FMT_SRGGB10_1X10, - MEDIA_BUS_FMT_SRGGB10_DPCM8_1X8, - MEDIA_BUS_FMT_SBGGR10_1X10, - MEDIA_BUS_FMT_SBGGR10_DPCM8_1X8, - MEDIA_BUS_FMT_SGBRG10_1X10, - MEDIA_BUS_FMT_SGBRG10_DPCM8_1X8, - MEDIA_BUS_FMT_SBGGR8_1X8, - MEDIA_BUS_FMT_SGBRG8_1X8, - MEDIA_BUS_FMT_SGRBG8_1X8, - MEDIA_BUS_FMT_SRGGB8_1X8, - MEDIA_BUS_FMT_UYVY8_1X16, - MEDIA_BUS_FMT_YUYV8_1X16, -}; - -/* To set the format on the CSI2 requires a mapping function that takes - * the following inputs: - * - 3 different formats (at this time) - * - 2 destinations (mem, vp+mem) (vp only handled separately) - * - 2 decompression options (on, off) - * Output should be CSI2 frame format code - * Array indices as follows: [format][dest][decompr] - * Not all combinations are valid. 0 means invalid. 
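The lookup table that follows encodes that [format][dest][decompr] convention directly, with 0 as the invalid sentinel. A toy table of the same shape, just to make the indexing concrete; the numeric codes below are invented for the example, not the real CSI-2 data type codes:

    #include <stdint.h>
    #include <stdio.h>

    /* toy_map[format][dest][decompr]; 0 means "no valid code for this combination". */
    static const uint16_t toy_map[2][2][2] = {
            {                               /* format 0: e.g. RAW10 */
                    { 0x0ab, 0 },           /* dest 0: memory only */
                    { 0x1ab, 0 },           /* dest 1: memory + video port */
            },
            {                               /* format 1: e.g. RAW10 DPCM8 */
                    { 0x040, 0x2c0 },       /* decompression allowed here */
                    { 0x140, 0x340 },
            },
    };

    static uint16_t map_format(int fmt, int to_vp, int decompress)
    {
            uint16_t code = toy_map[fmt][!!to_vp][!!decompress];

            if (!code)
                    printf("invalid combination fmt=%d vp=%d dpcm=%d\n",
                           fmt, to_vp, decompress);
            return code;
    }

    int main(void)
    {
            printf("0x%03x\n", (unsigned int)map_format(1, 1, 1)); /* 0x340 */
            printf("0x%03x\n", (unsigned int)map_format(0, 0, 1)); /* invalid: 0 */
            return 0;
    }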
- */ -static const u16 __csi2_fmt_map[][2][2] = { - /* RAW10 formats */ - { - /* Output to memory */ - { - /* No DPCM decompression */ - CSI2_PIX_FMT_RAW10_EXP16, - /* DPCM decompression */ - 0, - }, - /* Output to both */ - { - /* No DPCM decompression */ - CSI2_PIX_FMT_RAW10_EXP16_VP, - /* DPCM decompression */ - 0, - }, - }, - /* RAW10 DPCM8 formats */ - { - /* Output to memory */ - { - /* No DPCM decompression */ - CSI2_USERDEF_8BIT_DATA1, - /* DPCM decompression */ - CSI2_USERDEF_8BIT_DATA1_DPCM10, - }, - /* Output to both */ - { - /* No DPCM decompression */ - CSI2_PIX_FMT_RAW8_VP, - /* DPCM decompression */ - CSI2_USERDEF_8BIT_DATA1_DPCM10_VP, - }, - }, - /* RAW8 formats */ - { - /* Output to memory */ - { - /* No DPCM decompression */ - CSI2_PIX_FMT_RAW8, - /* DPCM decompression */ - 0, - }, - /* Output to both */ - { - /* No DPCM decompression */ - CSI2_PIX_FMT_RAW8_VP, - /* DPCM decompression */ - 0, - }, - }, - /* YUV422 formats */ - { - /* Output to memory */ - { - /* No DPCM decompression */ - CSI2_PIX_FMT_YUV422_8BIT, - /* DPCM decompression */ - 0, - }, - /* Output to both */ - { - /* No DPCM decompression */ - CSI2_PIX_FMT_YUV422_8BIT_VP16, - /* DPCM decompression */ - 0, - }, - }, -}; - -/* - * csi2_ctx_map_format - Map CSI2 sink media bus format to CSI2 format ID - * @csi2: ISS CSI2 device - * - * Returns CSI2 physical format id - */ -static u16 csi2_ctx_map_format(struct iss_csi2_device *csi2) -{ - const struct v4l2_mbus_framefmt *fmt = &csi2->formats[CSI2_PAD_SINK]; - int fmtidx, destidx; - - switch (fmt->code) { - case MEDIA_BUS_FMT_SGRBG10_1X10: - case MEDIA_BUS_FMT_SRGGB10_1X10: - case MEDIA_BUS_FMT_SBGGR10_1X10: - case MEDIA_BUS_FMT_SGBRG10_1X10: - fmtidx = 0; - break; - case MEDIA_BUS_FMT_SGRBG10_DPCM8_1X8: - case MEDIA_BUS_FMT_SRGGB10_DPCM8_1X8: - case MEDIA_BUS_FMT_SBGGR10_DPCM8_1X8: - case MEDIA_BUS_FMT_SGBRG10_DPCM8_1X8: - fmtidx = 1; - break; - case MEDIA_BUS_FMT_SBGGR8_1X8: - case MEDIA_BUS_FMT_SGBRG8_1X8: - case MEDIA_BUS_FMT_SGRBG8_1X8: - case MEDIA_BUS_FMT_SRGGB8_1X8: - fmtidx = 2; - break; - case MEDIA_BUS_FMT_UYVY8_1X16: - case MEDIA_BUS_FMT_YUYV8_1X16: - fmtidx = 3; - break; - default: - WARN(1, "CSI2: pixel format %08x unsupported!\n", - fmt->code); - return 0; - } - - if (!(csi2->output & CSI2_OUTPUT_IPIPEIF) && - !(csi2->output & CSI2_OUTPUT_MEMORY)) { - /* Neither output enabled is a valid combination */ - return CSI2_PIX_FMT_OTHERS; - } - - /* If we need to skip frames at the beginning of the stream disable the - * video port to avoid sending the skipped frames to the IPIPEIF. - */ - destidx = csi2->frame_skip ? 0 : !!(csi2->output & CSI2_OUTPUT_IPIPEIF); - - return __csi2_fmt_map[fmtidx][destidx][csi2->dpcm_decompress]; -} - -/* - * csi2_set_outaddr - Set memory address to save output image - * @csi2: Pointer to ISS CSI2a device. - * @addr: 32-bit memory address aligned on 32 byte boundary. - * - * Sets the memory address where the output will be saved. - * - * Returns 0 if successful, or -EINVAL if the address is not in the 32 byte - * boundary. - */ -static void csi2_set_outaddr(struct iss_csi2_device *csi2, u32 addr) -{ - struct iss_csi2_ctx_cfg *ctx = &csi2->contexts[0]; - - ctx->ping_addr = addr; - ctx->pong_addr = addr; - iss_reg_write(csi2->iss, csi2->regs1, CSI2_CTX_PING_ADDR(ctx->ctxnum), - ctx->ping_addr); - iss_reg_write(csi2->iss, csi2->regs1, CSI2_CTX_PONG_ADDR(ctx->ctxnum), - ctx->pong_addr); -} - -/* - * is_usr_def_mapping - Checks whether USER_DEF_MAPPING should - * be enabled by CSI2. 
- * @format_id: mapped format id - * - */ -static inline int is_usr_def_mapping(u32 format_id) -{ - return (format_id & 0xf0) == 0x40 ? 1 : 0; -} - -/* - * csi2_ctx_enable - Enable specified CSI2 context - * @ctxnum: Context number, valid between 0 and 7 values. - * @enable: enable - * - */ -static void csi2_ctx_enable(struct iss_csi2_device *csi2, u8 ctxnum, u8 enable) -{ - struct iss_csi2_ctx_cfg *ctx = &csi2->contexts[ctxnum]; - u32 reg; - - reg = iss_reg_read(csi2->iss, csi2->regs1, CSI2_CTX_CTRL1(ctxnum)); - - if (enable) { - unsigned int skip = 0; - - if (csi2->frame_skip) - skip = csi2->frame_skip; - else if (csi2->output & CSI2_OUTPUT_MEMORY) - skip = 1; - - reg &= ~CSI2_CTX_CTRL1_COUNT_MASK; - reg |= CSI2_CTX_CTRL1_COUNT_UNLOCK - | (skip << CSI2_CTX_CTRL1_COUNT_SHIFT) - | CSI2_CTX_CTRL1_CTX_EN; - } else { - reg &= ~CSI2_CTX_CTRL1_CTX_EN; - } - - iss_reg_write(csi2->iss, csi2->regs1, CSI2_CTX_CTRL1(ctxnum), reg); - ctx->enabled = enable; -} - -/* - * csi2_ctx_config - CSI2 context configuration. - * @ctx: context configuration - * - */ -static void csi2_ctx_config(struct iss_csi2_device *csi2, - struct iss_csi2_ctx_cfg *ctx) -{ - u32 reg = 0; - - ctx->frame = 0; - - /* Set up CSI2_CTx_CTRL1 */ - if (ctx->eof_enabled) - reg = CSI2_CTX_CTRL1_EOF_EN; - - if (ctx->eol_enabled) - reg |= CSI2_CTX_CTRL1_EOL_EN; - - if (ctx->checksum_enabled) - reg |= CSI2_CTX_CTRL1_CS_EN; - - iss_reg_write(csi2->iss, csi2->regs1, CSI2_CTX_CTRL1(ctx->ctxnum), reg); - - /* Set up CSI2_CTx_CTRL2 */ - reg = ctx->virtual_id << CSI2_CTX_CTRL2_VIRTUAL_ID_SHIFT; - reg |= ctx->format_id << CSI2_CTX_CTRL2_FORMAT_SHIFT; - - if (ctx->dpcm_decompress && ctx->dpcm_predictor) - reg |= CSI2_CTX_CTRL2_DPCM_PRED; - - if (is_usr_def_mapping(ctx->format_id)) - reg |= 2 << CSI2_CTX_CTRL2_USER_DEF_MAP_SHIFT; - - iss_reg_write(csi2->iss, csi2->regs1, CSI2_CTX_CTRL2(ctx->ctxnum), reg); - - /* Set up CSI2_CTx_CTRL3 */ - iss_reg_write(csi2->iss, csi2->regs1, CSI2_CTX_CTRL3(ctx->ctxnum), - ctx->alpha << CSI2_CTX_CTRL3_ALPHA_SHIFT); - - /* Set up CSI2_CTx_DAT_OFST */ - iss_reg_update(csi2->iss, csi2->regs1, CSI2_CTX_DAT_OFST(ctx->ctxnum), - CSI2_CTX_DAT_OFST_MASK, ctx->data_offset); - - iss_reg_write(csi2->iss, csi2->regs1, CSI2_CTX_PING_ADDR(ctx->ctxnum), - ctx->ping_addr); - iss_reg_write(csi2->iss, csi2->regs1, CSI2_CTX_PONG_ADDR(ctx->ctxnum), - ctx->pong_addr); -} - -/* - * csi2_timing_config - CSI2 timing configuration. - * @timing: csi2_timing_cfg structure - */ -static void csi2_timing_config(struct iss_csi2_device *csi2, - struct iss_csi2_timing_cfg *timing) -{ - u32 reg; - - reg = iss_reg_read(csi2->iss, csi2->regs1, CSI2_TIMING); - - if (timing->force_rx_mode) - reg |= CSI2_TIMING_FORCE_RX_MODE_IO1; - else - reg &= ~CSI2_TIMING_FORCE_RX_MODE_IO1; - - if (timing->stop_state_16x) - reg |= CSI2_TIMING_STOP_STATE_X16_IO1; - else - reg &= ~CSI2_TIMING_STOP_STATE_X16_IO1; - - if (timing->stop_state_4x) - reg |= CSI2_TIMING_STOP_STATE_X4_IO1; - else - reg &= ~CSI2_TIMING_STOP_STATE_X4_IO1; - - reg &= ~CSI2_TIMING_STOP_STATE_COUNTER_IO1_MASK; - reg |= timing->stop_state_counter << - CSI2_TIMING_STOP_STATE_COUNTER_IO1_SHIFT; - - iss_reg_write(csi2->iss, csi2->regs1, CSI2_TIMING, reg); -} - -/* - * csi2_irq_ctx_set - Enables CSI2 Context IRQs. 
- * @enable: Enable/disable CSI2 Context interrupts - */ -static void csi2_irq_ctx_set(struct iss_csi2_device *csi2, int enable) -{ - const u32 mask = CSI2_CTX_IRQ_FE | CSI2_CTX_IRQ_FS; - int i; - - for (i = 0; i < 8; i++) { - iss_reg_write(csi2->iss, csi2->regs1, CSI2_CTX_IRQSTATUS(i), - mask); - if (enable) - iss_reg_set(csi2->iss, csi2->regs1, - CSI2_CTX_IRQENABLE(i), mask); - else - iss_reg_clr(csi2->iss, csi2->regs1, - CSI2_CTX_IRQENABLE(i), mask); - } -} - -/* - * csi2_irq_complexio1_set - Enables CSI2 ComplexIO IRQs. - * @enable: Enable/disable CSI2 ComplexIO #1 interrupts - */ -static void csi2_irq_complexio1_set(struct iss_csi2_device *csi2, int enable) -{ - u32 reg; - - reg = CSI2_COMPLEXIO_IRQ_STATEALLULPMEXIT | - CSI2_COMPLEXIO_IRQ_STATEALLULPMENTER | - CSI2_COMPLEXIO_IRQ_STATEULPM5 | - CSI2_COMPLEXIO_IRQ_ERRCONTROL5 | - CSI2_COMPLEXIO_IRQ_ERRESC5 | - CSI2_COMPLEXIO_IRQ_ERRSOTSYNCHS5 | - CSI2_COMPLEXIO_IRQ_ERRSOTHS5 | - CSI2_COMPLEXIO_IRQ_STATEULPM4 | - CSI2_COMPLEXIO_IRQ_ERRCONTROL4 | - CSI2_COMPLEXIO_IRQ_ERRESC4 | - CSI2_COMPLEXIO_IRQ_ERRSOTSYNCHS4 | - CSI2_COMPLEXIO_IRQ_ERRSOTHS4 | - CSI2_COMPLEXIO_IRQ_STATEULPM3 | - CSI2_COMPLEXIO_IRQ_ERRCONTROL3 | - CSI2_COMPLEXIO_IRQ_ERRESC3 | - CSI2_COMPLEXIO_IRQ_ERRSOTSYNCHS3 | - CSI2_COMPLEXIO_IRQ_ERRSOTHS3 | - CSI2_COMPLEXIO_IRQ_STATEULPM2 | - CSI2_COMPLEXIO_IRQ_ERRCONTROL2 | - CSI2_COMPLEXIO_IRQ_ERRESC2 | - CSI2_COMPLEXIO_IRQ_ERRSOTSYNCHS2 | - CSI2_COMPLEXIO_IRQ_ERRSOTHS2 | - CSI2_COMPLEXIO_IRQ_STATEULPM1 | - CSI2_COMPLEXIO_IRQ_ERRCONTROL1 | - CSI2_COMPLEXIO_IRQ_ERRESC1 | - CSI2_COMPLEXIO_IRQ_ERRSOTSYNCHS1 | - CSI2_COMPLEXIO_IRQ_ERRSOTHS1; - iss_reg_write(csi2->iss, csi2->regs1, CSI2_COMPLEXIO_IRQSTATUS, reg); - if (enable) - iss_reg_set(csi2->iss, csi2->regs1, CSI2_COMPLEXIO_IRQENABLE, - reg); - else - iss_reg_write(csi2->iss, csi2->regs1, CSI2_COMPLEXIO_IRQENABLE, - 0); -} - -/* - * csi2_irq_status_set - Enables CSI2 Status IRQs. - * @enable: Enable/disable CSI2 Status interrupts - */ -static void csi2_irq_status_set(struct iss_csi2_device *csi2, int enable) -{ - u32 reg; - - reg = CSI2_IRQ_OCP_ERR | - CSI2_IRQ_SHORT_PACKET | - CSI2_IRQ_ECC_CORRECTION | - CSI2_IRQ_ECC_NO_CORRECTION | - CSI2_IRQ_COMPLEXIO_ERR | - CSI2_IRQ_FIFO_OVF | - CSI2_IRQ_CONTEXT0; - iss_reg_write(csi2->iss, csi2->regs1, CSI2_IRQSTATUS, reg); - if (enable) - iss_reg_set(csi2->iss, csi2->regs1, CSI2_IRQENABLE, reg); - else - iss_reg_write(csi2->iss, csi2->regs1, CSI2_IRQENABLE, 0); -} - -/* - * omap4iss_csi2_reset - Resets the CSI2 module. - * - * Must be called with the phy lock held. - * - * Returns 0 if successful, or -EBUSY if power command didn't respond. 
- */ -int omap4iss_csi2_reset(struct iss_csi2_device *csi2) -{ - unsigned int timeout; - - if (!csi2->available) - return -ENODEV; - - if (csi2->phy->phy_in_use) - return -EBUSY; - - iss_reg_set(csi2->iss, csi2->regs1, CSI2_SYSCONFIG, - CSI2_SYSCONFIG_SOFT_RESET); - - timeout = iss_poll_condition_timeout( - iss_reg_read(csi2->iss, csi2->regs1, CSI2_SYSSTATUS) & - CSI2_SYSSTATUS_RESET_DONE, 500, 100, 200); - if (timeout) { - dev_err(csi2->iss->dev, "CSI2: Soft reset timeout!\n"); - return -EBUSY; - } - - iss_reg_set(csi2->iss, csi2->regs1, CSI2_COMPLEXIO_CFG, - CSI2_COMPLEXIO_CFG_RESET_CTRL); - - timeout = iss_poll_condition_timeout( - iss_reg_read(csi2->iss, csi2->phy->phy_regs, REGISTER1) & - REGISTER1_RESET_DONE_CTRLCLK, 10000, 100, 500); - if (timeout) { - dev_err(csi2->iss->dev, "CSI2: CSI2_96M_FCLK reset timeout!\n"); - return -EBUSY; - } - - iss_reg_update(csi2->iss, csi2->regs1, CSI2_SYSCONFIG, - CSI2_SYSCONFIG_MSTANDBY_MODE_MASK | - CSI2_SYSCONFIG_AUTO_IDLE, - CSI2_SYSCONFIG_MSTANDBY_MODE_NO); - - return 0; -} - -static int csi2_configure(struct iss_csi2_device *csi2) -{ - const struct iss_v4l2_subdevs_group *pdata; - struct iss_csi2_timing_cfg *timing = &csi2->timing[0]; - struct v4l2_subdev *sensor; - struct media_pad *pad; - - /* - * CSI2 fields that can be updated while the context has - * been enabled or the interface has been enabled are not - * updated dynamically currently. So we do not allow to - * reconfigure if either has been enabled - */ - if (csi2->contexts[0].enabled || csi2->ctrl.if_enable) - return -EBUSY; - - pad = media_pad_remote_pad_first(&csi2->pads[CSI2_PAD_SINK]); - sensor = media_entity_to_v4l2_subdev(pad->entity); - pdata = sensor->host_priv; - - csi2->frame_skip = 0; - v4l2_subdev_call(sensor, sensor, g_skip_frames, &csi2->frame_skip); - - csi2->ctrl.vp_out_ctrl = pdata->bus.csi2.vpclk_div; - csi2->ctrl.frame_mode = ISS_CSI2_FRAME_IMMEDIATE; - csi2->ctrl.ecc_enable = pdata->bus.csi2.crc; - - timing->force_rx_mode = 1; - timing->stop_state_16x = 1; - timing->stop_state_4x = 1; - timing->stop_state_counter = 0x1ff; - - /* - * The CSI2 receiver can't do any format conversion except DPCM - * decompression, so every set_format call configures both pads - * and enables DPCM decompression as a special case: - */ - if (csi2->formats[CSI2_PAD_SINK].code != - csi2->formats[CSI2_PAD_SOURCE].code) - csi2->dpcm_decompress = true; - else - csi2->dpcm_decompress = false; - - csi2->contexts[0].format_id = csi2_ctx_map_format(csi2); - - if (csi2->video_out.bpl_padding == 0) - csi2->contexts[0].data_offset = 0; - else - csi2->contexts[0].data_offset = csi2->video_out.bpl_value; - - /* - * Enable end of frame and end of line signals generation for - * context 0. These signals are generated from CSI2 receiver to - * qualify the last pixel of a frame and the last pixel of a line. - * Without enabling the signals CSI2 receiver writes data to memory - * beyond buffer size and/or data line offset is not handled correctly. - */ - csi2->contexts[0].eof_enabled = 1; - csi2->contexts[0].eol_enabled = 1; - - csi2_irq_complexio1_set(csi2, 1); - csi2_irq_ctx_set(csi2, 1); - csi2_irq_status_set(csi2, 1); - - /* Set configuration (timings, format and links) */ - csi2_timing_config(csi2, timing); - csi2_recv_config(csi2, &csi2->ctrl); - csi2_ctx_config(csi2, &csi2->contexts[0]); - - return 0; -} - -/* - * csi2_print_status - Prints CSI2 debug information. 
- */ -#define CSI2_PRINT_REGISTER(iss, regs, name)\ - dev_dbg(iss->dev, "###CSI2 " #name "=0x%08x\n", \ - iss_reg_read(iss, regs, CSI2_##name)) - -static void csi2_print_status(struct iss_csi2_device *csi2) -{ - struct iss_device *iss = csi2->iss; - - if (!csi2->available) - return; - - dev_dbg(iss->dev, "-------------CSI2 Register dump-------------\n"); - - CSI2_PRINT_REGISTER(iss, csi2->regs1, SYSCONFIG); - CSI2_PRINT_REGISTER(iss, csi2->regs1, SYSSTATUS); - CSI2_PRINT_REGISTER(iss, csi2->regs1, IRQENABLE); - CSI2_PRINT_REGISTER(iss, csi2->regs1, IRQSTATUS); - CSI2_PRINT_REGISTER(iss, csi2->regs1, CTRL); - CSI2_PRINT_REGISTER(iss, csi2->regs1, DBG_H); - CSI2_PRINT_REGISTER(iss, csi2->regs1, COMPLEXIO_CFG); - CSI2_PRINT_REGISTER(iss, csi2->regs1, COMPLEXIO_IRQSTATUS); - CSI2_PRINT_REGISTER(iss, csi2->regs1, SHORT_PACKET); - CSI2_PRINT_REGISTER(iss, csi2->regs1, COMPLEXIO_IRQENABLE); - CSI2_PRINT_REGISTER(iss, csi2->regs1, DBG_P); - CSI2_PRINT_REGISTER(iss, csi2->regs1, TIMING); - CSI2_PRINT_REGISTER(iss, csi2->regs1, CTX_CTRL1(0)); - CSI2_PRINT_REGISTER(iss, csi2->regs1, CTX_CTRL2(0)); - CSI2_PRINT_REGISTER(iss, csi2->regs1, CTX_DAT_OFST(0)); - CSI2_PRINT_REGISTER(iss, csi2->regs1, CTX_PING_ADDR(0)); - CSI2_PRINT_REGISTER(iss, csi2->regs1, CTX_PONG_ADDR(0)); - CSI2_PRINT_REGISTER(iss, csi2->regs1, CTX_IRQENABLE(0)); - CSI2_PRINT_REGISTER(iss, csi2->regs1, CTX_IRQSTATUS(0)); - CSI2_PRINT_REGISTER(iss, csi2->regs1, CTX_CTRL3(0)); - - dev_dbg(iss->dev, "--------------------------------------------\n"); -} - -/* ----------------------------------------------------------------------------- - * Interrupt handling - */ - -/* - * csi2_isr_buffer - Does buffer handling at end-of-frame - * when writing to memory. - */ -static void csi2_isr_buffer(struct iss_csi2_device *csi2) -{ - struct iss_buffer *buffer; - - csi2_ctx_enable(csi2, 0, 0); - - buffer = omap4iss_video_buffer_next(&csi2->video_out); - - /* - * Let video queue operation restart engine if there is an underrun - * condition. - */ - if (!buffer) - return; - - csi2_set_outaddr(csi2, buffer->iss_addr); - csi2_ctx_enable(csi2, 0, 1); -} - -static void csi2_isr_ctx(struct iss_csi2_device *csi2, - struct iss_csi2_ctx_cfg *ctx) -{ - unsigned int n = ctx->ctxnum; - u32 status; - - status = iss_reg_read(csi2->iss, csi2->regs1, CSI2_CTX_IRQSTATUS(n)); - iss_reg_write(csi2->iss, csi2->regs1, CSI2_CTX_IRQSTATUS(n), status); - - if (omap4iss_module_sync_is_stopping(&csi2->wait, &csi2->stopping)) - return; - - /* Propagate frame number */ - if (status & CSI2_CTX_IRQ_FS) { - struct iss_pipeline *pipe = - to_iss_pipeline(&csi2->subdev.entity); - u16 frame; - u16 delta; - - frame = iss_reg_read(csi2->iss, csi2->regs1, - CSI2_CTX_CTRL2(ctx->ctxnum)) - >> CSI2_CTX_CTRL2_FRAME_SHIFT; - - if (frame == 0) { - /* A zero value means that the counter isn't implemented - * by the source. Increment the frame number in software - * in that case. - */ - atomic_inc(&pipe->frame_number); - } else { - /* Extend the 16 bit frame number to 32 bits by - * computing the delta between two consecutive CSI2 - * frame numbers and adding it to the software frame - * number. The hardware counter starts at 1 and wraps - * from 0xffff to 1 without going through 0, so subtract - * 1 when the counter wraps. - */ - delta = frame - ctx->frame; - if (frame < ctx->frame) - delta--; - ctx->frame = frame; - - atomic_add(delta, &pipe->frame_number); - } - } - - if (!(status & CSI2_CTX_IRQ_FE)) - return; - - /* Skip interrupts until we reach the frame skip count. 
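The frame-number propagation just above turns a 16-bit hardware counter that wraps from 0xffff back to 1 (never passing through 0) into a monotonically increasing software count. The same arithmetic as a standalone sketch, with arbitrary example values:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * Extend a 16-bit counter running 1..0xffff (wrapping back to 1, skipping 0)
     * into a 32-bit software frame number.
     */
    static uint32_t extend_frame(uint32_t sw_frame, uint16_t *last, uint16_t hw_frame)
    {
            uint16_t delta = hw_frame - *last;      /* mod-65536 difference */

            if (hw_frame < *last)
                    delta--;                        /* the counter skipped 0 on wrap */
            *last = hw_frame;
            return sw_frame + delta;
    }

    int main(void)
    {
            uint16_t last = 0xfffe;
            uint32_t sw = 100000;

            sw = extend_frame(sw, &last, 0xffff);   /* +1 -> 100001 */
            sw = extend_frame(sw, &last, 0x0001);   /* wraps past 0: +1 -> 100002 */
            sw = extend_frame(sw, &last, 0x0003);   /* +2 -> 100004 */
            printf("%" PRIu32 "\n", sw);            /* 100004 */
            return 0;
    }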
The CSI2 will be - * automatically disabled, as the frame skip count has been programmed - * in the CSI2_CTx_CTRL1::COUNT field, so re-enable it. - * - * It would have been nice to rely on the FRAME_NUMBER interrupt instead - * but it turned out that the interrupt is only generated when the CSI2 - * writes to memory (the CSI2_CTx_CTRL1::COUNT field is decreased - * correctly and reaches 0 when data is forwarded to the video port only - * but no interrupt arrives). Maybe a CSI2 hardware bug. - */ - if (csi2->frame_skip) { - csi2->frame_skip--; - if (csi2->frame_skip == 0) { - ctx->format_id = csi2_ctx_map_format(csi2); - csi2_ctx_config(csi2, ctx); - csi2_ctx_enable(csi2, n, 1); - } - return; - } - - if (csi2->output & CSI2_OUTPUT_MEMORY) - csi2_isr_buffer(csi2); -} - -/* - * omap4iss_csi2_isr - CSI2 interrupt handling. - */ -void omap4iss_csi2_isr(struct iss_csi2_device *csi2) -{ - struct iss_pipeline *pipe = to_iss_pipeline(&csi2->subdev.entity); - u32 csi2_irqstatus, cpxio1_irqstatus; - struct iss_device *iss = csi2->iss; - - if (!csi2->available) - return; - - csi2_irqstatus = iss_reg_read(csi2->iss, csi2->regs1, CSI2_IRQSTATUS); - iss_reg_write(csi2->iss, csi2->regs1, CSI2_IRQSTATUS, csi2_irqstatus); - - /* Failure Cases */ - if (csi2_irqstatus & CSI2_IRQ_COMPLEXIO_ERR) { - cpxio1_irqstatus = iss_reg_read(csi2->iss, csi2->regs1, - CSI2_COMPLEXIO_IRQSTATUS); - iss_reg_write(csi2->iss, csi2->regs1, CSI2_COMPLEXIO_IRQSTATUS, - cpxio1_irqstatus); - dev_dbg(iss->dev, "CSI2: ComplexIO Error IRQ %x\n", - cpxio1_irqstatus); - pipe->error = true; - } - - if (csi2_irqstatus & (CSI2_IRQ_OCP_ERR | - CSI2_IRQ_SHORT_PACKET | - CSI2_IRQ_ECC_NO_CORRECTION | - CSI2_IRQ_COMPLEXIO_ERR | - CSI2_IRQ_FIFO_OVF)) { - dev_dbg(iss->dev, - "CSI2 Err: OCP:%d SHORT:%d ECC:%d CPXIO:%d OVF:%d\n", - csi2_irqstatus & CSI2_IRQ_OCP_ERR ? 1 : 0, - csi2_irqstatus & CSI2_IRQ_SHORT_PACKET ? 1 : 0, - csi2_irqstatus & CSI2_IRQ_ECC_NO_CORRECTION ? 1 : 0, - csi2_irqstatus & CSI2_IRQ_COMPLEXIO_ERR ? 1 : 0, - csi2_irqstatus & CSI2_IRQ_FIFO_OVF ? 1 : 0); - pipe->error = true; - } - - /* Successful cases */ - if (csi2_irqstatus & CSI2_IRQ_CONTEXT0) - csi2_isr_ctx(csi2, &csi2->contexts[0]); - - if (csi2_irqstatus & CSI2_IRQ_ECC_CORRECTION) - dev_dbg(iss->dev, "CSI2: ECC correction done\n"); -} - -/* ----------------------------------------------------------------------------- - * ISS video operations - */ - -/* - * csi2_queue - Queues the first buffer when using memory output - * @video: The video node - * @buffer: buffer to queue - */ -static int csi2_queue(struct iss_video *video, struct iss_buffer *buffer) -{ - struct iss_csi2_device *csi2 = container_of(video, - struct iss_csi2_device, video_out); - - csi2_set_outaddr(csi2, buffer->iss_addr); - - /* - * If streaming was enabled before there was a buffer queued - * or underrun happened in the ISR, the hardware was not enabled - * and DMA queue flag ISS_VIDEO_DMAQUEUE_UNDERRUN is still set. - * Enable it now. 
- */ - if (csi2->video_out.dmaqueue_flags & ISS_VIDEO_DMAQUEUE_UNDERRUN) { - /* Enable / disable context 0 and IRQs */ - csi2_if_enable(csi2, 1); - csi2_ctx_enable(csi2, 0, 1); - iss_video_dmaqueue_flags_clr(&csi2->video_out); - } - - return 0; -} - -static const struct iss_video_operations csi2_issvideo_ops = { - .queue = csi2_queue, -}; - -/* ----------------------------------------------------------------------------- - * V4L2 subdev operations - */ - -static struct v4l2_mbus_framefmt * -__csi2_get_format(struct iss_csi2_device *csi2, - struct v4l2_subdev_state *sd_state, - unsigned int pad, - enum v4l2_subdev_format_whence which) -{ - if (which == V4L2_SUBDEV_FORMAT_TRY) - return v4l2_subdev_state_get_format(sd_state, pad); - - return &csi2->formats[pad]; -} - -static void -csi2_try_format(struct iss_csi2_device *csi2, - struct v4l2_subdev_state *sd_state, - unsigned int pad, - struct v4l2_mbus_framefmt *fmt, - enum v4l2_subdev_format_whence which) -{ - u32 pixelcode; - struct v4l2_mbus_framefmt *format; - const struct iss_format_info *info; - unsigned int i; - - switch (pad) { - case CSI2_PAD_SINK: - /* Clamp the width and height to valid range (1-8191). */ - for (i = 0; i < ARRAY_SIZE(csi2_input_fmts); i++) { - if (fmt->code == csi2_input_fmts[i]) - break; - } - - /* If not found, use SGRBG10 as default */ - if (i >= ARRAY_SIZE(csi2_input_fmts)) - fmt->code = MEDIA_BUS_FMT_SGRBG10_1X10; - - fmt->width = clamp_t(u32, fmt->width, 1, 8191); - fmt->height = clamp_t(u32, fmt->height, 1, 8191); - break; - - case CSI2_PAD_SOURCE: - /* Source format same as sink format, except for DPCM - * compression. - */ - pixelcode = fmt->code; - format = __csi2_get_format(csi2, sd_state, CSI2_PAD_SINK, - which); - memcpy(fmt, format, sizeof(*fmt)); - - /* - * Only Allow DPCM decompression, and check that the - * pattern is preserved - */ - info = omap4iss_video_format_info(fmt->code); - if (info->uncompressed == pixelcode) - fmt->code = pixelcode; - break; - } - - /* RGB, non-interlaced */ - fmt->colorspace = V4L2_COLORSPACE_SRGB; - fmt->field = V4L2_FIELD_NONE; -} - -/* - * csi2_enum_mbus_code - Handle pixel format enumeration - * @sd : pointer to v4l2 subdev structure - * @sd_state: V4L2 subdev state - * @code : pointer to v4l2_subdev_mbus_code_enum structure - * return -EINVAL or zero on success - */ -static int csi2_enum_mbus_code(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_mbus_code_enum *code) -{ - struct iss_csi2_device *csi2 = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt *format; - const struct iss_format_info *info; - - if (code->pad == CSI2_PAD_SINK) { - if (code->index >= ARRAY_SIZE(csi2_input_fmts)) - return -EINVAL; - - code->code = csi2_input_fmts[code->index]; - } else { - format = __csi2_get_format(csi2, sd_state, CSI2_PAD_SINK, - code->which); - switch (code->index) { - case 0: - /* Passthrough sink pad code */ - code->code = format->code; - break; - case 1: - /* Uncompressed code */ - info = omap4iss_video_format_info(format->code); - if (info->uncompressed == format->code) - return -EINVAL; - - code->code = info->uncompressed; - break; - default: - return -EINVAL; - } - } - - return 0; -} - -static int csi2_enum_frame_size(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_frame_size_enum *fse) -{ - struct iss_csi2_device *csi2 = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt format; - - if (fse->index != 0) - return -EINVAL; - - format.code = fse->code; - format.width = 1; - format.height = 1; - 
csi2_try_format(csi2, sd_state, fse->pad, &format, fse->which); - fse->min_width = format.width; - fse->min_height = format.height; - - if (format.code != fse->code) - return -EINVAL; - - format.code = fse->code; - format.width = -1; - format.height = -1; - csi2_try_format(csi2, sd_state, fse->pad, &format, fse->which); - fse->max_width = format.width; - fse->max_height = format.height; - - return 0; -} - -/* - * csi2_get_format - Handle get format by pads subdev method - * @sd : pointer to v4l2 subdev structure - * @sd_state: V4L2 subdev state - * @fmt: pointer to v4l2 subdev format structure - * return -EINVAL or zero on success - */ -static int csi2_get_format(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_format *fmt) -{ - struct iss_csi2_device *csi2 = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt *format; - - format = __csi2_get_format(csi2, sd_state, fmt->pad, fmt->which); - if (!format) - return -EINVAL; - - fmt->format = *format; - return 0; -} - -/* - * csi2_set_format - Handle set format by pads subdev method - * @sd : pointer to v4l2 subdev structure - * @sd_state: V4L2 subdev state - * @fmt: pointer to v4l2 subdev format structure - * return -EINVAL or zero on success - */ -static int csi2_set_format(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_format *fmt) -{ - struct iss_csi2_device *csi2 = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt *format; - - format = __csi2_get_format(csi2, sd_state, fmt->pad, fmt->which); - if (!format) - return -EINVAL; - - csi2_try_format(csi2, sd_state, fmt->pad, &fmt->format, fmt->which); - *format = fmt->format; - - /* Propagate the format from sink to source */ - if (fmt->pad == CSI2_PAD_SINK) { - format = __csi2_get_format(csi2, sd_state, CSI2_PAD_SOURCE, - fmt->which); - *format = fmt->format; - csi2_try_format(csi2, sd_state, CSI2_PAD_SOURCE, format, - fmt->which); - } - - return 0; -} - -static int csi2_link_validate(struct v4l2_subdev *sd, struct media_link *link, - struct v4l2_subdev_format *source_fmt, - struct v4l2_subdev_format *sink_fmt) -{ - struct iss_csi2_device *csi2 = v4l2_get_subdevdata(sd); - struct iss_pipeline *pipe = to_iss_pipeline(&csi2->subdev.entity); - int rval; - - pipe->external = media_entity_to_v4l2_subdev(link->source->entity); - rval = omap4iss_get_external_info(pipe, link); - if (rval < 0) - return rval; - - return v4l2_subdev_link_validate_default(sd, link, source_fmt, - sink_fmt); -} - -/* - * csi2_init_formats - Initialize formats on all pads - * @sd: ISS CSI2 V4L2 subdevice - * @fh: V4L2 subdev file handle - * - * Initialize all pad formats with default values. If fh is not NULL, try - * formats are initialized on the file handle. Otherwise active formats are - * initialized on the device. - */ -static int csi2_init_formats(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh) -{ - struct v4l2_subdev_format format; - - memset(&format, 0, sizeof(format)); - format.pad = CSI2_PAD_SINK; - format.which = fh ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE; - format.format.code = MEDIA_BUS_FMT_SGRBG10_1X10; - format.format.width = 4096; - format.format.height = 4096; - csi2_set_format(sd, fh ? fh->state : NULL, &format); - - return 0; -} - -/* - * csi2_set_stream - Enable/Disable streaming on the CSI2 module - * @sd: ISS CSI2 V4L2 subdevice - * @enable: ISS pipeline stream state - * - * Return 0 on success or a negative error code otherwise. 
- */ -static int csi2_set_stream(struct v4l2_subdev *sd, int enable) -{ - struct iss_csi2_device *csi2 = v4l2_get_subdevdata(sd); - struct iss_device *iss = csi2->iss; - struct iss_video *video_out = &csi2->video_out; - int ret = 0; - - if (csi2->state == ISS_PIPELINE_STREAM_STOPPED) { - if (enable == ISS_PIPELINE_STREAM_STOPPED) - return 0; - - omap4iss_subclk_enable(iss, csi2->subclk); - } - - switch (enable) { - case ISS_PIPELINE_STREAM_CONTINUOUS: { - ret = omap4iss_csiphy_config(iss, sd); - if (ret < 0) - return ret; - - if (omap4iss_csiphy_acquire(csi2->phy) < 0) - return -ENODEV; - csi2_configure(csi2); - csi2_print_status(csi2); - - /* - * When outputting to memory with no buffer available, let the - * buffer queue handler start the hardware. A DMA queue flag - * ISS_VIDEO_DMAQUEUE_QUEUED will be set as soon as there is - * a buffer available. - */ - if (csi2->output & CSI2_OUTPUT_MEMORY && - !(video_out->dmaqueue_flags & ISS_VIDEO_DMAQUEUE_QUEUED)) - break; - /* Enable context 0 and IRQs */ - atomic_set(&csi2->stopping, 0); - csi2_ctx_enable(csi2, 0, 1); - csi2_if_enable(csi2, 1); - iss_video_dmaqueue_flags_clr(video_out); - break; - } - case ISS_PIPELINE_STREAM_STOPPED: - if (csi2->state == ISS_PIPELINE_STREAM_STOPPED) - return 0; - if (omap4iss_module_sync_idle(&sd->entity, &csi2->wait, - &csi2->stopping)) - ret = -ETIMEDOUT; - csi2_ctx_enable(csi2, 0, 0); - csi2_if_enable(csi2, 0); - csi2_irq_ctx_set(csi2, 0); - omap4iss_csiphy_release(csi2->phy); - omap4iss_subclk_disable(iss, csi2->subclk); - iss_video_dmaqueue_flags_clr(video_out); - break; - } - - csi2->state = enable; - return ret; -} - -/* subdev video operations */ -static const struct v4l2_subdev_video_ops csi2_video_ops = { - .s_stream = csi2_set_stream, -}; - -/* subdev pad operations */ -static const struct v4l2_subdev_pad_ops csi2_pad_ops = { - .enum_mbus_code = csi2_enum_mbus_code, - .enum_frame_size = csi2_enum_frame_size, - .get_fmt = csi2_get_format, - .set_fmt = csi2_set_format, - .link_validate = csi2_link_validate, -}; - -/* subdev operations */ -static const struct v4l2_subdev_ops csi2_ops = { - .video = &csi2_video_ops, - .pad = &csi2_pad_ops, -}; - -/* subdev internal operations */ -static const struct v4l2_subdev_internal_ops csi2_internal_ops = { - .open = csi2_init_formats, -}; - -/* ----------------------------------------------------------------------------- - * Media entity operations - */ - -/* - * csi2_link_setup - Setup CSI2 connections. - * @entity : Pointer to media entity structure - * @local : Pointer to local pad array - * @remote : Pointer to remote pad array - * @flags : Link flags - * return -EINVAL or zero on success - */ -static int csi2_link_setup(struct media_entity *entity, - const struct media_pad *local, - const struct media_pad *remote, u32 flags) -{ - struct v4l2_subdev *sd = media_entity_to_v4l2_subdev(entity); - struct iss_csi2_device *csi2 = v4l2_get_subdevdata(sd); - struct iss_csi2_ctrl_cfg *ctrl = &csi2->ctrl; - unsigned int index = local->index; - - /* FIXME: this is actually a hack! */ - if (is_media_entity_v4l2_subdev(remote->entity)) - index |= 2 << 16; - - /* - * The ISS core doesn't support pipelines with multiple video outputs. - * Revisit this when it will be implemented, and return -EBUSY for now. 
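The switch below can tell the memory link apart from the IPIPEIF link only because link_setup first folded a "remote entity is a subdev" flag into the local pad index. A compact sketch of that disambiguation trick; the pad number and flag value are placeholders:

    #include <stdbool.h>
    #include <stdio.h>

    #define PAD_SOURCE              1
    #define REMOTE_IS_SUBDEV        (2 << 16)       /* folded into the switch key */

    static const char *describe_link(unsigned int pad, bool remote_is_subdev)
    {
            unsigned int key = pad;

            if (remote_is_subdev)
                    key |= REMOTE_IS_SUBDEV;

            switch (key) {
            case PAD_SOURCE:
                    return "source pad -> memory (video node)";
            case PAD_SOURCE | REMOTE_IS_SUBDEV:
                    return "source pad -> next subdev";
            default:
                    return "unhandled link";
            }
    }

    int main(void)
    {
            printf("%s\n", describe_link(PAD_SOURCE, false));
            printf("%s\n", describe_link(PAD_SOURCE, true));
            return 0;
    }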
- */ - - switch (index) { - case CSI2_PAD_SOURCE: - if (flags & MEDIA_LNK_FL_ENABLED) { - if (csi2->output & ~CSI2_OUTPUT_MEMORY) - return -EBUSY; - csi2->output |= CSI2_OUTPUT_MEMORY; - } else { - csi2->output &= ~CSI2_OUTPUT_MEMORY; - } - break; - - case CSI2_PAD_SOURCE | 2 << 16: - if (flags & MEDIA_LNK_FL_ENABLED) { - if (csi2->output & ~CSI2_OUTPUT_IPIPEIF) - return -EBUSY; - csi2->output |= CSI2_OUTPUT_IPIPEIF; - } else { - csi2->output &= ~CSI2_OUTPUT_IPIPEIF; - } - break; - - default: - /* Link from camera to CSI2 is fixed... */ - return -EINVAL; - } - - ctrl->vp_only_enable = csi2->output & CSI2_OUTPUT_MEMORY ? false : true; - ctrl->vp_clk_enable = !!(csi2->output & CSI2_OUTPUT_IPIPEIF); - - return 0; -} - -/* media operations */ -static const struct media_entity_operations csi2_media_ops = { - .link_setup = csi2_link_setup, - .link_validate = v4l2_subdev_link_validate, -}; - -void omap4iss_csi2_unregister_entities(struct iss_csi2_device *csi2) -{ - v4l2_device_unregister_subdev(&csi2->subdev); - omap4iss_video_unregister(&csi2->video_out); -} - -int omap4iss_csi2_register_entities(struct iss_csi2_device *csi2, - struct v4l2_device *vdev) -{ - int ret; - - /* Register the subdev and video nodes. */ - ret = v4l2_device_register_subdev(vdev, &csi2->subdev); - if (ret < 0) - goto error; - - ret = omap4iss_video_register(&csi2->video_out, vdev); - if (ret < 0) - goto error; - - return 0; - -error: - omap4iss_csi2_unregister_entities(csi2); - return ret; -} - -/* ----------------------------------------------------------------------------- - * ISS CSI2 initialisation and cleanup - */ - -/* - * csi2_init_entities - Initialize subdev and media entity. - * @csi2: Pointer to csi2 structure. - * return -ENOMEM or zero on success - */ -static int csi2_init_entities(struct iss_csi2_device *csi2, const char *subname) -{ - struct v4l2_subdev *sd = &csi2->subdev; - struct media_pad *pads = csi2->pads; - struct media_entity *me = &sd->entity; - int ret; - char name[32]; - - v4l2_subdev_init(sd, &csi2_ops); - sd->internal_ops = &csi2_internal_ops; - snprintf(name, sizeof(name), "CSI2%s", subname); - snprintf(sd->name, sizeof(sd->name), "OMAP4 ISS %s", name); - - sd->grp_id = BIT(16); /* group ID for iss subdevs */ - v4l2_set_subdevdata(sd, csi2); - sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE; - - pads[CSI2_PAD_SOURCE].flags = MEDIA_PAD_FL_SOURCE; - pads[CSI2_PAD_SINK].flags = MEDIA_PAD_FL_SINK; - - me->ops = &csi2_media_ops; - ret = media_entity_pads_init(me, CSI2_PADS_NUM, pads); - if (ret < 0) - return ret; - - csi2_init_formats(sd, NULL); - - /* Video device node */ - csi2->video_out.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - csi2->video_out.ops = &csi2_issvideo_ops; - csi2->video_out.bpl_alignment = 32; - csi2->video_out.bpl_zero_padding = 1; - csi2->video_out.bpl_max = 0x1ffe0; - csi2->video_out.iss = csi2->iss; - csi2->video_out.capture_mem = PAGE_ALIGN(4096 * 4096) * 3; - - ret = omap4iss_video_init(&csi2->video_out, name); - if (ret < 0) - goto error_video; - - return 0; - -error_video: - media_entity_cleanup(&csi2->subdev.entity); - return ret; -} - -/* - * omap4iss_csi2_init - Routine for module driver init - */ -int omap4iss_csi2_init(struct iss_device *iss) -{ - struct iss_csi2_device *csi2a = &iss->csi2a; - struct iss_csi2_device *csi2b = &iss->csi2b; - int ret; - - csi2a->iss = iss; - csi2a->available = 1; - csi2a->regs1 = OMAP4_ISS_MEM_CSI2_A_REGS1; - csi2a->phy = &iss->csiphy1; - csi2a->subclk = OMAP4_ISS_SUBCLK_CSI2_A; - csi2a->state = ISS_PIPELINE_STREAM_STOPPED; - 
init_waitqueue_head(&csi2a->wait); - - ret = csi2_init_entities(csi2a, "a"); - if (ret < 0) - return ret; - - csi2b->iss = iss; - csi2b->available = 1; - csi2b->regs1 = OMAP4_ISS_MEM_CSI2_B_REGS1; - csi2b->phy = &iss->csiphy2; - csi2b->subclk = OMAP4_ISS_SUBCLK_CSI2_B; - csi2b->state = ISS_PIPELINE_STREAM_STOPPED; - init_waitqueue_head(&csi2b->wait); - - ret = csi2_init_entities(csi2b, "b"); - if (ret < 0) - return ret; - - return 0; -} - -/* - * omap4iss_csi2_create_links() - CSI2 pads links creation - * @iss: Pointer to ISS device - * - * return negative error code or zero on success - */ -int omap4iss_csi2_create_links(struct iss_device *iss) -{ - struct iss_csi2_device *csi2a = &iss->csi2a; - struct iss_csi2_device *csi2b = &iss->csi2b; - int ret; - - /* Connect the CSI2a subdev to the video node. */ - ret = media_create_pad_link(&csi2a->subdev.entity, CSI2_PAD_SOURCE, - &csi2a->video_out.video.entity, 0, 0); - if (ret < 0) - return ret; - - /* Connect the CSI2b subdev to the video node. */ - ret = media_create_pad_link(&csi2b->subdev.entity, CSI2_PAD_SOURCE, - &csi2b->video_out.video.entity, 0, 0); - if (ret < 0) - return ret; - - return 0; -} - -/* - * omap4iss_csi2_cleanup - Routine for module driver cleanup - */ -void omap4iss_csi2_cleanup(struct iss_device *iss) -{ - struct iss_csi2_device *csi2a = &iss->csi2a; - struct iss_csi2_device *csi2b = &iss->csi2b; - - omap4iss_video_cleanup(&csi2a->video_out); - media_entity_cleanup(&csi2a->subdev.entity); - - omap4iss_video_cleanup(&csi2b->video_out); - media_entity_cleanup(&csi2b->subdev.entity); -} diff --git a/drivers/staging/media/omap4iss/iss_csi2.h b/drivers/staging/media/omap4iss/iss_csi2.h deleted file mode 100644 index 3f7fd9cff41d..000000000000 --- a/drivers/staging/media/omap4iss/iss_csi2.h +++ /dev/null @@ -1,155 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * TI OMAP4 ISS V4L2 Driver - CSI2 module - * - * Copyright (C) 2012 Texas Instruments, Inc. 
- * - * Author: Sergio Aguirre - */ - -#ifndef OMAP4_ISS_CSI2_H -#define OMAP4_ISS_CSI2_H - -#include -#include - -#include "iss_video.h" - -struct iss_csiphy; - -/* This is not an exhaustive list */ -enum iss_csi2_pix_formats { - CSI2_PIX_FMT_OTHERS = 0, - CSI2_PIX_FMT_YUV422_8BIT = 0x1e, - CSI2_PIX_FMT_YUV422_8BIT_VP = 0x9e, - CSI2_PIX_FMT_YUV422_8BIT_VP16 = 0xde, - CSI2_PIX_FMT_RAW10_EXP16 = 0xab, - CSI2_PIX_FMT_RAW10_EXP16_VP = 0x12f, - CSI2_PIX_FMT_RAW8 = 0x2a, - CSI2_PIX_FMT_RAW8_DPCM10_EXP16 = 0x2aa, - CSI2_PIX_FMT_RAW8_DPCM10_VP = 0x32a, - CSI2_PIX_FMT_RAW8_VP = 0x12a, - CSI2_USERDEF_8BIT_DATA1_DPCM10_VP = 0x340, - CSI2_USERDEF_8BIT_DATA1_DPCM10 = 0x2c0, - CSI2_USERDEF_8BIT_DATA1 = 0x40, -}; - -enum iss_csi2_irqevents { - OCP_ERR_IRQ = 0x4000, - SHORT_PACKET_IRQ = 0x2000, - ECC_CORRECTION_IRQ = 0x1000, - ECC_NO_CORRECTION_IRQ = 0x800, - COMPLEXIO2_ERR_IRQ = 0x400, - COMPLEXIO1_ERR_IRQ = 0x200, - FIFO_OVF_IRQ = 0x100, - CONTEXT7 = 0x80, - CONTEXT6 = 0x40, - CONTEXT5 = 0x20, - CONTEXT4 = 0x10, - CONTEXT3 = 0x8, - CONTEXT2 = 0x4, - CONTEXT1 = 0x2, - CONTEXT0 = 0x1, -}; - -enum iss_csi2_ctx_irqevents { - CTX_ECC_CORRECTION = 0x100, - CTX_LINE_NUMBER = 0x80, - CTX_FRAME_NUMBER = 0x40, - CTX_CS = 0x20, - CTX_LE = 0x8, - CTX_LS = 0x4, - CTX_FE = 0x2, - CTX_FS = 0x1, -}; - -enum iss_csi2_frame_mode { - ISS_CSI2_FRAME_IMMEDIATE, - ISS_CSI2_FRAME_AFTERFEC, -}; - -#define ISS_CSI2_MAX_CTX_NUM 7 - -struct iss_csi2_ctx_cfg { - u8 ctxnum; /* context number 0 - 7 */ - u8 dpcm_decompress; - - /* Fields in CSI2_CTx_CTRL2 - locked by CSI2_CTx_CTRL1.CTX_EN */ - u8 virtual_id; - u16 format_id; /* as in CSI2_CTx_CTRL2[9:0] */ - u8 dpcm_predictor; /* 1: simple, 0: advanced */ - u16 frame; - - /* Fields in CSI2_CTx_CTRL1/3 - Shadowed */ - u16 alpha; - u16 data_offset; - u32 ping_addr; - u32 pong_addr; - u8 eof_enabled; - u8 eol_enabled; - u8 checksum_enabled; - u8 enabled; -}; - -struct iss_csi2_timing_cfg { - u8 ionum; /* IO1 or IO2 as in CSI2_TIMING */ - unsigned force_rx_mode:1; - unsigned stop_state_16x:1; - unsigned stop_state_4x:1; - u16 stop_state_counter; -}; - -struct iss_csi2_ctrl_cfg { - bool vp_clk_enable; - bool vp_only_enable; - u8 vp_out_ctrl; - enum iss_csi2_frame_mode frame_mode; - bool ecc_enable; - bool if_enable; -}; - -#define CSI2_PAD_SINK 0 -#define CSI2_PAD_SOURCE 1 -#define CSI2_PADS_NUM 2 - -#define CSI2_OUTPUT_IPIPEIF BIT(0) -#define CSI2_OUTPUT_MEMORY BIT(1) - -struct iss_csi2_device { - struct v4l2_subdev subdev; - struct media_pad pads[CSI2_PADS_NUM]; - struct v4l2_mbus_framefmt formats[CSI2_PADS_NUM]; - - struct iss_video video_out; - struct iss_device *iss; - - u8 available; /* Is the IP present on the silicon? */ - - /* memory resources, as defined in enum iss_mem_resources */ - unsigned int regs1; - unsigned int regs2; - /* ISP subclock, as defined in enum iss_isp_subclk_resource */ - unsigned int subclk; - - u32 output; /* output to IPIPEIF, memory or both? 
*/ - bool dpcm_decompress; - unsigned int frame_skip; - - struct iss_csiphy *phy; - struct iss_csi2_ctx_cfg contexts[ISS_CSI2_MAX_CTX_NUM + 1]; - struct iss_csi2_timing_cfg timing[2]; - struct iss_csi2_ctrl_cfg ctrl; - enum iss_pipeline_stream_state state; - wait_queue_head_t wait; - atomic_t stopping; -}; - -void omap4iss_csi2_isr(struct iss_csi2_device *csi2); -int omap4iss_csi2_reset(struct iss_csi2_device *csi2); -int omap4iss_csi2_init(struct iss_device *iss); -int omap4iss_csi2_create_links(struct iss_device *iss); -void omap4iss_csi2_cleanup(struct iss_device *iss); -void omap4iss_csi2_unregister_entities(struct iss_csi2_device *csi2); -int omap4iss_csi2_register_entities(struct iss_csi2_device *csi2, - struct v4l2_device *vdev); -#endif /* OMAP4_ISS_CSI2_H */ diff --git a/drivers/staging/media/omap4iss/iss_csiphy.c b/drivers/staging/media/omap4iss/iss_csiphy.c deleted file mode 100644 index 96f2ce045138..000000000000 --- a/drivers/staging/media/omap4iss/iss_csiphy.c +++ /dev/null @@ -1,277 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * TI OMAP4 ISS V4L2 Driver - CSI PHY module - * - * Copyright (C) 2012 Texas Instruments, Inc. - * - * Author: Sergio Aguirre - */ - -#include -#include -#include - -#include "../../../../arch/arm/mach-omap2/control.h" - -#include "iss.h" -#include "iss_regs.h" -#include "iss_csiphy.h" - -/* - * csiphy_lanes_config - Configuration of CSIPHY lanes. - * - * Updates HW configuration. - * Called with phy->mutex taken. - */ -static void csiphy_lanes_config(struct iss_csiphy *phy) -{ - unsigned int i; - u32 reg; - - reg = iss_reg_read(phy->iss, phy->cfg_regs, CSI2_COMPLEXIO_CFG); - - for (i = 0; i < phy->max_data_lanes; i++) { - reg &= ~(CSI2_COMPLEXIO_CFG_DATA_POL(i + 1) | - CSI2_COMPLEXIO_CFG_DATA_POSITION_MASK(i + 1)); - reg |= (phy->lanes.data[i].pol ? - CSI2_COMPLEXIO_CFG_DATA_POL(i + 1) : 0); - reg |= (phy->lanes.data[i].pos << - CSI2_COMPLEXIO_CFG_DATA_POSITION_SHIFT(i + 1)); - } - - reg &= ~(CSI2_COMPLEXIO_CFG_CLOCK_POL | - CSI2_COMPLEXIO_CFG_CLOCK_POSITION_MASK); - reg |= phy->lanes.clk.pol ? CSI2_COMPLEXIO_CFG_CLOCK_POL : 0; - reg |= phy->lanes.clk.pos << CSI2_COMPLEXIO_CFG_CLOCK_POSITION_SHIFT; - - iss_reg_write(phy->iss, phy->cfg_regs, CSI2_COMPLEXIO_CFG, reg); -} - -/* - * csiphy_set_power - * @power: Power state to be set. - * - * Returns 0 if successful, or -EBUSY if the retry count is exceeded. - */ -static int csiphy_set_power(struct iss_csiphy *phy, u32 power) -{ - u32 reg; - u8 retry_count; - - iss_reg_update(phy->iss, phy->cfg_regs, CSI2_COMPLEXIO_CFG, - CSI2_COMPLEXIO_CFG_PWD_CMD_MASK, - power | CSI2_COMPLEXIO_CFG_PWR_AUTO); - - retry_count = 0; - do { - udelay(1); - reg = iss_reg_read(phy->iss, phy->cfg_regs, CSI2_COMPLEXIO_CFG) - & CSI2_COMPLEXIO_CFG_PWD_STATUS_MASK; - - if (reg != power >> 2) - retry_count++; - - } while ((reg != power >> 2) && (retry_count < 250)); - - if (retry_count == 250) { - dev_err(phy->iss->dev, "CSI2 CIO set power failed!\n"); - return -EBUSY; - } - - return 0; -} - -/* - * csiphy_dphy_config - Configure CSI2 D-PHY parameters. - * - * Called with phy->mutex taken. 
- */ -static void csiphy_dphy_config(struct iss_csiphy *phy) -{ - u32 reg; - - /* Set up REGISTER0 */ - reg = phy->dphy.ths_term << REGISTER0_THS_TERM_SHIFT; - reg |= phy->dphy.ths_settle << REGISTER0_THS_SETTLE_SHIFT; - - iss_reg_write(phy->iss, phy->phy_regs, REGISTER0, reg); - - /* Set up REGISTER1 */ - reg = phy->dphy.tclk_term << REGISTER1_TCLK_TERM_SHIFT; - reg |= phy->dphy.tclk_miss << REGISTER1_CTRLCLK_DIV_FACTOR_SHIFT; - reg |= phy->dphy.tclk_settle << REGISTER1_TCLK_SETTLE_SHIFT; - reg |= 0xb8 << REGISTER1_DPHY_HS_SYNC_PATTERN_SHIFT; - - iss_reg_write(phy->iss, phy->phy_regs, REGISTER1, reg); -} - -/* - * TCLK values are OK at their reset values - */ -#define TCLK_TERM 0 -#define TCLK_MISS 1 -#define TCLK_SETTLE 14 - -int omap4iss_csiphy_config(struct iss_device *iss, - struct v4l2_subdev *csi2_subdev) -{ - struct iss_csi2_device *csi2 = v4l2_get_subdevdata(csi2_subdev); - struct iss_pipeline *pipe = to_iss_pipeline(&csi2_subdev->entity); - struct iss_v4l2_subdevs_group *subdevs = pipe->external->host_priv; - struct iss_csiphy_dphy_cfg csi2phy; - int csi2_ddrclk_khz; - struct iss_csiphy_lanes_cfg *lanes; - unsigned int used_lanes = 0; - u32 cam_rx_ctrl; - unsigned int i; - - lanes = &subdevs->bus.csi2.lanecfg; - - /* - * SCM.CONTROL_CAMERA_RX - * - bit [31] : CSIPHY2 lane 2 enable (4460+ only) - * - bit [30:29] : CSIPHY2 per-lane enable (1 to 0) - * - bit [28:24] : CSIPHY1 per-lane enable (4 to 0) - * - bit [21] : CSIPHY2 CTRLCLK enable - * - bit [20:19] : CSIPHY2 config: 00 d-phy, 01/10 ccp2 - * - bit [18] : CSIPHY1 CTRLCLK enable - * - bit [17:16] : CSIPHY1 config: 00 d-phy, 01/10 ccp2 - */ - /* - * TODO: When implementing DT support specify the CONTROL_CAMERA_RX - * register offset in the syscon property instead of hardcoding it. - */ - regmap_read(iss->syscon, 0x68, &cam_rx_ctrl); - - if (subdevs->interface == ISS_INTERFACE_CSI2A_PHY1) { - cam_rx_ctrl &= ~(OMAP4_CAMERARX_CSI21_LANEENABLE_MASK | - OMAP4_CAMERARX_CSI21_CAMMODE_MASK); - /* NOTE: Leave CSIPHY1 config to 0x0: D-PHY mode */ - /* Enable all lanes for now */ - cam_rx_ctrl |= - 0x1f << OMAP4_CAMERARX_CSI21_LANEENABLE_SHIFT; - /* Enable CTRLCLK */ - cam_rx_ctrl |= OMAP4_CAMERARX_CSI21_CTRLCLKEN_MASK; - } - - if (subdevs->interface == ISS_INTERFACE_CSI2B_PHY2) { - cam_rx_ctrl &= ~(OMAP4_CAMERARX_CSI22_LANEENABLE_MASK | - OMAP4_CAMERARX_CSI22_CAMMODE_MASK); - /* NOTE: Leave CSIPHY2 config to 0x0: D-PHY mode */ - /* Enable all lanes for now */ - cam_rx_ctrl |= - 0x3 << OMAP4_CAMERARX_CSI22_LANEENABLE_SHIFT; - /* Enable CTRLCLK */ - cam_rx_ctrl |= OMAP4_CAMERARX_CSI22_CTRLCLKEN_MASK; - } - - regmap_write(iss->syscon, 0x68, cam_rx_ctrl); - - /* Reset used lane count */ - csi2->phy->used_data_lanes = 0; - - /* Clock and data lanes verification */ - for (i = 0; i < csi2->phy->max_data_lanes; i++) { - if (lanes->data[i].pos == 0) - continue; - - if (lanes->data[i].pol > 1 || - lanes->data[i].pos > (csi2->phy->max_data_lanes + 1)) - return -EINVAL; - - if (used_lanes & (1 << lanes->data[i].pos)) - return -EINVAL; - - used_lanes |= 1 << lanes->data[i].pos; - csi2->phy->used_data_lanes++; - } - - if (lanes->clk.pol > 1 || - lanes->clk.pos > (csi2->phy->max_data_lanes + 1)) - return -EINVAL; - - if (lanes->clk.pos == 0 || used_lanes & (1 << lanes->clk.pos)) - return -EINVAL; - - csi2_ddrclk_khz = pipe->external_rate / 1000 - / (2 * csi2->phy->used_data_lanes) - * pipe->external_bpp; - - /* - * THS_TERM: Programmed value = ceil(12.5 ns/DDRClk period) - 1. - * THS_SETTLE: Programmed value = ceil(90 ns/DDRClk period) + 3. 
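The assignments just below implement those two formulas in kHz fixed-point arithmetic, after deriving the DDR clock from the external pixel rate, bits per pixel and lane count. A worked standalone example with made-up link parameters:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

    int main(void)
    {
            unsigned int pixel_rate = 100000000;    /* 100 Mpixel/s, illustrative */
            unsigned int bpp = 10;                  /* RAW10 */
            unsigned int lanes = 2;

            /* Same shape as the driver's expression; result in kHz. */
            unsigned int ddrclk_khz = pixel_rate / 1000 / (2 * lanes) * bpp;

            /* ceil(12.5 ns / period) - 1, computed as ceil(25 * kHz / 2e6) - 1 */
            unsigned int ths_term = DIV_ROUND_UP(25 * ddrclk_khz, 2000000) - 1;
            /* ceil(90 ns / period) + 3, computed as ceil(90 * kHz / 1e6) + 3 */
            unsigned int ths_settle = DIV_ROUND_UP(90 * ddrclk_khz, 1000000) + 3;

            printf("ddrclk = %u kHz\n", ddrclk_khz); /* 250000 kHz -> 4 ns period */
            printf("ths_term = %u\n", ths_term);     /* ceil(3.125) - 1 = 3 */
            printf("ths_settle = %u\n", ths_settle); /* ceil(22.5) + 3 = 26 */
            return 0;
    }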
- */ - csi2phy.ths_term = DIV_ROUND_UP(25 * csi2_ddrclk_khz, 2000000) - 1; - csi2phy.ths_settle = DIV_ROUND_UP(90 * csi2_ddrclk_khz, 1000000) + 3; - csi2phy.tclk_term = TCLK_TERM; - csi2phy.tclk_miss = TCLK_MISS; - csi2phy.tclk_settle = TCLK_SETTLE; - - mutex_lock(&csi2->phy->mutex); - csi2->phy->dphy = csi2phy; - csi2->phy->lanes = *lanes; - mutex_unlock(&csi2->phy->mutex); - - return 0; -} - -int omap4iss_csiphy_acquire(struct iss_csiphy *phy) -{ - int rval; - - mutex_lock(&phy->mutex); - - rval = omap4iss_csi2_reset(phy->csi2); - if (rval) - goto done; - - csiphy_dphy_config(phy); - csiphy_lanes_config(phy); - - rval = csiphy_set_power(phy, CSI2_COMPLEXIO_CFG_PWD_CMD_ON); - if (rval) - goto done; - - phy->phy_in_use = 1; - -done: - mutex_unlock(&phy->mutex); - return rval; -} - -void omap4iss_csiphy_release(struct iss_csiphy *phy) -{ - mutex_lock(&phy->mutex); - if (phy->phy_in_use) { - csiphy_set_power(phy, CSI2_COMPLEXIO_CFG_PWD_CMD_OFF); - phy->phy_in_use = 0; - } - mutex_unlock(&phy->mutex); -} - -/* - * omap4iss_csiphy_init - Initialize the CSI PHY frontends - */ -int omap4iss_csiphy_init(struct iss_device *iss) -{ - struct iss_csiphy *phy1 = &iss->csiphy1; - struct iss_csiphy *phy2 = &iss->csiphy2; - - phy1->iss = iss; - phy1->csi2 = &iss->csi2a; - phy1->max_data_lanes = ISS_CSIPHY1_NUM_DATA_LANES; - phy1->used_data_lanes = 0; - phy1->cfg_regs = OMAP4_ISS_MEM_CSI2_A_REGS1; - phy1->phy_regs = OMAP4_ISS_MEM_CAMERARX_CORE1; - mutex_init(&phy1->mutex); - - phy2->iss = iss; - phy2->csi2 = &iss->csi2b; - phy2->max_data_lanes = ISS_CSIPHY2_NUM_DATA_LANES; - phy2->used_data_lanes = 0; - phy2->cfg_regs = OMAP4_ISS_MEM_CSI2_B_REGS1; - phy2->phy_regs = OMAP4_ISS_MEM_CAMERARX_CORE2; - mutex_init(&phy2->mutex); - - return 0; -} diff --git a/drivers/staging/media/omap4iss/iss_csiphy.h b/drivers/staging/media/omap4iss/iss_csiphy.h deleted file mode 100644 index 44408e4fcf3b..000000000000 --- a/drivers/staging/media/omap4iss/iss_csiphy.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * TI OMAP4 ISS V4L2 Driver - CSI PHY module - * - * Copyright (C) 2012 Texas Instruments, Inc. - * - * Author: Sergio Aguirre - */ - -#ifndef OMAP4_ISS_CSI_PHY_H -#define OMAP4_ISS_CSI_PHY_H - -#include - -struct iss_csi2_device; - -struct iss_csiphy_dphy_cfg { - u8 ths_term; - u8 ths_settle; - u8 tclk_term; - unsigned tclk_miss:1; - u8 tclk_settle; -}; - -struct iss_csiphy { - struct iss_device *iss; - struct mutex mutex; /* serialize csiphy configuration */ - u8 phy_in_use; - struct iss_csi2_device *csi2; - - /* memory resources, as defined in enum iss_mem_resources */ - unsigned int cfg_regs; - unsigned int phy_regs; - - u8 max_data_lanes; /* number of CSI2 Data Lanes supported */ - u8 used_data_lanes; /* number of CSI2 Data Lanes used */ - struct iss_csiphy_lanes_cfg lanes; - struct iss_csiphy_dphy_cfg dphy; -}; - -int omap4iss_csiphy_config(struct iss_device *iss, - struct v4l2_subdev *csi2_subdev); -int omap4iss_csiphy_acquire(struct iss_csiphy *phy); -void omap4iss_csiphy_release(struct iss_csiphy *phy); -int omap4iss_csiphy_init(struct iss_device *iss); - -#endif /* OMAP4_ISS_CSI_PHY_H */ diff --git a/drivers/staging/media/omap4iss/iss_ipipe.c b/drivers/staging/media/omap4iss/iss_ipipe.c deleted file mode 100644 index 4a4eae290d65..000000000000 --- a/drivers/staging/media/omap4iss/iss_ipipe.c +++ /dev/null @@ -1,579 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * TI OMAP4 ISS V4L2 Driver - ISP IPIPE module - * - * Copyright (C) 2012 Texas Instruments, Inc. 
- * - * Author: Sergio Aguirre - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "iss.h" -#include "iss_regs.h" -#include "iss_ipipe.h" - -static struct v4l2_mbus_framefmt * -__ipipe_get_format(struct iss_ipipe_device *ipipe, - struct v4l2_subdev_state *sd_state, - unsigned int pad, - enum v4l2_subdev_format_whence which); - -static const unsigned int ipipe_fmts[] = { - MEDIA_BUS_FMT_SGRBG10_1X10, - MEDIA_BUS_FMT_SRGGB10_1X10, - MEDIA_BUS_FMT_SBGGR10_1X10, - MEDIA_BUS_FMT_SGBRG10_1X10, -}; - -/* - * ipipe_print_status - Print current IPIPE Module register values. - * @ipipe: Pointer to ISS ISP IPIPE device. - * - * Also prints other debug information stored in the IPIPE module. - */ -#define IPIPE_PRINT_REGISTER(iss, name)\ - dev_dbg(iss->dev, "###IPIPE " #name "=0x%08x\n", \ - iss_reg_read(iss, OMAP4_ISS_MEM_ISP_IPIPE, IPIPE_##name)) - -static void ipipe_print_status(struct iss_ipipe_device *ipipe) -{ - struct iss_device *iss = to_iss_device(ipipe); - - dev_dbg(iss->dev, "-------------IPIPE Register dump-------------\n"); - - IPIPE_PRINT_REGISTER(iss, SRC_EN); - IPIPE_PRINT_REGISTER(iss, SRC_MODE); - IPIPE_PRINT_REGISTER(iss, SRC_FMT); - IPIPE_PRINT_REGISTER(iss, SRC_COL); - IPIPE_PRINT_REGISTER(iss, SRC_VPS); - IPIPE_PRINT_REGISTER(iss, SRC_VSZ); - IPIPE_PRINT_REGISTER(iss, SRC_HPS); - IPIPE_PRINT_REGISTER(iss, SRC_HSZ); - IPIPE_PRINT_REGISTER(iss, GCK_MMR); - IPIPE_PRINT_REGISTER(iss, YUV_PHS); - - dev_dbg(iss->dev, "-----------------------------------------------\n"); -} - -/* - * ipipe_enable - Enable/Disable IPIPE. - * @enable: enable flag - * - */ -static void ipipe_enable(struct iss_ipipe_device *ipipe, u8 enable) -{ - struct iss_device *iss = to_iss_device(ipipe); - - iss_reg_update(iss, OMAP4_ISS_MEM_ISP_IPIPE, IPIPE_SRC_EN, - IPIPE_SRC_EN_EN, enable ? IPIPE_SRC_EN_EN : 0); -} - -/* ----------------------------------------------------------------------------- - * Format- and pipeline-related configuration helpers - */ - -static void ipipe_configure(struct iss_ipipe_device *ipipe) -{ - struct iss_device *iss = to_iss_device(ipipe); - struct v4l2_mbus_framefmt *format; - - /* IPIPE_PAD_SINK */ - format = &ipipe->formats[IPIPE_PAD_SINK]; - - /* NOTE: Currently just supporting pipeline IN: RGB, OUT: YUV422 */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_IPIPE, IPIPE_SRC_FMT, - IPIPE_SRC_FMT_RAW2YUV); - - /* Enable YUV444 -> YUV422 conversion */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_IPIPE, IPIPE_YUV_PHS, - IPIPE_YUV_PHS_LPF); - - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_IPIPE, IPIPE_SRC_VPS, 0); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_IPIPE, IPIPE_SRC_HPS, 0); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_IPIPE, IPIPE_SRC_VSZ, - (format->height - 2) & IPIPE_SRC_VSZ_MASK); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_IPIPE, IPIPE_SRC_HSZ, - (format->width - 1) & IPIPE_SRC_HSZ_MASK); - - /* Ignore ipipeif_wrt signal, and operate on-the-fly. */ - iss_reg_clr(iss, OMAP4_ISS_MEM_ISP_IPIPE, IPIPE_SRC_MODE, - IPIPE_SRC_MODE_WRT | IPIPE_SRC_MODE_OST); - - /* HACK: Values tuned for Ducati SW (OV) */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_IPIPE, IPIPE_SRC_COL, - IPIPE_SRC_COL_EE_B | IPIPE_SRC_COL_EO_GB | - IPIPE_SRC_COL_OE_GR | IPIPE_SRC_COL_OO_R); - - /* IPIPE_PAD_SOURCE_VP */ - format = &ipipe->formats[IPIPE_PAD_SOURCE_VP]; - /* Do nothing? 
*/ -} - -/* ----------------------------------------------------------------------------- - * V4L2 subdev operations - */ - -/* - * ipipe_set_stream - Enable/Disable streaming on the IPIPE module - * @sd: ISP IPIPE V4L2 subdevice - * @enable: Enable/disable stream - */ -static int ipipe_set_stream(struct v4l2_subdev *sd, int enable) -{ - struct iss_ipipe_device *ipipe = v4l2_get_subdevdata(sd); - struct iss_device *iss = to_iss_device(ipipe); - int ret = 0; - - if (ipipe->state == ISS_PIPELINE_STREAM_STOPPED) { - if (enable == ISS_PIPELINE_STREAM_STOPPED) - return 0; - - omap4iss_isp_subclk_enable(iss, OMAP4_ISS_ISP_SUBCLK_IPIPE); - - /* Enable clk_arm_g0 */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_IPIPE, IPIPE_GCK_MMR, - IPIPE_GCK_MMR_REG); - - /* Enable clk_pix_g[3:0] */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_IPIPE, IPIPE_GCK_PIX, - IPIPE_GCK_PIX_G3 | IPIPE_GCK_PIX_G2 | - IPIPE_GCK_PIX_G1 | IPIPE_GCK_PIX_G0); - } - - switch (enable) { - case ISS_PIPELINE_STREAM_CONTINUOUS: - - ipipe_configure(ipipe); - ipipe_print_status(ipipe); - - atomic_set(&ipipe->stopping, 0); - ipipe_enable(ipipe, 1); - break; - - case ISS_PIPELINE_STREAM_STOPPED: - if (ipipe->state == ISS_PIPELINE_STREAM_STOPPED) - return 0; - if (omap4iss_module_sync_idle(&sd->entity, &ipipe->wait, - &ipipe->stopping)) - ret = -ETIMEDOUT; - - ipipe_enable(ipipe, 0); - omap4iss_isp_subclk_disable(iss, OMAP4_ISS_ISP_SUBCLK_IPIPE); - break; - } - - ipipe->state = enable; - return ret; -} - -static struct v4l2_mbus_framefmt * -__ipipe_get_format(struct iss_ipipe_device *ipipe, - struct v4l2_subdev_state *sd_state, - unsigned int pad, - enum v4l2_subdev_format_whence which) -{ - if (which == V4L2_SUBDEV_FORMAT_TRY) - return v4l2_subdev_state_get_format(sd_state, pad); - - return &ipipe->formats[pad]; -} - -/* - * ipipe_try_format - Try video format on a pad - * @ipipe: ISS IPIPE device - * @sd_state: V4L2 subdev state - * @pad: Pad number - * @fmt: Format - */ -static void -ipipe_try_format(struct iss_ipipe_device *ipipe, - struct v4l2_subdev_state *sd_state, - unsigned int pad, - struct v4l2_mbus_framefmt *fmt, - enum v4l2_subdev_format_whence which) -{ - struct v4l2_mbus_framefmt *format; - unsigned int width = fmt->width; - unsigned int height = fmt->height; - unsigned int i; - - switch (pad) { - case IPIPE_PAD_SINK: - for (i = 0; i < ARRAY_SIZE(ipipe_fmts); i++) { - if (fmt->code == ipipe_fmts[i]) - break; - } - - /* If not found, use SGRBG10 as default */ - if (i >= ARRAY_SIZE(ipipe_fmts)) - fmt->code = MEDIA_BUS_FMT_SGRBG10_1X10; - - /* Clamp the input size. 
*/ - fmt->width = clamp_t(u32, width, 1, 8192); - fmt->height = clamp_t(u32, height, 1, 8192); - fmt->colorspace = V4L2_COLORSPACE_SRGB; - break; - - case IPIPE_PAD_SOURCE_VP: - format = __ipipe_get_format(ipipe, sd_state, IPIPE_PAD_SINK, - which); - memcpy(fmt, format, sizeof(*fmt)); - - fmt->code = MEDIA_BUS_FMT_UYVY8_1X16; - fmt->width = clamp_t(u32, width, 32, fmt->width); - fmt->height = clamp_t(u32, height, 32, fmt->height); - fmt->colorspace = V4L2_COLORSPACE_JPEG; - break; - } - - fmt->field = V4L2_FIELD_NONE; -} - -/* - * ipipe_enum_mbus_code - Handle pixel format enumeration - * @sd : pointer to v4l2 subdev structure - * @sd_state: V4L2 subdev state - * @code : pointer to v4l2_subdev_mbus_code_enum structure - * return -EINVAL or zero on success - */ -static int ipipe_enum_mbus_code(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_mbus_code_enum *code) -{ - switch (code->pad) { - case IPIPE_PAD_SINK: - if (code->index >= ARRAY_SIZE(ipipe_fmts)) - return -EINVAL; - - code->code = ipipe_fmts[code->index]; - break; - - case IPIPE_PAD_SOURCE_VP: - /* FIXME: Forced format conversion inside IPIPE ? */ - if (code->index != 0) - return -EINVAL; - - code->code = MEDIA_BUS_FMT_UYVY8_1X16; - break; - - default: - return -EINVAL; - } - - return 0; -} - -static int ipipe_enum_frame_size(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_frame_size_enum *fse) -{ - struct iss_ipipe_device *ipipe = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt format; - - if (fse->index != 0) - return -EINVAL; - - format.code = fse->code; - format.width = 1; - format.height = 1; - ipipe_try_format(ipipe, sd_state, fse->pad, &format, fse->which); - fse->min_width = format.width; - fse->min_height = format.height; - - if (format.code != fse->code) - return -EINVAL; - - format.code = fse->code; - format.width = -1; - format.height = -1; - ipipe_try_format(ipipe, sd_state, fse->pad, &format, fse->which); - fse->max_width = format.width; - fse->max_height = format.height; - - return 0; -} - -/* - * ipipe_get_format - Retrieve the video format on a pad - * @sd : ISP IPIPE V4L2 subdevice - * @sd_state: V4L2 subdev state - * @fmt: Format - * - * Return 0 on success or -EINVAL if the pad is invalid or doesn't correspond - * to the format type. - */ -static int ipipe_get_format(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_format *fmt) -{ - struct iss_ipipe_device *ipipe = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt *format; - - format = __ipipe_get_format(ipipe, sd_state, fmt->pad, fmt->which); - if (!format) - return -EINVAL; - - fmt->format = *format; - return 0; -} - -/* - * ipipe_set_format - Set the video format on a pad - * @sd : ISP IPIPE V4L2 subdevice - * @sd_state: V4L2 subdev state - * @fmt: Format - * - * Return 0 on success or -EINVAL if the pad is invalid or doesn't correspond - * to the format type. 
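A note on ipipe_enum_frame_size() above: the minimum and maximum frame sizes are probed by letting ipipe_try_format() clamp two artificial requests. The first pass asks for 1x1; the second assigns -1 to the unsigned width/height fields, which wraps to 0xffffffff, so the clamp returns the largest size the pad supports (8192 on the sink pad). A trivial standalone illustration of that wrap-and-clamp, with a simplified stand-in for the kernel's clamp_t():

#include <stdio.h>

/* simplified stand-in for the kernel's clamp_t() */
#define clamp_t(type, val, lo, hi) \
	((type)(val) < (type)(lo) ? (type)(lo) : \
	 ((type)(val) > (type)(hi) ? (type)(hi) : (type)(val)))

int main(void)
{
	unsigned int huge = -1;	/* wraps to 0xffffffff */

	printf("min=%u max=%u\n",
	       clamp_t(unsigned int, 1, 1, 8192),
	       clamp_t(unsigned int, huge, 1, 8192));
	return 0;
}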
- */ -static int ipipe_set_format(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_format *fmt) -{ - struct iss_ipipe_device *ipipe = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt *format; - - format = __ipipe_get_format(ipipe, sd_state, fmt->pad, fmt->which); - if (!format) - return -EINVAL; - - ipipe_try_format(ipipe, sd_state, fmt->pad, &fmt->format, fmt->which); - *format = fmt->format; - - /* Propagate the format from sink to source */ - if (fmt->pad == IPIPE_PAD_SINK) { - format = __ipipe_get_format(ipipe, sd_state, - IPIPE_PAD_SOURCE_VP, - fmt->which); - *format = fmt->format; - ipipe_try_format(ipipe, sd_state, IPIPE_PAD_SOURCE_VP, format, - fmt->which); - } - - return 0; -} - -static int ipipe_link_validate(struct v4l2_subdev *sd, struct media_link *link, - struct v4l2_subdev_format *source_fmt, - struct v4l2_subdev_format *sink_fmt) -{ - /* Check if the two ends match */ - if (source_fmt->format.width != sink_fmt->format.width || - source_fmt->format.height != sink_fmt->format.height) - return -EPIPE; - - if (source_fmt->format.code != sink_fmt->format.code) - return -EPIPE; - - return 0; -} - -/* - * ipipe_init_formats - Initialize formats on all pads - * @sd: ISP IPIPE V4L2 subdevice - * @fh: V4L2 subdev file handle - * - * Initialize all pad formats with default values. If fh is not NULL, try - * formats are initialized on the file handle. Otherwise active formats are - * initialized on the device. - */ -static int ipipe_init_formats(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh) -{ - struct v4l2_subdev_format format; - - memset(&format, 0, sizeof(format)); - format.pad = IPIPE_PAD_SINK; - format.which = fh ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE; - format.format.code = MEDIA_BUS_FMT_SGRBG10_1X10; - format.format.width = 4096; - format.format.height = 4096; - ipipe_set_format(sd, fh ? fh->state : NULL, &format); - - return 0; -} - -/* V4L2 subdev video operations */ -static const struct v4l2_subdev_video_ops ipipe_v4l2_video_ops = { - .s_stream = ipipe_set_stream, -}; - -/* V4L2 subdev pad operations */ -static const struct v4l2_subdev_pad_ops ipipe_v4l2_pad_ops = { - .enum_mbus_code = ipipe_enum_mbus_code, - .enum_frame_size = ipipe_enum_frame_size, - .get_fmt = ipipe_get_format, - .set_fmt = ipipe_set_format, - .link_validate = ipipe_link_validate, -}; - -/* V4L2 subdev operations */ -static const struct v4l2_subdev_ops ipipe_v4l2_ops = { - .video = &ipipe_v4l2_video_ops, - .pad = &ipipe_v4l2_pad_ops, -}; - -/* V4L2 subdev internal operations */ -static const struct v4l2_subdev_internal_ops ipipe_v4l2_internal_ops = { - .open = ipipe_init_formats, -}; - -/* ----------------------------------------------------------------------------- - * Media entity operations - */ - -/* - * ipipe_link_setup - Setup IPIPE connections - * @entity: IPIPE media entity - * @local: Pad at the local end of the link - * @remote: Pad at the remote end of the link - * @flags: Link flags - * - * return -EINVAL or zero on success - */ -static int ipipe_link_setup(struct media_entity *entity, - const struct media_pad *local, - const struct media_pad *remote, u32 flags) -{ - struct v4l2_subdev *sd = media_entity_to_v4l2_subdev(entity); - struct iss_ipipe_device *ipipe = v4l2_get_subdevdata(sd); - struct iss_device *iss = to_iss_device(ipipe); - - if (!is_media_entity_v4l2_subdev(remote->entity)) - return -EINVAL; - - switch (local->index) { - case IPIPE_PAD_SINK: - /* Read from IPIPEIF. 
*/ - if (!(flags & MEDIA_LNK_FL_ENABLED)) { - ipipe->input = IPIPE_INPUT_NONE; - break; - } - - if (ipipe->input != IPIPE_INPUT_NONE) - return -EBUSY; - - if (remote->entity == &iss->ipipeif.subdev.entity) - ipipe->input = IPIPE_INPUT_IPIPEIF; - - break; - - case IPIPE_PAD_SOURCE_VP: - /* Send to RESIZER */ - if (flags & MEDIA_LNK_FL_ENABLED) { - if (ipipe->output & ~IPIPE_OUTPUT_VP) - return -EBUSY; - ipipe->output |= IPIPE_OUTPUT_VP; - } else { - ipipe->output &= ~IPIPE_OUTPUT_VP; - } - break; - - default: - return -EINVAL; - } - - return 0; -} - -/* media operations */ -static const struct media_entity_operations ipipe_media_ops = { - .link_setup = ipipe_link_setup, - .link_validate = v4l2_subdev_link_validate, -}; - -/* - * ipipe_init_entities - Initialize V4L2 subdev and media entity - * @ipipe: ISS ISP IPIPE module - * - * Return 0 on success and a negative error code on failure. - */ -static int ipipe_init_entities(struct iss_ipipe_device *ipipe) -{ - struct v4l2_subdev *sd = &ipipe->subdev; - struct media_pad *pads = ipipe->pads; - struct media_entity *me = &sd->entity; - int ret; - - ipipe->input = IPIPE_INPUT_NONE; - - v4l2_subdev_init(sd, &ipipe_v4l2_ops); - sd->internal_ops = &ipipe_v4l2_internal_ops; - strscpy(sd->name, "OMAP4 ISS ISP IPIPE", sizeof(sd->name)); - sd->grp_id = BIT(16); /* group ID for iss subdevs */ - v4l2_set_subdevdata(sd, ipipe); - sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE; - - pads[IPIPE_PAD_SINK].flags = MEDIA_PAD_FL_SINK; - pads[IPIPE_PAD_SOURCE_VP].flags = MEDIA_PAD_FL_SOURCE; - - me->ops = &ipipe_media_ops; - ret = media_entity_pads_init(me, IPIPE_PADS_NUM, pads); - if (ret < 0) - return ret; - - ipipe_init_formats(sd, NULL); - - return 0; -} - -void omap4iss_ipipe_unregister_entities(struct iss_ipipe_device *ipipe) -{ - v4l2_device_unregister_subdev(&ipipe->subdev); -} - -int omap4iss_ipipe_register_entities(struct iss_ipipe_device *ipipe, - struct v4l2_device *vdev) -{ - int ret; - - /* Register the subdev and video node. */ - ret = v4l2_device_register_subdev(vdev, &ipipe->subdev); - if (ret < 0) - goto error; - - return 0; - -error: - omap4iss_ipipe_unregister_entities(ipipe); - return ret; -} - -/* ----------------------------------------------------------------------------- - * ISP IPIPE initialisation and cleanup - */ - -/* - * omap4iss_ipipe_init - IPIPE module initialization. - * @iss: Device pointer specific to the OMAP4 ISS. - * - * TODO: Get the initialisation values from platform data. - * - * Return 0 on success or a negative error code otherwise. - */ -int omap4iss_ipipe_init(struct iss_device *iss) -{ - struct iss_ipipe_device *ipipe = &iss->ipipe; - - ipipe->state = ISS_PIPELINE_STREAM_STOPPED; - init_waitqueue_head(&ipipe->wait); - - return ipipe_init_entities(ipipe); -} - -/* - * omap4iss_ipipe_cleanup - IPIPE module cleanup. - * @iss: Device pointer specific to the OMAP4 ISS. - */ -void omap4iss_ipipe_cleanup(struct iss_device *iss) -{ - struct iss_ipipe_device *ipipe = &iss->ipipe; - - media_entity_cleanup(&ipipe->subdev.entity); -} diff --git a/drivers/staging/media/omap4iss/iss_ipipe.h b/drivers/staging/media/omap4iss/iss_ipipe.h deleted file mode 100644 index 53b42aac1696..000000000000 --- a/drivers/staging/media/omap4iss/iss_ipipe.h +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * TI OMAP4 ISS V4L2 Driver - ISP IPIPE module - * - * Copyright (C) 2012 Texas Instruments, Inc. 
- * - * Author: Sergio Aguirre - */ - -#ifndef OMAP4_ISS_IPIPE_H -#define OMAP4_ISS_IPIPE_H - -#include "iss_video.h" - -enum ipipe_input_entity { - IPIPE_INPUT_NONE, - IPIPE_INPUT_IPIPEIF, -}; - -#define IPIPE_OUTPUT_VP BIT(0) - -/* Sink and source IPIPE pads */ -#define IPIPE_PAD_SINK 0 -#define IPIPE_PAD_SOURCE_VP 1 -#define IPIPE_PADS_NUM 2 - -/* - * struct iss_ipipe_device - Structure for the IPIPE module to store its own - * information - * @subdev: V4L2 subdevice - * @pads: Sink and source media entity pads - * @formats: Active video formats - * @input: Active input - * @output: Active outputs - * @error: A hardware error occurred during capture - * @state: Streaming state - * @wait: Wait queue used to stop the module - * @stopping: Stopping state - */ -struct iss_ipipe_device { - struct v4l2_subdev subdev; - struct media_pad pads[IPIPE_PADS_NUM]; - struct v4l2_mbus_framefmt formats[IPIPE_PADS_NUM]; - - enum ipipe_input_entity input; - unsigned int output; - unsigned int error; - - enum iss_pipeline_stream_state state; - wait_queue_head_t wait; - atomic_t stopping; -}; - -struct iss_device; - -int omap4iss_ipipe_register_entities(struct iss_ipipe_device *ipipe, - struct v4l2_device *vdev); -void omap4iss_ipipe_unregister_entities(struct iss_ipipe_device *ipipe); - -int omap4iss_ipipe_init(struct iss_device *iss); -void omap4iss_ipipe_cleanup(struct iss_device *iss); - -#endif /* OMAP4_ISS_IPIPE_H */ diff --git a/drivers/staging/media/omap4iss/iss_ipipeif.c b/drivers/staging/media/omap4iss/iss_ipipeif.c deleted file mode 100644 index 8fa99532d9d4..000000000000 --- a/drivers/staging/media/omap4iss/iss_ipipeif.c +++ /dev/null @@ -1,844 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * TI OMAP4 ISS V4L2 Driver - ISP IPIPEIF module - * - * Copyright (C) 2012 Texas Instruments, Inc. - * - * Author: Sergio Aguirre - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "iss.h" -#include "iss_regs.h" -#include "iss_ipipeif.h" - -static const unsigned int ipipeif_fmts[] = { - MEDIA_BUS_FMT_SGRBG10_1X10, - MEDIA_BUS_FMT_SRGGB10_1X10, - MEDIA_BUS_FMT_SBGGR10_1X10, - MEDIA_BUS_FMT_SGBRG10_1X10, - MEDIA_BUS_FMT_UYVY8_1X16, - MEDIA_BUS_FMT_YUYV8_1X16, -}; - -/* - * ipipeif_print_status - Print current IPIPEIF Module register values. - * @ipipeif: Pointer to ISS ISP IPIPEIF device. - * - * Also prints other debug information stored in the IPIPEIF module. 
- */ -#define IPIPEIF_PRINT_REGISTER(iss, name)\ - dev_dbg(iss->dev, "###IPIPEIF " #name "=0x%08x\n", \ - iss_reg_read(iss, OMAP4_ISS_MEM_ISP_IPIPEIF, IPIPEIF_##name)) - -#define ISIF_PRINT_REGISTER(iss, name)\ - dev_dbg(iss->dev, "###ISIF " #name "=0x%08x\n", \ - iss_reg_read(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_##name)) - -#define ISP5_PRINT_REGISTER(iss, name)\ - dev_dbg(iss->dev, "###ISP5 " #name "=0x%08x\n", \ - iss_reg_read(iss, OMAP4_ISS_MEM_ISP_SYS1, ISP5_##name)) - -static void ipipeif_print_status(struct iss_ipipeif_device *ipipeif) -{ - struct iss_device *iss = to_iss_device(ipipeif); - - dev_dbg(iss->dev, "-------------IPIPEIF Register dump-------------\n"); - - IPIPEIF_PRINT_REGISTER(iss, CFG1); - IPIPEIF_PRINT_REGISTER(iss, CFG2); - - ISIF_PRINT_REGISTER(iss, SYNCEN); - ISIF_PRINT_REGISTER(iss, CADU); - ISIF_PRINT_REGISTER(iss, CADL); - ISIF_PRINT_REGISTER(iss, MODESET); - ISIF_PRINT_REGISTER(iss, CCOLP); - ISIF_PRINT_REGISTER(iss, SPH); - ISIF_PRINT_REGISTER(iss, LNH); - ISIF_PRINT_REGISTER(iss, LNV); - ISIF_PRINT_REGISTER(iss, VDINT(0)); - ISIF_PRINT_REGISTER(iss, HSIZE); - - ISP5_PRINT_REGISTER(iss, SYSCONFIG); - ISP5_PRINT_REGISTER(iss, CTRL); - ISP5_PRINT_REGISTER(iss, IRQSTATUS(0)); - ISP5_PRINT_REGISTER(iss, IRQENABLE_SET(0)); - ISP5_PRINT_REGISTER(iss, IRQENABLE_CLR(0)); - - dev_dbg(iss->dev, "-----------------------------------------------\n"); -} - -static void ipipeif_write_enable(struct iss_ipipeif_device *ipipeif, u8 enable) -{ - struct iss_device *iss = to_iss_device(ipipeif); - - iss_reg_update(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_SYNCEN, - ISIF_SYNCEN_DWEN, enable ? ISIF_SYNCEN_DWEN : 0); -} - -/* - * ipipeif_enable - Enable/Disable IPIPEIF. - * @enable: enable flag - * - */ -static void ipipeif_enable(struct iss_ipipeif_device *ipipeif, u8 enable) -{ - struct iss_device *iss = to_iss_device(ipipeif); - - iss_reg_update(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_SYNCEN, - ISIF_SYNCEN_SYEN, enable ? ISIF_SYNCEN_SYEN : 0); -} - -/* ----------------------------------------------------------------------------- - * Format- and pipeline-related configuration helpers - */ - -/* - * ipipeif_set_outaddr - Set memory address to save output image - * @ipipeif: Pointer to ISP IPIPEIF device. - * @addr: 32-bit memory address aligned on 32 byte boundary. - * - * Sets the memory address where the output will be saved. 
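The function below splits the 32-byte-aligned DMA address into the ISIF_CADU (high) and ISIF_CADL (low) register fields; because the five low bits are guaranteed to be zero, the address can be recovered exactly from the two fields. A standalone sketch of the split and recombination, using an arbitrary example address and local copies of the two field masks from iss_regs.h:

#include <stdio.h>

#define ISIF_CADU_MASK	0x7ff	/* local copy from iss_regs.h */
#define ISIF_CADL_MASK	0xffff	/* local copy from iss_regs.h */

int main(void)
{
	unsigned int addr = 0x9a0001e0;	/* arbitrary example, 32-byte aligned */
	unsigned int cadu = (addr >> (16 + 5)) & ISIF_CADU_MASK;
	unsigned int cadl = (addr >> 5) & ISIF_CADL_MASK;
	unsigned int back = ((cadu << 16) | cadl) << 5;	/* recombine */

	printf("CADU=0x%03x CADL=0x%04x recombined=0x%08x\n", cadu, cadl, back);
	return 0;
}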
- */ -static void ipipeif_set_outaddr(struct iss_ipipeif_device *ipipeif, u32 addr) -{ - struct iss_device *iss = to_iss_device(ipipeif); - - /* Save address split in Base Address H & L */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_CADU, - (addr >> (16 + 5)) & ISIF_CADU_MASK); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_CADL, - (addr >> 5) & ISIF_CADL_MASK); -} - -static void ipipeif_configure(struct iss_ipipeif_device *ipipeif) -{ - struct iss_device *iss = to_iss_device(ipipeif); - const struct iss_format_info *info; - struct v4l2_mbus_framefmt *format; - u32 isif_ccolp = 0; - - omap4iss_configure_bridge(iss, ipipeif->input); - - /* IPIPEIF_PAD_SINK */ - format = &ipipeif->formats[IPIPEIF_PAD_SINK]; - - /* IPIPEIF with YUV422 input from ISIF */ - iss_reg_clr(iss, OMAP4_ISS_MEM_ISP_IPIPEIF, IPIPEIF_CFG1, - IPIPEIF_CFG1_INPSRC1_MASK | IPIPEIF_CFG1_INPSRC2_MASK); - - /* Select ISIF/IPIPEIF input format */ - switch (format->code) { - case MEDIA_BUS_FMT_UYVY8_1X16: - case MEDIA_BUS_FMT_YUYV8_1X16: - iss_reg_update(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_MODESET, - ISIF_MODESET_CCDMD | ISIF_MODESET_INPMOD_MASK | - ISIF_MODESET_CCDW_MASK, - ISIF_MODESET_INPMOD_YCBCR16); - - iss_reg_update(iss, OMAP4_ISS_MEM_ISP_IPIPEIF, IPIPEIF_CFG2, - IPIPEIF_CFG2_YUV8, IPIPEIF_CFG2_YUV16); - - break; - case MEDIA_BUS_FMT_SGRBG10_1X10: - isif_ccolp = ISIF_CCOLP_CP0_F0_GR | - ISIF_CCOLP_CP1_F0_R | - ISIF_CCOLP_CP2_F0_B | - ISIF_CCOLP_CP3_F0_GB; - goto cont_raw; - case MEDIA_BUS_FMT_SRGGB10_1X10: - isif_ccolp = ISIF_CCOLP_CP0_F0_R | - ISIF_CCOLP_CP1_F0_GR | - ISIF_CCOLP_CP2_F0_GB | - ISIF_CCOLP_CP3_F0_B; - goto cont_raw; - case MEDIA_BUS_FMT_SBGGR10_1X10: - isif_ccolp = ISIF_CCOLP_CP0_F0_B | - ISIF_CCOLP_CP1_F0_GB | - ISIF_CCOLP_CP2_F0_GR | - ISIF_CCOLP_CP3_F0_R; - goto cont_raw; - case MEDIA_BUS_FMT_SGBRG10_1X10: - isif_ccolp = ISIF_CCOLP_CP0_F0_GB | - ISIF_CCOLP_CP1_F0_B | - ISIF_CCOLP_CP2_F0_R | - ISIF_CCOLP_CP3_F0_GR; -cont_raw: - iss_reg_clr(iss, OMAP4_ISS_MEM_ISP_IPIPEIF, IPIPEIF_CFG2, - IPIPEIF_CFG2_YUV16); - - iss_reg_update(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_MODESET, - ISIF_MODESET_CCDMD | ISIF_MODESET_INPMOD_MASK | - ISIF_MODESET_CCDW_MASK, ISIF_MODESET_INPMOD_RAW | - ISIF_MODESET_CCDW_2BIT); - - info = omap4iss_video_format_info(format->code); - iss_reg_update(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_CGAMMAWD, - ISIF_CGAMMAWD_GWDI_MASK, - ISIF_CGAMMAWD_GWDI(info->bpp)); - - /* Set RAW Bayer pattern */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_CCOLP, - isif_ccolp); - break; - } - - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_SPH, 0 & ISIF_SPH_MASK); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_LNH, - (format->width - 1) & ISIF_LNH_MASK); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_LNV, - (format->height - 1) & ISIF_LNV_MASK); - - /* Generate ISIF0 on the last line of the image */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_VDINT(0), - format->height - 1); - - /* IPIPEIF_PAD_SOURCE_ISIF_SF */ - format = &ipipeif->formats[IPIPEIF_PAD_SOURCE_ISIF_SF]; - - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_ISIF, ISIF_HSIZE, - (ipipeif->video_out.bpl_value >> 5) & - ISIF_HSIZE_HSIZE_MASK); - - /* IPIPEIF_PAD_SOURCE_VP */ - /* Do nothing? */ -} - -/* ----------------------------------------------------------------------------- - * Interrupt handling - */ - -static void ipipeif_isr_buffer(struct iss_ipipeif_device *ipipeif) -{ - struct iss_buffer *buffer; - - /* The ISIF generates VD0 interrupts even when writes are disabled. - * deal with it anyway). 
Disabling the ISIF when no buffer is available - * is thus not be enough, we need to handle the situation explicitly. - */ - if (list_empty(&ipipeif->video_out.dmaqueue)) - return; - - ipipeif_write_enable(ipipeif, 0); - - buffer = omap4iss_video_buffer_next(&ipipeif->video_out); - if (!buffer) - return; - - ipipeif_set_outaddr(ipipeif, buffer->iss_addr); - - ipipeif_write_enable(ipipeif, 1); -} - -/* - * omap4iss_ipipeif_isr - Configure ipipeif during interframe time. - * @ipipeif: Pointer to ISP IPIPEIF device. - * @events: IPIPEIF events - */ -void omap4iss_ipipeif_isr(struct iss_ipipeif_device *ipipeif, u32 events) -{ - if (omap4iss_module_sync_is_stopping(&ipipeif->wait, - &ipipeif->stopping)) - return; - - if ((events & ISP5_IRQ_ISIF_INT(0)) && - (ipipeif->output & IPIPEIF_OUTPUT_MEMORY)) - ipipeif_isr_buffer(ipipeif); -} - -/* ----------------------------------------------------------------------------- - * ISP video operations - */ - -static int ipipeif_video_queue(struct iss_video *video, - struct iss_buffer *buffer) -{ - struct iss_ipipeif_device *ipipeif = container_of(video, - struct iss_ipipeif_device, video_out); - - if (!(ipipeif->output & IPIPEIF_OUTPUT_MEMORY)) - return -ENODEV; - - ipipeif_set_outaddr(ipipeif, buffer->iss_addr); - - /* - * If streaming was enabled before there was a buffer queued - * or underrun happened in the ISR, the hardware was not enabled - * and DMA queue flag ISS_VIDEO_DMAQUEUE_UNDERRUN is still set. - * Enable it now. - */ - if (video->dmaqueue_flags & ISS_VIDEO_DMAQUEUE_UNDERRUN) { - if (ipipeif->output & IPIPEIF_OUTPUT_MEMORY) - ipipeif_write_enable(ipipeif, 1); - ipipeif_enable(ipipeif, 1); - iss_video_dmaqueue_flags_clr(video); - } - - return 0; -} - -static const struct iss_video_operations ipipeif_video_ops = { - .queue = ipipeif_video_queue, -}; - -/* ----------------------------------------------------------------------------- - * V4L2 subdev operations - */ - -#define IPIPEIF_DRV_SUBCLK_MASK (OMAP4_ISS_ISP_SUBCLK_IPIPEIF |\ - OMAP4_ISS_ISP_SUBCLK_ISIF) -/* - * ipipeif_set_stream - Enable/Disable streaming on the IPIPEIF module - * @sd: ISP IPIPEIF V4L2 subdevice - * @enable: Enable/disable stream - */ -static int ipipeif_set_stream(struct v4l2_subdev *sd, int enable) -{ - struct iss_ipipeif_device *ipipeif = v4l2_get_subdevdata(sd); - struct iss_device *iss = to_iss_device(ipipeif); - struct iss_video *video_out = &ipipeif->video_out; - int ret = 0; - - if (ipipeif->state == ISS_PIPELINE_STREAM_STOPPED) { - if (enable == ISS_PIPELINE_STREAM_STOPPED) - return 0; - - omap4iss_isp_subclk_enable(iss, IPIPEIF_DRV_SUBCLK_MASK); - } - - switch (enable) { - case ISS_PIPELINE_STREAM_CONTINUOUS: - - ipipeif_configure(ipipeif); - ipipeif_print_status(ipipeif); - - /* - * When outputting to memory with no buffer available, let the - * buffer queue handler start the hardware. A DMA queue flag - * ISS_VIDEO_DMAQUEUE_QUEUED will be set as soon as there is - * a buffer available. 
- */ - if (ipipeif->output & IPIPEIF_OUTPUT_MEMORY && - !(video_out->dmaqueue_flags & ISS_VIDEO_DMAQUEUE_QUEUED)) - break; - - atomic_set(&ipipeif->stopping, 0); - if (ipipeif->output & IPIPEIF_OUTPUT_MEMORY) - ipipeif_write_enable(ipipeif, 1); - ipipeif_enable(ipipeif, 1); - iss_video_dmaqueue_flags_clr(video_out); - break; - - case ISS_PIPELINE_STREAM_STOPPED: - if (ipipeif->state == ISS_PIPELINE_STREAM_STOPPED) - return 0; - if (omap4iss_module_sync_idle(&sd->entity, &ipipeif->wait, - &ipipeif->stopping)) - ret = -ETIMEDOUT; - - if (ipipeif->output & IPIPEIF_OUTPUT_MEMORY) - ipipeif_write_enable(ipipeif, 0); - ipipeif_enable(ipipeif, 0); - omap4iss_isp_subclk_disable(iss, IPIPEIF_DRV_SUBCLK_MASK); - iss_video_dmaqueue_flags_clr(video_out); - break; - } - - ipipeif->state = enable; - return ret; -} - -static struct v4l2_mbus_framefmt * -__ipipeif_get_format(struct iss_ipipeif_device *ipipeif, - struct v4l2_subdev_state *sd_state, unsigned int pad, - enum v4l2_subdev_format_whence which) -{ - if (which == V4L2_SUBDEV_FORMAT_TRY) - return v4l2_subdev_state_get_format(sd_state, pad); - return &ipipeif->formats[pad]; -} - -/* - * ipipeif_try_format - Try video format on a pad - * @ipipeif: ISS IPIPEIF device - * @sd_state: V4L2 subdev state - * @pad: Pad number - * @fmt: Format - */ -static void -ipipeif_try_format(struct iss_ipipeif_device *ipipeif, - struct v4l2_subdev_state *sd_state, unsigned int pad, - struct v4l2_mbus_framefmt *fmt, - enum v4l2_subdev_format_whence which) -{ - struct v4l2_mbus_framefmt *format; - unsigned int width = fmt->width; - unsigned int height = fmt->height; - unsigned int i; - - switch (pad) { - case IPIPEIF_PAD_SINK: - /* TODO: If the IPIPEIF output formatter pad is connected - * directly to the resizer, only YUV formats can be used. - */ - for (i = 0; i < ARRAY_SIZE(ipipeif_fmts); i++) { - if (fmt->code == ipipeif_fmts[i]) - break; - } - - /* If not found, use SGRBG10 as default */ - if (i >= ARRAY_SIZE(ipipeif_fmts)) - fmt->code = MEDIA_BUS_FMT_SGRBG10_1X10; - - /* Clamp the input size. */ - fmt->width = clamp_t(u32, width, 1, 8192); - fmt->height = clamp_t(u32, height, 1, 8192); - break; - - case IPIPEIF_PAD_SOURCE_ISIF_SF: - format = __ipipeif_get_format(ipipeif, sd_state, - IPIPEIF_PAD_SINK, - which); - memcpy(fmt, format, sizeof(*fmt)); - - /* The data formatter truncates the number of horizontal output - * pixels to a multiple of 16. To avoid clipping data, allow - * callers to request an output size bigger than the input size - * up to the nearest multiple of 16. - */ - fmt->width = clamp_t(u32, width, 32, (fmt->width + 15) & ~15); - fmt->width &= ~15; - fmt->height = clamp_t(u32, height, 32, fmt->height); - break; - - case IPIPEIF_PAD_SOURCE_VP: - format = __ipipeif_get_format(ipipeif, sd_state, - IPIPEIF_PAD_SINK, - which); - memcpy(fmt, format, sizeof(*fmt)); - - fmt->width = clamp_t(u32, width, 32, fmt->width); - fmt->height = clamp_t(u32, height, 32, fmt->height); - break; - } - - /* Data is written to memory unpacked, each 10-bit or 12-bit pixel is - * stored on 2 bytes. 
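The data-formatter rounding in the IPIPEIF_PAD_SOURCE_ISIF_SF case above is easiest to see with numbers: the sink width caps the request, but only after being rounded up to the next multiple of 16, and the final width is then truncated back down to a multiple of 16. A standalone sketch of that clamp-then-truncate sequence with arbitrary example sizes (1290-pixel sink, 1300-pixel request):

#include <stdio.h>

int main(void)
{
	unsigned int sink_width = 1290;	/* arbitrary example sink width */
	unsigned int requested = 1300;	/* caller asks for slightly more */
	unsigned int upper = (sink_width + 15) & ~15U;	/* 1296 */
	unsigned int w = requested;

	/* Same ordering as the driver: clamp to [32, upper], then truncate. */
	if (w < 32)
		w = 32;
	if (w > upper)
		w = upper;
	w &= ~15U;

	printf("output width=%u\n", w);	/* 1296 */
	return 0;
}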
- */ - fmt->colorspace = V4L2_COLORSPACE_SRGB; - fmt->field = V4L2_FIELD_NONE; -} - -/* - * ipipeif_enum_mbus_code - Handle pixel format enumeration - * @sd : pointer to v4l2 subdev structure - * @sd_state: V4L2 subdev state - * @code : pointer to v4l2_subdev_mbus_code_enum structure - * return -EINVAL or zero on success - */ -static int ipipeif_enum_mbus_code(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_mbus_code_enum *code) -{ - struct iss_ipipeif_device *ipipeif = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt *format; - - switch (code->pad) { - case IPIPEIF_PAD_SINK: - if (code->index >= ARRAY_SIZE(ipipeif_fmts)) - return -EINVAL; - - code->code = ipipeif_fmts[code->index]; - break; - - case IPIPEIF_PAD_SOURCE_ISIF_SF: - case IPIPEIF_PAD_SOURCE_VP: - /* No format conversion inside IPIPEIF */ - if (code->index != 0) - return -EINVAL; - - format = __ipipeif_get_format(ipipeif, sd_state, - IPIPEIF_PAD_SINK, - code->which); - - code->code = format->code; - break; - - default: - return -EINVAL; - } - - return 0; -} - -static int ipipeif_enum_frame_size(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_frame_size_enum *fse) -{ - struct iss_ipipeif_device *ipipeif = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt format; - - if (fse->index != 0) - return -EINVAL; - - format.code = fse->code; - format.width = 1; - format.height = 1; - ipipeif_try_format(ipipeif, sd_state, fse->pad, &format, fse->which); - fse->min_width = format.width; - fse->min_height = format.height; - - if (format.code != fse->code) - return -EINVAL; - - format.code = fse->code; - format.width = -1; - format.height = -1; - ipipeif_try_format(ipipeif, sd_state, fse->pad, &format, fse->which); - fse->max_width = format.width; - fse->max_height = format.height; - - return 0; -} - -/* - * ipipeif_get_format - Retrieve the video format on a pad - * @sd : ISP IPIPEIF V4L2 subdevice - * @sd_state: V4L2 subdev state - * @fmt: Format - * - * Return 0 on success or -EINVAL if the pad is invalid or doesn't correspond - * to the format type. - */ -static int ipipeif_get_format(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_format *fmt) -{ - struct iss_ipipeif_device *ipipeif = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt *format; - - format = __ipipeif_get_format(ipipeif, sd_state, fmt->pad, fmt->which); - if (!format) - return -EINVAL; - - fmt->format = *format; - return 0; -} - -/* - * ipipeif_set_format - Set the video format on a pad - * @sd : ISP IPIPEIF V4L2 subdevice - * @sd_state: V4L2 subdev state - * @fmt: Format - * - * Return 0 on success or -EINVAL if the pad is invalid or doesn't correspond - * to the format type. 
- */ -static int ipipeif_set_format(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_format *fmt) -{ - struct iss_ipipeif_device *ipipeif = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt *format; - - format = __ipipeif_get_format(ipipeif, sd_state, fmt->pad, fmt->which); - if (!format) - return -EINVAL; - - ipipeif_try_format(ipipeif, sd_state, fmt->pad, &fmt->format, - fmt->which); - *format = fmt->format; - - /* Propagate the format from sink to source */ - if (fmt->pad == IPIPEIF_PAD_SINK) { - format = __ipipeif_get_format(ipipeif, sd_state, - IPIPEIF_PAD_SOURCE_ISIF_SF, - fmt->which); - *format = fmt->format; - ipipeif_try_format(ipipeif, sd_state, - IPIPEIF_PAD_SOURCE_ISIF_SF, - format, fmt->which); - - format = __ipipeif_get_format(ipipeif, sd_state, - IPIPEIF_PAD_SOURCE_VP, - fmt->which); - *format = fmt->format; - ipipeif_try_format(ipipeif, sd_state, IPIPEIF_PAD_SOURCE_VP, - format, - fmt->which); - } - - return 0; -} - -static int ipipeif_link_validate(struct v4l2_subdev *sd, - struct media_link *link, - struct v4l2_subdev_format *source_fmt, - struct v4l2_subdev_format *sink_fmt) -{ - /* Check if the two ends match */ - if (source_fmt->format.width != sink_fmt->format.width || - source_fmt->format.height != sink_fmt->format.height) - return -EPIPE; - - if (source_fmt->format.code != sink_fmt->format.code) - return -EPIPE; - - return 0; -} - -/* - * ipipeif_init_formats - Initialize formats on all pads - * @sd: ISP IPIPEIF V4L2 subdevice - * @fh: V4L2 subdev file handle - * - * Initialize all pad formats with default values. If fh is not NULL, try - * formats are initialized on the file handle. Otherwise active formats are - * initialized on the device. - */ -static int ipipeif_init_formats(struct v4l2_subdev *sd, - struct v4l2_subdev_fh *fh) -{ - struct v4l2_subdev_format format; - - memset(&format, 0, sizeof(format)); - format.pad = IPIPEIF_PAD_SINK; - format.which = fh ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE; - format.format.code = MEDIA_BUS_FMT_SGRBG10_1X10; - format.format.width = 4096; - format.format.height = 4096; - ipipeif_set_format(sd, fh ? 
fh->state : NULL, &format); - - return 0; -} - -/* V4L2 subdev video operations */ -static const struct v4l2_subdev_video_ops ipipeif_v4l2_video_ops = { - .s_stream = ipipeif_set_stream, -}; - -/* V4L2 subdev pad operations */ -static const struct v4l2_subdev_pad_ops ipipeif_v4l2_pad_ops = { - .enum_mbus_code = ipipeif_enum_mbus_code, - .enum_frame_size = ipipeif_enum_frame_size, - .get_fmt = ipipeif_get_format, - .set_fmt = ipipeif_set_format, - .link_validate = ipipeif_link_validate, -}; - -/* V4L2 subdev operations */ -static const struct v4l2_subdev_ops ipipeif_v4l2_ops = { - .video = &ipipeif_v4l2_video_ops, - .pad = &ipipeif_v4l2_pad_ops, -}; - -/* V4L2 subdev internal operations */ -static const struct v4l2_subdev_internal_ops ipipeif_v4l2_internal_ops = { - .open = ipipeif_init_formats, -}; - -/* ----------------------------------------------------------------------------- - * Media entity operations - */ - -/* - * ipipeif_link_setup - Setup IPIPEIF connections - * @entity: IPIPEIF media entity - * @local: Pad at the local end of the link - * @remote: Pad at the remote end of the link - * @flags: Link flags - * - * return -EINVAL or zero on success - */ -static int ipipeif_link_setup(struct media_entity *entity, - const struct media_pad *local, - const struct media_pad *remote, u32 flags) -{ - struct v4l2_subdev *sd = media_entity_to_v4l2_subdev(entity); - struct iss_ipipeif_device *ipipeif = v4l2_get_subdevdata(sd); - struct iss_device *iss = to_iss_device(ipipeif); - unsigned int index = local->index; - - /* FIXME: this is actually a hack! */ - if (is_media_entity_v4l2_subdev(remote->entity)) - index |= 2 << 16; - - switch (index) { - case IPIPEIF_PAD_SINK | 2 << 16: - /* Read from the sensor CSI2a or CSI2b. */ - if (!(flags & MEDIA_LNK_FL_ENABLED)) { - ipipeif->input = IPIPEIF_INPUT_NONE; - break; - } - - if (ipipeif->input != IPIPEIF_INPUT_NONE) - return -EBUSY; - - if (remote->entity == &iss->csi2a.subdev.entity) - ipipeif->input = IPIPEIF_INPUT_CSI2A; - else if (remote->entity == &iss->csi2b.subdev.entity) - ipipeif->input = IPIPEIF_INPUT_CSI2B; - - break; - - case IPIPEIF_PAD_SOURCE_ISIF_SF: - /* Write to memory */ - if (flags & MEDIA_LNK_FL_ENABLED) { - if (ipipeif->output & ~IPIPEIF_OUTPUT_MEMORY) - return -EBUSY; - ipipeif->output |= IPIPEIF_OUTPUT_MEMORY; - } else { - ipipeif->output &= ~IPIPEIF_OUTPUT_MEMORY; - } - break; - - case IPIPEIF_PAD_SOURCE_VP | 2 << 16: - /* Send to IPIPE/RESIZER */ - if (flags & MEDIA_LNK_FL_ENABLED) { - if (ipipeif->output & ~IPIPEIF_OUTPUT_VP) - return -EBUSY; - ipipeif->output |= IPIPEIF_OUTPUT_VP; - } else { - ipipeif->output &= ~IPIPEIF_OUTPUT_VP; - } - break; - - default: - return -EINVAL; - } - - return 0; -} - -/* media operations */ -static const struct media_entity_operations ipipeif_media_ops = { - .link_setup = ipipeif_link_setup, - .link_validate = v4l2_subdev_link_validate, -}; - -/* - * ipipeif_init_entities - Initialize V4L2 subdev and media entity - * @ipipeif: ISS ISP IPIPEIF module - * - * Return 0 on success and a negative error code on failure. 
- */ -static int ipipeif_init_entities(struct iss_ipipeif_device *ipipeif) -{ - struct v4l2_subdev *sd = &ipipeif->subdev; - struct media_pad *pads = ipipeif->pads; - struct media_entity *me = &sd->entity; - int ret; - - ipipeif->input = IPIPEIF_INPUT_NONE; - - v4l2_subdev_init(sd, &ipipeif_v4l2_ops); - sd->internal_ops = &ipipeif_v4l2_internal_ops; - strscpy(sd->name, "OMAP4 ISS ISP IPIPEIF", sizeof(sd->name)); - sd->grp_id = BIT(16); /* group ID for iss subdevs */ - v4l2_set_subdevdata(sd, ipipeif); - sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE; - - pads[IPIPEIF_PAD_SINK].flags = MEDIA_PAD_FL_SINK; - pads[IPIPEIF_PAD_SOURCE_ISIF_SF].flags = MEDIA_PAD_FL_SOURCE; - pads[IPIPEIF_PAD_SOURCE_VP].flags = MEDIA_PAD_FL_SOURCE; - - me->ops = &ipipeif_media_ops; - ret = media_entity_pads_init(me, IPIPEIF_PADS_NUM, pads); - if (ret < 0) - return ret; - - ipipeif_init_formats(sd, NULL); - - ipipeif->video_out.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - ipipeif->video_out.ops = &ipipeif_video_ops; - ipipeif->video_out.iss = to_iss_device(ipipeif); - ipipeif->video_out.capture_mem = PAGE_ALIGN(4096 * 4096) * 3; - ipipeif->video_out.bpl_alignment = 32; - ipipeif->video_out.bpl_zero_padding = 1; - ipipeif->video_out.bpl_max = 0x1ffe0; - - return omap4iss_video_init(&ipipeif->video_out, "ISP IPIPEIF"); -} - -void omap4iss_ipipeif_unregister_entities(struct iss_ipipeif_device *ipipeif) -{ - v4l2_device_unregister_subdev(&ipipeif->subdev); - omap4iss_video_unregister(&ipipeif->video_out); -} - -int omap4iss_ipipeif_register_entities(struct iss_ipipeif_device *ipipeif, - struct v4l2_device *vdev) -{ - int ret; - - /* Register the subdev and video node. */ - ret = v4l2_device_register_subdev(vdev, &ipipeif->subdev); - if (ret < 0) - goto error; - - ret = omap4iss_video_register(&ipipeif->video_out, vdev); - if (ret < 0) - goto error; - - return 0; - -error: - omap4iss_ipipeif_unregister_entities(ipipeif); - return ret; -} - -/* ----------------------------------------------------------------------------- - * ISP IPIPEIF initialisation and cleanup - */ - -/* - * omap4iss_ipipeif_init - IPIPEIF module initialization. - * @iss: Device pointer specific to the OMAP4 ISS. - * - * TODO: Get the initialisation values from platform data. - * - * Return 0 on success or a negative error code otherwise. - */ -int omap4iss_ipipeif_init(struct iss_device *iss) -{ - struct iss_ipipeif_device *ipipeif = &iss->ipipeif; - - ipipeif->state = ISS_PIPELINE_STREAM_STOPPED; - init_waitqueue_head(&ipipeif->wait); - - return ipipeif_init_entities(ipipeif); -} - -/* - * omap4iss_ipipeif_create_links() - IPIPEIF pads links creation - * @iss: Pointer to ISS device - * - * return negative error code or zero on success - */ -int omap4iss_ipipeif_create_links(struct iss_device *iss) -{ - struct iss_ipipeif_device *ipipeif = &iss->ipipeif; - - /* Connect the IPIPEIF subdev to the video node. */ - return media_create_pad_link(&ipipeif->subdev.entity, - IPIPEIF_PAD_SOURCE_ISIF_SF, - &ipipeif->video_out.video.entity, 0, 0); -} - -/* - * omap4iss_ipipeif_cleanup - IPIPEIF module cleanup. - * @iss: Device pointer specific to the OMAP4 ISS. 
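One detail worth spelling out from ipipeif_init_entities() above: the ISIF measures line sizes in 32-byte units, which ties together the bpl_alignment of 32, the HSIZE programming in ipipeif_configure() (bpl_value >> 5), and the bpl_max of 0x1ffe0, the largest multiple of 32 whose 32-byte count still fits the 12-bit HSIZE field. A standalone arithmetic check with an arbitrary 2560-byte line:

#include <stdio.h>

#define ISIF_HSIZE_HSIZE_MASK	0xfff	/* local copy from iss_regs.h */

int main(void)
{
	unsigned int bpl = 2560;	/* arbitrary example bytes-per-line */
	unsigned int bpl_max = 0x1ffe0;	/* limit set in ipipeif_init_entities() */

	printf("HSIZE for %u bytes/line = %u\n", bpl, (bpl >> 5) & ISIF_HSIZE_HSIZE_MASK);
	printf("bpl_max >> 5 == HSIZE field mask? %s\n",
	       (bpl_max >> 5) == ISIF_HSIZE_HSIZE_MASK ? "yes" : "no");
	return 0;
}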
- */ -void omap4iss_ipipeif_cleanup(struct iss_device *iss) -{ - struct iss_ipipeif_device *ipipeif = &iss->ipipeif; - - media_entity_cleanup(&ipipeif->subdev.entity); -} diff --git a/drivers/staging/media/omap4iss/iss_ipipeif.h b/drivers/staging/media/omap4iss/iss_ipipeif.h deleted file mode 100644 index 69792333a62e..000000000000 --- a/drivers/staging/media/omap4iss/iss_ipipeif.h +++ /dev/null @@ -1,89 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * TI OMAP4 ISS V4L2 Driver - ISP IPIPEIF module - * - * Copyright (C) 2012 Texas Instruments, Inc. - * - * Author: Sergio Aguirre - */ - -#ifndef OMAP4_ISS_IPIPEIF_H -#define OMAP4_ISS_IPIPEIF_H - -#include "iss_video.h" - -enum ipipeif_input_entity { - IPIPEIF_INPUT_NONE, - IPIPEIF_INPUT_CSI2A, - IPIPEIF_INPUT_CSI2B -}; - -#define IPIPEIF_OUTPUT_MEMORY BIT(0) -#define IPIPEIF_OUTPUT_VP BIT(1) - -/* Sink and source IPIPEIF pads */ -#define IPIPEIF_PAD_SINK 0 -#define IPIPEIF_PAD_SOURCE_ISIF_SF 1 -#define IPIPEIF_PAD_SOURCE_VP 2 -#define IPIPEIF_PADS_NUM 3 - -/* - * struct iss_ipipeif_device - Structure for the IPIPEIF module to store its own - * information - * @subdev: V4L2 subdevice - * @pads: Sink and source media entity pads - * @formats: Active video formats - * @input: Active input - * @output: Active outputs - * @video_out: Output video node - * @error: A hardware error occurred during capture - * @alaw: A-law compression enabled (1) or disabled (0) - * @lpf: Low pass filter enabled (1) or disabled (0) - * @obclamp: Optical-black clamp enabled (1) or disabled (0) - * @fpc_en: Faulty pixels correction enabled (1) or disabled (0) - * @blcomp: Black level compensation configuration - * @clamp: Optical-black or digital clamp configuration - * @fpc: Faulty pixels correction configuration - * @lsc: Lens shading compensation configuration - * @update: Bitmask of controls to update during the next interrupt - * @shadow_update: Controls update in progress by userspace - * @syncif: Interface synchronization configuration - * @vpcfg: Video port configuration - * @underrun: A buffer underrun occurred and a new buffer has been queued - * @state: Streaming state - * @lock: Serializes shadow_update with interrupt handler - * @wait: Wait queue used to stop the module - * @stopping: Stopping state - * @ioctl_lock: Serializes ioctl calls and LSC requests freeing - */ -struct iss_ipipeif_device { - struct v4l2_subdev subdev; - struct media_pad pads[IPIPEIF_PADS_NUM]; - struct v4l2_mbus_framefmt formats[IPIPEIF_PADS_NUM]; - - enum ipipeif_input_entity input; - unsigned int output; - struct iss_video video_out; - unsigned int error; - - enum iss_pipeline_stream_state state; - wait_queue_head_t wait; - atomic_t stopping; -}; - -struct iss_device; - -int omap4iss_ipipeif_init(struct iss_device *iss); -int omap4iss_ipipeif_create_links(struct iss_device *iss); -void omap4iss_ipipeif_cleanup(struct iss_device *iss); -int omap4iss_ipipeif_register_entities(struct iss_ipipeif_device *ipipeif, - struct v4l2_device *vdev); -void omap4iss_ipipeif_unregister_entities(struct iss_ipipeif_device *ipipeif); - -int omap4iss_ipipeif_busy(struct iss_ipipeif_device *ipipeif); -void omap4iss_ipipeif_isr(struct iss_ipipeif_device *ipipeif, u32 events); -void omap4iss_ipipeif_restore_context(struct iss_device *iss); -void omap4iss_ipipeif_max_rate(struct iss_ipipeif_device *ipipeif, - unsigned int *max_rate); - -#endif /* OMAP4_ISS_IPIPEIF_H */ diff --git a/drivers/staging/media/omap4iss/iss_regs.h b/drivers/staging/media/omap4iss/iss_regs.h deleted file mode 100644 
index cfe0bb075072..000000000000 --- a/drivers/staging/media/omap4iss/iss_regs.h +++ /dev/null @@ -1,899 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * TI OMAP4 ISS V4L2 Driver - Register defines - * - * Copyright (C) 2012 Texas Instruments. - * - * Author: Sergio Aguirre - */ - -#ifndef _OMAP4_ISS_REGS_H_ -#define _OMAP4_ISS_REGS_H_ - -/* ISS */ -#define ISS_HL_REVISION 0x0 - -#define ISS_HL_SYSCONFIG 0x10 -#define ISS_HL_SYSCONFIG_IDLEMODE_SHIFT 2 -#define ISS_HL_SYSCONFIG_IDLEMODE_FORCEIDLE 0x0 -#define ISS_HL_SYSCONFIG_IDLEMODE_NOIDLE 0x1 -#define ISS_HL_SYSCONFIG_IDLEMODE_SMARTIDLE 0x2 -#define ISS_HL_SYSCONFIG_SOFTRESET BIT(0) - -#define ISS_HL_IRQSTATUS_RAW(i) (0x20 + (0x10 * (i))) -#define ISS_HL_IRQSTATUS(i) (0x24 + (0x10 * (i))) -#define ISS_HL_IRQENABLE_SET(i) (0x28 + (0x10 * (i))) -#define ISS_HL_IRQENABLE_CLR(i) (0x2c + (0x10 * (i))) - -#define ISS_HL_IRQ_HS_VS BIT(17) -#define ISS_HL_IRQ_SIMCOP(i) BIT(12 + (i)) -#define ISS_HL_IRQ_BTE BIT(11) -#define ISS_HL_IRQ_CBUFF BIT(10) -#define ISS_HL_IRQ_CCP2(i) BIT((i) > 3 ? 16 : 14 + (i)) -#define ISS_HL_IRQ_CSIB BIT(5) -#define ISS_HL_IRQ_CSIA BIT(4) -#define ISS_HL_IRQ_ISP(i) BIT(i) - -#define ISS_CTRL 0x80 -#define ISS_CTRL_CLK_DIV_MASK (3 << 4) -#define ISS_CTRL_INPUT_SEL_MASK (3 << 2) -#define ISS_CTRL_INPUT_SEL_CSI2A (0 << 2) -#define ISS_CTRL_INPUT_SEL_CSI2B (1 << 2) -#define ISS_CTRL_SYNC_DETECT_VS_RAISING (3 << 0) - -#define ISS_CLKCTRL 0x84 -#define ISS_CLKCTRL_VPORT2_CLK BIT(30) -#define ISS_CLKCTRL_VPORT1_CLK BIT(29) -#define ISS_CLKCTRL_VPORT0_CLK BIT(28) -#define ISS_CLKCTRL_CCP2 BIT(4) -#define ISS_CLKCTRL_CSI2_B BIT(3) -#define ISS_CLKCTRL_CSI2_A BIT(2) -#define ISS_CLKCTRL_ISP BIT(1) -#define ISS_CLKCTRL_SIMCOP BIT(0) - -#define ISS_CLKSTAT 0x88 -#define ISS_CLKSTAT_VPORT2_CLK BIT(30) -#define ISS_CLKSTAT_VPORT1_CLK BIT(29) -#define ISS_CLKSTAT_VPORT0_CLK BIT(28) -#define ISS_CLKSTAT_CCP2 BIT(4) -#define ISS_CLKSTAT_CSI2_B BIT(3) -#define ISS_CLKSTAT_CSI2_A BIT(2) -#define ISS_CLKSTAT_ISP BIT(1) -#define ISS_CLKSTAT_SIMCOP BIT(0) - -#define ISS_PM_STATUS 0x8c -#define ISS_PM_STATUS_CBUFF_PM_MASK (3 << 12) -#define ISS_PM_STATUS_BTE_PM_MASK (3 << 10) -#define ISS_PM_STATUS_SIMCOP_PM_MASK (3 << 8) -#define ISS_PM_STATUS_ISP_PM_MASK (3 << 6) -#define ISS_PM_STATUS_CCP2_PM_MASK (3 << 4) -#define ISS_PM_STATUS_CSI2_B_PM_MASK (3 << 2) -#define ISS_PM_STATUS_CSI2_A_PM_MASK (3 << 0) - -#define REGISTER0 0x0 -#define REGISTER0_HSCLOCKCONFIG BIT(24) -#define REGISTER0_THS_TERM_MASK (0xff << 8) -#define REGISTER0_THS_TERM_SHIFT 8 -#define REGISTER0_THS_SETTLE_MASK (0xff << 0) -#define REGISTER0_THS_SETTLE_SHIFT 0 - -#define REGISTER1 0x4 -#define REGISTER1_RESET_DONE_CTRLCLK BIT(29) -#define REGISTER1_CLOCK_MISS_DETECTOR_STATUS BIT(25) -#define REGISTER1_TCLK_TERM_MASK (0x3f << 18) -#define REGISTER1_TCLK_TERM_SHIFT 18 -#define REGISTER1_DPHY_HS_SYNC_PATTERN_SHIFT 10 -#define REGISTER1_CTRLCLK_DIV_FACTOR_MASK (0x3 << 8) -#define REGISTER1_CTRLCLK_DIV_FACTOR_SHIFT 8 -#define REGISTER1_TCLK_SETTLE_MASK (0xff << 0) -#define REGISTER1_TCLK_SETTLE_SHIFT 0 - -#define REGISTER2 0x8 - -#define CSI2_SYSCONFIG 0x10 -#define CSI2_SYSCONFIG_MSTANDBY_MODE_MASK (3 << 12) -#define CSI2_SYSCONFIG_MSTANDBY_MODE_FORCE (0 << 12) -#define CSI2_SYSCONFIG_MSTANDBY_MODE_NO (1 << 12) -#define CSI2_SYSCONFIG_MSTANDBY_MODE_SMART (2 << 12) -#define CSI2_SYSCONFIG_SOFT_RESET (1 << 1) -#define CSI2_SYSCONFIG_AUTO_IDLE (1 << 0) - -#define CSI2_SYSSTATUS 0x14 -#define CSI2_SYSSTATUS_RESET_DONE BIT(0) - -#define CSI2_IRQSTATUS 0x18 -#define 
CSI2_IRQENABLE 0x1c - -/* Shared bits across CSI2_IRQENABLE and IRQSTATUS */ - -#define CSI2_IRQ_OCP_ERR BIT(14) -#define CSI2_IRQ_SHORT_PACKET BIT(13) -#define CSI2_IRQ_ECC_CORRECTION BIT(12) -#define CSI2_IRQ_ECC_NO_CORRECTION BIT(11) -#define CSI2_IRQ_COMPLEXIO_ERR BIT(9) -#define CSI2_IRQ_FIFO_OVF BIT(8) -#define CSI2_IRQ_CONTEXT0 BIT(0) - -#define CSI2_CTRL 0x40 -#define CSI2_CTRL_MFLAG_LEVH_MASK (7 << 20) -#define CSI2_CTRL_MFLAG_LEVH_SHIFT 20 -#define CSI2_CTRL_MFLAG_LEVL_MASK (7 << 17) -#define CSI2_CTRL_MFLAG_LEVL_SHIFT 17 -#define CSI2_CTRL_BURST_SIZE_EXPAND (1 << 16) -#define CSI2_CTRL_VP_CLK_EN (1 << 15) -#define CSI2_CTRL_NON_POSTED_WRITE (1 << 13) -#define CSI2_CTRL_VP_ONLY_EN (1 << 11) -#define CSI2_CTRL_VP_OUT_CTRL_MASK (3 << 8) -#define CSI2_CTRL_VP_OUT_CTRL_SHIFT 8 -#define CSI2_CTRL_DBG_EN (1 << 7) -#define CSI2_CTRL_BURST_SIZE_MASK (3 << 5) -#define CSI2_CTRL_ENDIANNESS (1 << 4) -#define CSI2_CTRL_FRAME (1 << 3) -#define CSI2_CTRL_ECC_EN (1 << 2) -#define CSI2_CTRL_IF_EN (1 << 0) - -#define CSI2_DBG_H 0x44 - -#define CSI2_COMPLEXIO_CFG 0x50 -#define CSI2_COMPLEXIO_CFG_RESET_CTRL (1 << 30) -#define CSI2_COMPLEXIO_CFG_RESET_DONE (1 << 29) -#define CSI2_COMPLEXIO_CFG_PWD_CMD_MASK (3 << 27) -#define CSI2_COMPLEXIO_CFG_PWD_CMD_OFF (0 << 27) -#define CSI2_COMPLEXIO_CFG_PWD_CMD_ON (1 << 27) -#define CSI2_COMPLEXIO_CFG_PWD_CMD_ULP (2 << 27) -#define CSI2_COMPLEXIO_CFG_PWD_STATUS_MASK (3 << 25) -#define CSI2_COMPLEXIO_CFG_PWD_STATUS_OFF (0 << 25) -#define CSI2_COMPLEXIO_CFG_PWD_STATUS_ON (1 << 25) -#define CSI2_COMPLEXIO_CFG_PWD_STATUS_ULP (2 << 25) -#define CSI2_COMPLEXIO_CFG_PWR_AUTO (1 << 24) -#define CSI2_COMPLEXIO_CFG_DATA_POL(i) (1 << (((i) * 4) + 3)) -#define CSI2_COMPLEXIO_CFG_DATA_POSITION_MASK(i) (7 << ((i) * 4)) -#define CSI2_COMPLEXIO_CFG_DATA_POSITION_SHIFT(i) ((i) * 4) -#define CSI2_COMPLEXIO_CFG_CLOCK_POL (1 << 3) -#define CSI2_COMPLEXIO_CFG_CLOCK_POSITION_MASK (7 << 0) -#define CSI2_COMPLEXIO_CFG_CLOCK_POSITION_SHIFT 0 - -#define CSI2_COMPLEXIO_IRQSTATUS 0x54 - -#define CSI2_SHORT_PACKET 0x5c - -#define CSI2_COMPLEXIO_IRQENABLE 0x60 - -/* Shared bits across CSI2_COMPLEXIO_IRQENABLE and IRQSTATUS */ -#define CSI2_COMPLEXIO_IRQ_STATEALLULPMEXIT BIT(26) -#define CSI2_COMPLEXIO_IRQ_STATEALLULPMENTER BIT(25) -#define CSI2_COMPLEXIO_IRQ_STATEULPM5 BIT(24) -#define CSI2_COMPLEXIO_IRQ_STATEULPM4 BIT(23) -#define CSI2_COMPLEXIO_IRQ_STATEULPM3 BIT(22) -#define CSI2_COMPLEXIO_IRQ_STATEULPM2 BIT(21) -#define CSI2_COMPLEXIO_IRQ_STATEULPM1 BIT(20) -#define CSI2_COMPLEXIO_IRQ_ERRCONTROL5 BIT(19) -#define CSI2_COMPLEXIO_IRQ_ERRCONTROL4 BIT(18) -#define CSI2_COMPLEXIO_IRQ_ERRCONTROL3 BIT(17) -#define CSI2_COMPLEXIO_IRQ_ERRCONTROL2 BIT(16) -#define CSI2_COMPLEXIO_IRQ_ERRCONTROL1 BIT(15) -#define CSI2_COMPLEXIO_IRQ_ERRESC5 BIT(14) -#define CSI2_COMPLEXIO_IRQ_ERRESC4 BIT(13) -#define CSI2_COMPLEXIO_IRQ_ERRESC3 BIT(12) -#define CSI2_COMPLEXIO_IRQ_ERRESC2 BIT(11) -#define CSI2_COMPLEXIO_IRQ_ERRESC1 BIT(10) -#define CSI2_COMPLEXIO_IRQ_ERRSOTSYNCHS5 BIT(9) -#define CSI2_COMPLEXIO_IRQ_ERRSOTSYNCHS4 BIT(8) -#define CSI2_COMPLEXIO_IRQ_ERRSOTSYNCHS3 BIT(7) -#define CSI2_COMPLEXIO_IRQ_ERRSOTSYNCHS2 BIT(6) -#define CSI2_COMPLEXIO_IRQ_ERRSOTSYNCHS1 BIT(5) -#define CSI2_COMPLEXIO_IRQ_ERRSOTHS5 BIT(4) -#define CSI2_COMPLEXIO_IRQ_ERRSOTHS4 BIT(3) -#define CSI2_COMPLEXIO_IRQ_ERRSOTHS3 BIT(2) -#define CSI2_COMPLEXIO_IRQ_ERRSOTHS2 BIT(1) -#define CSI2_COMPLEXIO_IRQ_ERRSOTHS1 BIT(0) - -#define CSI2_DBG_P 0x68 - -#define CSI2_TIMING 0x6c -#define CSI2_TIMING_FORCE_RX_MODE_IO1 BIT(15) -#define 
CSI2_TIMING_STOP_STATE_X16_IO1 BIT(14) -#define CSI2_TIMING_STOP_STATE_X4_IO1 BIT(13) -#define CSI2_TIMING_STOP_STATE_COUNTER_IO1_MASK (0x1fff << 0) -#define CSI2_TIMING_STOP_STATE_COUNTER_IO1_SHIFT 0 - -#define CSI2_CTX_CTRL1(i) (0x70 + (0x20 * (i))) -#define CSI2_CTX_CTRL1_GENERIC BIT(30) -#define CSI2_CTX_CTRL1_TRANSCODE (0xf << 24) -#define CSI2_CTX_CTRL1_FEC_NUMBER_MASK (0xff << 16) -#define CSI2_CTX_CTRL1_COUNT_MASK (0xff << 8) -#define CSI2_CTX_CTRL1_COUNT_SHIFT 8 -#define CSI2_CTX_CTRL1_EOF_EN BIT(7) -#define CSI2_CTX_CTRL1_EOL_EN BIT(6) -#define CSI2_CTX_CTRL1_CS_EN BIT(5) -#define CSI2_CTX_CTRL1_COUNT_UNLOCK BIT(4) -#define CSI2_CTX_CTRL1_PING_PONG BIT(3) -#define CSI2_CTX_CTRL1_CTX_EN BIT(0) - -#define CSI2_CTX_CTRL2(i) (0x74 + (0x20 * (i))) -#define CSI2_CTX_CTRL2_FRAME_MASK (0xffff << 16) -#define CSI2_CTX_CTRL2_FRAME_SHIFT 16 -#define CSI2_CTX_CTRL2_USER_DEF_MAP_SHIFT 13 -#define CSI2_CTX_CTRL2_USER_DEF_MAP_MASK \ - (0x3 << CSI2_CTX_CTRL2_USER_DEF_MAP_SHIFT) -#define CSI2_CTX_CTRL2_VIRTUAL_ID_MASK (3 << 11) -#define CSI2_CTX_CTRL2_VIRTUAL_ID_SHIFT 11 -#define CSI2_CTX_CTRL2_DPCM_PRED (1 << 10) -#define CSI2_CTX_CTRL2_FORMAT_MASK (0x3ff << 0) -#define CSI2_CTX_CTRL2_FORMAT_SHIFT 0 - -#define CSI2_CTX_DAT_OFST(i) (0x78 + (0x20 * (i))) -#define CSI2_CTX_DAT_OFST_MASK (0xfff << 5) - -#define CSI2_CTX_PING_ADDR(i) (0x7c + (0x20 * (i))) -#define CSI2_CTX_PING_ADDR_MASK 0xffffffe0 - -#define CSI2_CTX_PONG_ADDR(i) (0x80 + (0x20 * (i))) -#define CSI2_CTX_PONG_ADDR_MASK CSI2_CTX_PING_ADDR_MASK - -#define CSI2_CTX_IRQENABLE(i) (0x84 + (0x20 * (i))) -#define CSI2_CTX_IRQSTATUS(i) (0x88 + (0x20 * (i))) - -#define CSI2_CTX_CTRL3(i) (0x8c + (0x20 * (i))) -#define CSI2_CTX_CTRL3_ALPHA_SHIFT 5 -#define CSI2_CTX_CTRL3_ALPHA_MASK \ - (0x3fff << CSI2_CTX_CTRL3_ALPHA_SHIFT) - -/* Shared bits across CSI2_CTX_IRQENABLE and IRQSTATUS */ -#define CSI2_CTX_IRQ_ECC_CORRECTION BIT(8) -#define CSI2_CTX_IRQ_LINE_NUMBER BIT(7) -#define CSI2_CTX_IRQ_FRAME_NUMBER BIT(6) -#define CSI2_CTX_IRQ_CS BIT(5) -#define CSI2_CTX_IRQ_LE BIT(3) -#define CSI2_CTX_IRQ_LS BIT(2) -#define CSI2_CTX_IRQ_FE BIT(1) -#define CSI2_CTX_IRQ_FS BIT(0) - -/* ISS BTE */ -#define BTE_CTRL (0x0030) -#define BTE_CTRL_BW_LIMITER_MASK (0x3ff << 22) -#define BTE_CTRL_BW_LIMITER_SHIFT 22 - -/* ISS ISP_SYS1 */ -#define ISP5_REVISION (0x0000) -#define ISP5_SYSCONFIG (0x0010) -#define ISP5_SYSCONFIG_STANDBYMODE_MASK (3 << 4) -#define ISP5_SYSCONFIG_STANDBYMODE_FORCE (0 << 4) -#define ISP5_SYSCONFIG_STANDBYMODE_NO (1 << 4) -#define ISP5_SYSCONFIG_STANDBYMODE_SMART (2 << 4) -#define ISP5_SYSCONFIG_SOFTRESET (1 << 1) - -#define ISP5_IRQSTATUS(i) (0x0028 + (0x10 * (i))) -#define ISP5_IRQENABLE_SET(i) (0x002c + (0x10 * (i))) -#define ISP5_IRQENABLE_CLR(i) (0x0030 + (0x10 * (i))) - -/* Bits shared for ISP5_IRQ* registers */ -#define ISP5_IRQ_OCP_ERR BIT(31) -#define ISP5_IRQ_IPIPE_INT_DPC_RNEW1 BIT(29) -#define ISP5_IRQ_IPIPE_INT_DPC_RNEW0 BIT(28) -#define ISP5_IRQ_IPIPE_INT_DPC_INIT BIT(27) -#define ISP5_IRQ_IPIPE_INT_EOF BIT(25) -#define ISP5_IRQ_H3A_INT_EOF BIT(24) -#define ISP5_IRQ_RSZ_INT_EOF1 BIT(23) -#define ISP5_IRQ_RSZ_INT_EOF0 BIT(22) -#define ISP5_IRQ_RSZ_FIFO_IN_BLK_ERR BIT(19) -#define ISP5_IRQ_RSZ_FIFO_OVF BIT(18) -#define ISP5_IRQ_RSZ_INT_CYC_RSZB BIT(17) -#define ISP5_IRQ_RSZ_INT_CYC_RSZA BIT(16) -#define ISP5_IRQ_RSZ_INT_DMA BIT(15) -#define ISP5_IRQ_RSZ_INT_LAST_PIX BIT(14) -#define ISP5_IRQ_RSZ_INT_REG BIT(13) -#define ISP5_IRQ_H3A_INT BIT(12) -#define ISP5_IRQ_AF_INT BIT(11) -#define ISP5_IRQ_AEW_INT BIT(10) -#define 
ISP5_IRQ_IPIPEIF_IRQ BIT(9) -#define ISP5_IRQ_IPIPE_INT_HST BIT(8) -#define ISP5_IRQ_IPIPE_INT_BSC BIT(7) -#define ISP5_IRQ_IPIPE_INT_DMA BIT(6) -#define ISP5_IRQ_IPIPE_INT_LAST_PIX BIT(5) -#define ISP5_IRQ_IPIPE_INT_REG BIT(4) -#define ISP5_IRQ_ISIF_INT(i) BIT(i) - -#define ISP5_CTRL (0x006c) -#define ISP5_CTRL_MSTANDBY BIT(24) -#define ISP5_CTRL_VD_PULSE_EXT BIT(23) -#define ISP5_CTRL_MSTANDBY_WAIT BIT(20) -#define ISP5_CTRL_BL_CLK_ENABLE BIT(15) -#define ISP5_CTRL_ISIF_CLK_ENABLE BIT(14) -#define ISP5_CTRL_H3A_CLK_ENABLE BIT(13) -#define ISP5_CTRL_RSZ_CLK_ENABLE BIT(12) -#define ISP5_CTRL_IPIPE_CLK_ENABLE BIT(11) -#define ISP5_CTRL_IPIPEIF_CLK_ENABLE BIT(10) -#define ISP5_CTRL_SYNC_ENABLE BIT(9) -#define ISP5_CTRL_PSYNC_CLK_SEL BIT(8) - -/* ISS ISP ISIF register offsets */ -#define ISIF_SYNCEN (0x0000) -#define ISIF_SYNCEN_DWEN BIT(1) -#define ISIF_SYNCEN_SYEN BIT(0) - -#define ISIF_MODESET (0x0004) -#define ISIF_MODESET_INPMOD_MASK (3 << 12) -#define ISIF_MODESET_INPMOD_RAW (0 << 12) -#define ISIF_MODESET_INPMOD_YCBCR16 (1 << 12) -#define ISIF_MODESET_INPMOD_YCBCR8 (2 << 12) -#define ISIF_MODESET_CCDW_MASK (7 << 8) -#define ISIF_MODESET_CCDW_2BIT (2 << 8) -#define ISIF_MODESET_CCDMD (1 << 7) -#define ISIF_MODESET_SWEN (1 << 5) -#define ISIF_MODESET_HDPOL (1 << 3) -#define ISIF_MODESET_VDPOL (1 << 2) - -#define ISIF_SPH (0x0018) -#define ISIF_SPH_MASK (0x7fff) - -#define ISIF_LNH (0x001c) -#define ISIF_LNH_MASK (0x7fff) - -#define ISIF_LNV (0x0028) -#define ISIF_LNV_MASK (0x7fff) - -#define ISIF_HSIZE (0x0034) -#define ISIF_HSIZE_ADCR BIT(12) -#define ISIF_HSIZE_HSIZE_MASK (0xfff) - -#define ISIF_CADU (0x003c) -#define ISIF_CADU_MASK (0x7ff) - -#define ISIF_CADL (0x0040) -#define ISIF_CADL_MASK (0xffff) - -#define ISIF_CCOLP (0x004c) -#define ISIF_CCOLP_CP0_F0_R (0 << 6) -#define ISIF_CCOLP_CP0_F0_GR (1 << 6) -#define ISIF_CCOLP_CP0_F0_B (3 << 6) -#define ISIF_CCOLP_CP0_F0_GB (2 << 6) -#define ISIF_CCOLP_CP1_F0_R (0 << 4) -#define ISIF_CCOLP_CP1_F0_GR (1 << 4) -#define ISIF_CCOLP_CP1_F0_B (3 << 4) -#define ISIF_CCOLP_CP1_F0_GB (2 << 4) -#define ISIF_CCOLP_CP2_F0_R (0 << 2) -#define ISIF_CCOLP_CP2_F0_GR (1 << 2) -#define ISIF_CCOLP_CP2_F0_B (3 << 2) -#define ISIF_CCOLP_CP2_F0_GB (2 << 2) -#define ISIF_CCOLP_CP3_F0_R (0 << 0) -#define ISIF_CCOLP_CP3_F0_GR (1 << 0) -#define ISIF_CCOLP_CP3_F0_B (3 << 0) -#define ISIF_CCOLP_CP3_F0_GB (2 << 0) - -#define ISIF_VDINT(i) (0x0070 + (i) * 4) -#define ISIF_VDINT_MASK (0x7fff) - -#define ISIF_CGAMMAWD (0x0080) -#define ISIF_CGAMMAWD_GWDI_MASK (0xf << 1) -#define ISIF_CGAMMAWD_GWDI(bpp) ((16 - (bpp)) << 1) - -#define ISIF_CCDCFG (0x0088) -#define ISIF_CCDCFG_Y8POS BIT(11) - -/* ISS ISP IPIPEIF register offsets */ -#define IPIPEIF_ENABLE (0x0000) - -#define IPIPEIF_CFG1 (0x0004) -#define IPIPEIF_CFG1_INPSRC1_MASK (3 << 14) -#define IPIPEIF_CFG1_INPSRC1_VPORT_RAW (0 << 14) -#define IPIPEIF_CFG1_INPSRC1_SDRAM_RAW (1 << 14) -#define IPIPEIF_CFG1_INPSRC1_ISIF_DARKFM (2 << 14) -#define IPIPEIF_CFG1_INPSRC1_SDRAM_YUV (3 << 14) -#define IPIPEIF_CFG1_INPSRC2_MASK (3 << 2) -#define IPIPEIF_CFG1_INPSRC2_ISIF (0 << 2) -#define IPIPEIF_CFG1_INPSRC2_SDRAM_RAW (1 << 2) -#define IPIPEIF_CFG1_INPSRC2_ISIF_DARKFM (2 << 2) -#define IPIPEIF_CFG1_INPSRC2_SDRAM_YUV (3 << 2) - -#define IPIPEIF_CFG2 (0x0030) -#define IPIPEIF_CFG2_YUV8P BIT(7) -#define IPIPEIF_CFG2_YUV8 BIT(6) -#define IPIPEIF_CFG2_YUV16 BIT(3) -#define IPIPEIF_CFG2_VDPOL BIT(2) -#define IPIPEIF_CFG2_HDPOL BIT(1) -#define IPIPEIF_CFG2_INTSW BIT(0) - -#define IPIPEIF_CLKDIV (0x0040) - -/* ISS ISP IPIPE 
register offsets */ -#define IPIPE_SRC_EN (0x0000) -#define IPIPE_SRC_EN_EN BIT(0) - -#define IPIPE_SRC_MODE (0x0004) -#define IPIPE_SRC_MODE_WRT BIT(1) -#define IPIPE_SRC_MODE_OST BIT(0) - -#define IPIPE_SRC_FMT (0x0008) -#define IPIPE_SRC_FMT_RAW2YUV (0 << 0) -#define IPIPE_SRC_FMT_RAW2RAW (1 << 0) -#define IPIPE_SRC_FMT_RAW2STATS (2 << 0) -#define IPIPE_SRC_FMT_YUV2YUV (3 << 0) - -#define IPIPE_SRC_COL (0x000c) -#define IPIPE_SRC_COL_OO_R (0 << 6) -#define IPIPE_SRC_COL_OO_GR (1 << 6) -#define IPIPE_SRC_COL_OO_B (3 << 6) -#define IPIPE_SRC_COL_OO_GB (2 << 6) -#define IPIPE_SRC_COL_OE_R (0 << 4) -#define IPIPE_SRC_COL_OE_GR (1 << 4) -#define IPIPE_SRC_COL_OE_B (3 << 4) -#define IPIPE_SRC_COL_OE_GB (2 << 4) -#define IPIPE_SRC_COL_EO_R (0 << 2) -#define IPIPE_SRC_COL_EO_GR (1 << 2) -#define IPIPE_SRC_COL_EO_B (3 << 2) -#define IPIPE_SRC_COL_EO_GB (2 << 2) -#define IPIPE_SRC_COL_EE_R (0 << 0) -#define IPIPE_SRC_COL_EE_GR (1 << 0) -#define IPIPE_SRC_COL_EE_B (3 << 0) -#define IPIPE_SRC_COL_EE_GB (2 << 0) - -#define IPIPE_SRC_VPS (0x0010) -#define IPIPE_SRC_VPS_MASK (0xffff) - -#define IPIPE_SRC_VSZ (0x0014) -#define IPIPE_SRC_VSZ_MASK (0x1fff) - -#define IPIPE_SRC_HPS (0x0018) -#define IPIPE_SRC_HPS_MASK (0xffff) - -#define IPIPE_SRC_HSZ (0x001c) -#define IPIPE_SRC_HSZ_MASK (0x1ffe) - -#define IPIPE_SEL_SBU (0x0020) - -#define IPIPE_SRC_STA (0x0024) - -#define IPIPE_GCK_MMR (0x0028) -#define IPIPE_GCK_MMR_REG BIT(0) - -#define IPIPE_GCK_PIX (0x002c) -#define IPIPE_GCK_PIX_G3 BIT(3) -#define IPIPE_GCK_PIX_G2 BIT(2) -#define IPIPE_GCK_PIX_G1 BIT(1) -#define IPIPE_GCK_PIX_G0 BIT(0) - -#define IPIPE_DPC_LUT_EN (0x0034) -#define IPIPE_DPC_LUT_SEL (0x0038) -#define IPIPE_DPC_LUT_ADR (0x003c) -#define IPIPE_DPC_LUT_SIZ (0x0040) - -#define IPIPE_DPC_OTF_EN (0x0044) -#define IPIPE_DPC_OTF_TYP (0x0048) -#define IPIPE_DPC_OTF_2_D_THR_R (0x004c) -#define IPIPE_DPC_OTF_2_D_THR_GR (0x0050) -#define IPIPE_DPC_OTF_2_D_THR_GB (0x0054) -#define IPIPE_DPC_OTF_2_D_THR_B (0x0058) -#define IPIPE_DPC_OTF_2_C_THR_R (0x005c) -#define IPIPE_DPC_OTF_2_C_THR_GR (0x0060) -#define IPIPE_DPC_OTF_2_C_THR_GB (0x0064) -#define IPIPE_DPC_OTF_2_C_THR_B (0x0068) -#define IPIPE_DPC_OTF_3_SHF (0x006c) -#define IPIPE_DPC_OTF_3_D_THR (0x0070) -#define IPIPE_DPC_OTF_3_D_SPL (0x0074) -#define IPIPE_DPC_OTF_3_D_MIN (0x0078) -#define IPIPE_DPC_OTF_3_D_MAX (0x007c) -#define IPIPE_DPC_OTF_3_C_THR (0x0080) -#define IPIPE_DPC_OTF_3_C_SLP (0x0084) -#define IPIPE_DPC_OTF_3_C_MIN (0x0088) -#define IPIPE_DPC_OTF_3_C_MAX (0x008c) - -#define IPIPE_LSC_VOFT (0x0090) -#define IPIPE_LSC_VA2 (0x0094) -#define IPIPE_LSC_VA1 (0x0098) -#define IPIPE_LSC_VS (0x009c) -#define IPIPE_LSC_HOFT (0x00a0) -#define IPIPE_LSC_HA2 (0x00a4) -#define IPIPE_LSC_HA1 (0x00a8) -#define IPIPE_LSC_HS (0x00ac) -#define IPIPE_LSC_GAN_R (0x00b0) -#define IPIPE_LSC_GAN_GR (0x00b4) -#define IPIPE_LSC_GAN_GB (0x00b8) -#define IPIPE_LSC_GAN_B (0x00bc) -#define IPIPE_LSC_OFT_R (0x00c0) -#define IPIPE_LSC_OFT_GR (0x00c4) -#define IPIPE_LSC_OFT_GB (0x00c8) -#define IPIPE_LSC_OFT_B (0x00cc) -#define IPIPE_LSC_SHF (0x00d0) -#define IPIPE_LSC_MAX (0x00d4) - -#define IPIPE_D2F_1ST_EN (0x00d8) -#define IPIPE_D2F_1ST_TYP (0x00dc) -#define IPIPE_D2F_1ST_THR_00 (0x00e0) -#define IPIPE_D2F_1ST_THR_01 (0x00e4) -#define IPIPE_D2F_1ST_THR_02 (0x00e8) -#define IPIPE_D2F_1ST_THR_03 (0x00ec) -#define IPIPE_D2F_1ST_THR_04 (0x00f0) -#define IPIPE_D2F_1ST_THR_05 (0x00f4) -#define IPIPE_D2F_1ST_THR_06 (0x00f8) -#define IPIPE_D2F_1ST_THR_07 (0x00fc) -#define IPIPE_D2F_1ST_STR_00 (0x0100) -#define 
IPIPE_D2F_1ST_STR_01 (0x0104) -#define IPIPE_D2F_1ST_STR_02 (0x0108) -#define IPIPE_D2F_1ST_STR_03 (0x010c) -#define IPIPE_D2F_1ST_STR_04 (0x0110) -#define IPIPE_D2F_1ST_STR_05 (0x0114) -#define IPIPE_D2F_1ST_STR_06 (0x0118) -#define IPIPE_D2F_1ST_STR_07 (0x011c) -#define IPIPE_D2F_1ST_SPR_00 (0x0120) -#define IPIPE_D2F_1ST_SPR_01 (0x0124) -#define IPIPE_D2F_1ST_SPR_02 (0x0128) -#define IPIPE_D2F_1ST_SPR_03 (0x012c) -#define IPIPE_D2F_1ST_SPR_04 (0x0130) -#define IPIPE_D2F_1ST_SPR_05 (0x0134) -#define IPIPE_D2F_1ST_SPR_06 (0x0138) -#define IPIPE_D2F_1ST_SPR_07 (0x013c) -#define IPIPE_D2F_1ST_EDG_MIN (0x0140) -#define IPIPE_D2F_1ST_EDG_MAX (0x0144) -#define IPIPE_D2F_2ND_EN (0x0148) -#define IPIPE_D2F_2ND_TYP (0x014c) -#define IPIPE_D2F_2ND_THR00 (0x0150) -#define IPIPE_D2F_2ND_THR01 (0x0154) -#define IPIPE_D2F_2ND_THR02 (0x0158) -#define IPIPE_D2F_2ND_THR03 (0x015c) -#define IPIPE_D2F_2ND_THR04 (0x0160) -#define IPIPE_D2F_2ND_THR05 (0x0164) -#define IPIPE_D2F_2ND_THR06 (0x0168) -#define IPIPE_D2F_2ND_THR07 (0x016c) -#define IPIPE_D2F_2ND_STR_00 (0x0170) -#define IPIPE_D2F_2ND_STR_01 (0x0174) -#define IPIPE_D2F_2ND_STR_02 (0x0178) -#define IPIPE_D2F_2ND_STR_03 (0x017c) -#define IPIPE_D2F_2ND_STR_04 (0x0180) -#define IPIPE_D2F_2ND_STR_05 (0x0184) -#define IPIPE_D2F_2ND_STR_06 (0x0188) -#define IPIPE_D2F_2ND_STR_07 (0x018c) -#define IPIPE_D2F_2ND_SPR_00 (0x0190) -#define IPIPE_D2F_2ND_SPR_01 (0x0194) -#define IPIPE_D2F_2ND_SPR_02 (0x0198) -#define IPIPE_D2F_2ND_SPR_03 (0x019c) -#define IPIPE_D2F_2ND_SPR_04 (0x01a0) -#define IPIPE_D2F_2ND_SPR_05 (0x01a4) -#define IPIPE_D2F_2ND_SPR_06 (0x01a8) -#define IPIPE_D2F_2ND_SPR_07 (0x01ac) -#define IPIPE_D2F_2ND_EDG_MIN (0x01b0) -#define IPIPE_D2F_2ND_EDG_MAX (0x01b4) - -#define IPIPE_GIC_EN (0x01b8) -#define IPIPE_GIC_TYP (0x01bc) -#define IPIPE_GIC_GAN (0x01c0) -#define IPIPE_GIC_NFGAIN (0x01c4) -#define IPIPE_GIC_THR (0x01c8) -#define IPIPE_GIC_SLP (0x01cc) - -#define IPIPE_WB2_OFT_R (0x01d0) -#define IPIPE_WB2_OFT_GR (0x01d4) -#define IPIPE_WB2_OFT_GB (0x01d8) -#define IPIPE_WB2_OFT_B (0x01dc) - -#define IPIPE_WB2_WGN_R (0x01e0) -#define IPIPE_WB2_WGN_GR (0x01e4) -#define IPIPE_WB2_WGN_GB (0x01e8) -#define IPIPE_WB2_WGN_B (0x01ec) - -#define IPIPE_CFA_MODE (0x01f0) -#define IPIPE_CFA_2DIR_HPF_THR (0x01f4) -#define IPIPE_CFA_2DIR_HPF_SLP (0x01f8) -#define IPIPE_CFA_2DIR_MIX_THR (0x01fc) -#define IPIPE_CFA_2DIR_MIX_SLP (0x0200) -#define IPIPE_CFA_2DIR_DIR_TRH (0x0204) -#define IPIPE_CFA_2DIR_DIR_SLP (0x0208) -#define IPIPE_CFA_2DIR_NDWT (0x020c) -#define IPIPE_CFA_MONO_HUE_FRA (0x0210) -#define IPIPE_CFA_MONO_EDG_THR (0x0214) -#define IPIPE_CFA_MONO_THR_MIN (0x0218) - -#define IPIPE_CFA_MONO_THR_SLP (0x021c) -#define IPIPE_CFA_MONO_SLP_MIN (0x0220) -#define IPIPE_CFA_MONO_SLP_SLP (0x0224) -#define IPIPE_CFA_MONO_LPWT (0x0228) - -#define IPIPE_RGB1_MUL_RR (0x022c) -#define IPIPE_RGB1_MUL_GR (0x0230) -#define IPIPE_RGB1_MUL_BR (0x0234) -#define IPIPE_RGB1_MUL_RG (0x0238) -#define IPIPE_RGB1_MUL_GG (0x023c) -#define IPIPE_RGB1_MUL_BG (0x0240) -#define IPIPE_RGB1_MUL_RB (0x0244) -#define IPIPE_RGB1_MUL_GB (0x0248) -#define IPIPE_RGB1_MUL_BB (0x024c) -#define IPIPE_RGB1_OFT_OR (0x0250) -#define IPIPE_RGB1_OFT_OG (0x0254) -#define IPIPE_RGB1_OFT_OB (0x0258) -#define IPIPE_GMM_CFG (0x025c) -#define IPIPE_RGB2_MUL_RR (0x0260) -#define IPIPE_RGB2_MUL_GR (0x0264) -#define IPIPE_RGB2_MUL_BR (0x0268) -#define IPIPE_RGB2_MUL_RG (0x026c) -#define IPIPE_RGB2_MUL_GG (0x0270) -#define IPIPE_RGB2_MUL_BG (0x0274) -#define IPIPE_RGB2_MUL_RB (0x0278) -#define 
IPIPE_RGB2_MUL_GB (0x027c) -#define IPIPE_RGB2_MUL_BB (0x0280) -#define IPIPE_RGB2_OFT_OR (0x0284) -#define IPIPE_RGB2_OFT_OG (0x0288) -#define IPIPE_RGB2_OFT_OB (0x028c) - -#define IPIPE_YUV_ADJ (0x0294) -#define IPIPE_YUV_MUL_RY (0x0298) -#define IPIPE_YUV_MUL_GY (0x029c) -#define IPIPE_YUV_MUL_BY (0x02a0) -#define IPIPE_YUV_MUL_RCB (0x02a4) -#define IPIPE_YUV_MUL_GCB (0x02a8) -#define IPIPE_YUV_MUL_BCB (0x02ac) -#define IPIPE_YUV_MUL_RCR (0x02b0) -#define IPIPE_YUV_MUL_GCR (0x02b4) -#define IPIPE_YUV_MUL_BCR (0x02b8) -#define IPIPE_YUV_OFT_Y (0x02bc) -#define IPIPE_YUV_OFT_CB (0x02c0) -#define IPIPE_YUV_OFT_CR (0x02c4) - -#define IPIPE_YUV_PHS (0x02c8) -#define IPIPE_YUV_PHS_LPF BIT(1) -#define IPIPE_YUV_PHS_POS BIT(0) - -#define IPIPE_YEE_EN (0x02d4) -#define IPIPE_YEE_TYP (0x02d8) -#define IPIPE_YEE_SHF (0x02dc) -#define IPIPE_YEE_MUL_00 (0x02e0) -#define IPIPE_YEE_MUL_01 (0x02e4) -#define IPIPE_YEE_MUL_02 (0x02e8) -#define IPIPE_YEE_MUL_10 (0x02ec) -#define IPIPE_YEE_MUL_11 (0x02f0) -#define IPIPE_YEE_MUL_12 (0x02f4) -#define IPIPE_YEE_MUL_20 (0x02f8) -#define IPIPE_YEE_MUL_21 (0x02fc) -#define IPIPE_YEE_MUL_22 (0x0300) -#define IPIPE_YEE_THR (0x0304) -#define IPIPE_YEE_E_GAN (0x0308) -#define IPIPE_YEE_E_THR_1 (0x030c) -#define IPIPE_YEE_E_THR_2 (0x0310) -#define IPIPE_YEE_G_GAN (0x0314) -#define IPIPE_YEE_G_OFT (0x0318) - -#define IPIPE_CAR_EN (0x031c) -#define IPIPE_CAR_TYP (0x0320) -#define IPIPE_CAR_SW (0x0324) -#define IPIPE_CAR_HPF_TYP (0x0328) -#define IPIPE_CAR_HPF_SHF (0x032c) -#define IPIPE_CAR_HPF_THR (0x0330) -#define IPIPE_CAR_GN1_GAN (0x0334) -#define IPIPE_CAR_GN1_SHF (0x0338) -#define IPIPE_CAR_GN1_MIN (0x033c) -#define IPIPE_CAR_GN2_GAN (0x0340) -#define IPIPE_CAR_GN2_SHF (0x0344) -#define IPIPE_CAR_GN2_MIN (0x0348) -#define IPIPE_CGS_EN (0x034c) -#define IPIPE_CGS_GN1_L_THR (0x0350) -#define IPIPE_CGS_GN1_L_GAIN (0x0354) -#define IPIPE_CGS_GN1_L_SHF (0x0358) -#define IPIPE_CGS_GN1_L_MIN (0x035c) -#define IPIPE_CGS_GN1_H_THR (0x0360) -#define IPIPE_CGS_GN1_H_GAIN (0x0364) -#define IPIPE_CGS_GN1_H_SHF (0x0368) -#define IPIPE_CGS_GN1_H_MIN (0x036c) -#define IPIPE_CGS_GN2_L_THR (0x0370) -#define IPIPE_CGS_GN2_L_GAIN (0x0374) -#define IPIPE_CGS_GN2_L_SHF (0x0378) -#define IPIPE_CGS_GN2_L_MIN (0x037c) - -#define IPIPE_BOX_EN (0x0380) -#define IPIPE_BOX_MODE (0x0384) -#define IPIPE_BOX_TYP (0x0388) -#define IPIPE_BOX_SHF (0x038c) -#define IPIPE_BOX_SDR_SAD_H (0x0390) -#define IPIPE_BOX_SDR_SAD_L (0x0394) - -#define IPIPE_HST_EN (0x039c) -#define IPIPE_HST_MODE (0x03a0) -#define IPIPE_HST_SEL (0x03a4) -#define IPIPE_HST_PARA (0x03a8) -#define IPIPE_HST_0_VPS (0x03ac) -#define IPIPE_HST_0_VSZ (0x03b0) -#define IPIPE_HST_0_HPS (0x03b4) -#define IPIPE_HST_0_HSZ (0x03b8) -#define IPIPE_HST_1_VPS (0x03bc) -#define IPIPE_HST_1_VSZ (0x03c0) -#define IPIPE_HST_1_HPS (0x03c4) -#define IPIPE_HST_1_HSZ (0x03c8) -#define IPIPE_HST_2_VPS (0x03cc) -#define IPIPE_HST_2_VSZ (0x03d0) -#define IPIPE_HST_2_HPS (0x03d4) -#define IPIPE_HST_2_HSZ (0x03d8) -#define IPIPE_HST_3_VPS (0x03dc) -#define IPIPE_HST_3_VSZ (0x03e0) -#define IPIPE_HST_3_HPS (0x03e4) -#define IPIPE_HST_3_HSZ (0x03e8) -#define IPIPE_HST_TBL (0x03ec) -#define IPIPE_HST_MUL_R (0x03f0) -#define IPIPE_HST_MUL_GR (0x03f4) -#define IPIPE_HST_MUL_GB (0x03f8) -#define IPIPE_HST_MUL_B (0x03fc) - -#define IPIPE_BSC_EN (0x0400) -#define IPIPE_BSC_MODE (0x0404) -#define IPIPE_BSC_TYP (0x0408) -#define IPIPE_BSC_ROW_VCT (0x040c) -#define IPIPE_BSC_ROW_SHF (0x0410) -#define IPIPE_BSC_ROW_VPO (0x0414) -#define IPIPE_BSC_ROW_VNU (0x0418) 
-#define IPIPE_BSC_ROW_VSKIP (0x041c) -#define IPIPE_BSC_ROW_HPO (0x0420) -#define IPIPE_BSC_ROW_HNU (0x0424) -#define IPIPE_BSC_ROW_HSKIP (0x0428) -#define IPIPE_BSC_COL_VCT (0x042c) -#define IPIPE_BSC_COL_SHF (0x0430) -#define IPIPE_BSC_COL_VPO (0x0434) -#define IPIPE_BSC_COL_VNU (0x0438) -#define IPIPE_BSC_COL_VSKIP (0x043c) -#define IPIPE_BSC_COL_HPO (0x0440) -#define IPIPE_BSC_COL_HNU (0x0444) -#define IPIPE_BSC_COL_HSKIP (0x0448) - -#define IPIPE_BSC_EN (0x0400) - -/* ISS ISP Resizer register offsets */ -#define RSZ_REVISION (0x0000) -#define RSZ_SYSCONFIG (0x0004) -#define RSZ_SYSCONFIG_RSZB_CLK_EN BIT(9) -#define RSZ_SYSCONFIG_RSZA_CLK_EN BIT(8) - -#define RSZ_IN_FIFO_CTRL (0x000c) -#define RSZ_IN_FIFO_CTRL_THRLD_LOW_MASK (0x1ff << 16) -#define RSZ_IN_FIFO_CTRL_THRLD_LOW_SHIFT 16 -#define RSZ_IN_FIFO_CTRL_THRLD_HIGH_MASK (0x1ff << 0) -#define RSZ_IN_FIFO_CTRL_THRLD_HIGH_SHIFT 0 - -#define RSZ_FRACDIV (0x0008) -#define RSZ_FRACDIV_MASK (0xffff) - -#define RSZ_SRC_EN (0x0020) -#define RSZ_SRC_EN_SRC_EN BIT(0) - -#define RSZ_SRC_MODE (0x0024) -#define RSZ_SRC_MODE_OST BIT(0) -#define RSZ_SRC_MODE_WRT BIT(1) - -#define RSZ_SRC_FMT0 (0x0028) -#define RSZ_SRC_FMT0_BYPASS BIT(1) -#define RSZ_SRC_FMT0_SEL BIT(0) - -#define RSZ_SRC_FMT1 (0x002c) -#define RSZ_SRC_FMT1_IN420 BIT(1) - -#define RSZ_SRC_VPS (0x0030) -#define RSZ_SRC_VSZ (0x0034) -#define RSZ_SRC_HPS (0x0038) -#define RSZ_SRC_HSZ (0x003c) -#define RSZ_DMA_RZA (0x0040) -#define RSZ_DMA_RZB (0x0044) -#define RSZ_DMA_STA (0x0048) -#define RSZ_GCK_MMR (0x004c) -#define RSZ_GCK_MMR_MMR BIT(0) - -#define RSZ_GCK_SDR (0x0054) -#define RSZ_GCK_SDR_CORE BIT(0) - -#define RSZ_IRQ_RZA (0x0058) -#define RSZ_IRQ_RZA_MASK (0x1fff) - -#define RSZ_IRQ_RZB (0x005c) -#define RSZ_IRQ_RZB_MASK (0x1fff) - -#define RSZ_YUV_Y_MIN (0x0060) -#define RSZ_YUV_Y_MAX (0x0064) -#define RSZ_YUV_C_MIN (0x0068) -#define RSZ_YUV_C_MAX (0x006c) - -#define RSZ_SEQ (0x0074) -#define RSZ_SEQ_HRVB BIT(2) -#define RSZ_SEQ_HRVA BIT(0) - -#define RZA_EN (0x0078) -#define RZA_MODE (0x007c) -#define RZA_MODE_ONE_SHOT BIT(0) - -#define RZA_420 (0x0080) -#define RZA_I_VPS (0x0084) -#define RZA_I_HPS (0x0088) -#define RZA_O_VSZ (0x008c) -#define RZA_O_HSZ (0x0090) -#define RZA_V_PHS_Y (0x0094) -#define RZA_V_PHS_C (0x0098) -#define RZA_V_DIF (0x009c) -#define RZA_V_TYP (0x00a0) -#define RZA_V_LPF (0x00a4) -#define RZA_H_PHS (0x00a8) -#define RZA_H_DIF (0x00b0) -#define RZA_H_TYP (0x00b4) -#define RZA_H_LPF (0x00b8) -#define RZA_DWN_EN (0x00bc) -#define RZA_SDR_Y_BAD_H (0x00d0) -#define RZA_SDR_Y_BAD_L (0x00d4) -#define RZA_SDR_Y_SAD_H (0x00d8) -#define RZA_SDR_Y_SAD_L (0x00dc) -#define RZA_SDR_Y_OFT (0x00e0) -#define RZA_SDR_Y_PTR_S (0x00e4) -#define RZA_SDR_Y_PTR_E (0x00e8) -#define RZA_SDR_C_BAD_H (0x00ec) -#define RZA_SDR_C_BAD_L (0x00f0) -#define RZA_SDR_C_SAD_H (0x00f4) -#define RZA_SDR_C_SAD_L (0x00f8) -#define RZA_SDR_C_OFT (0x00fc) -#define RZA_SDR_C_PTR_S (0x0100) -#define RZA_SDR_C_PTR_E (0x0104) - -#define RZB_EN (0x0108) -#define RZB_MODE (0x010c) -#define RZB_420 (0x0110) -#define RZB_I_VPS (0x0114) -#define RZB_I_HPS (0x0118) -#define RZB_O_VSZ (0x011c) -#define RZB_O_HSZ (0x0120) - -#define RZB_V_DIF (0x012c) -#define RZB_V_TYP (0x0130) -#define RZB_V_LPF (0x0134) - -#define RZB_H_DIF (0x0140) -#define RZB_H_TYP (0x0144) -#define RZB_H_LPF (0x0148) - -#define RZB_SDR_Y_BAD_H (0x0160) -#define RZB_SDR_Y_BAD_L (0x0164) -#define RZB_SDR_Y_SAD_H (0x0168) -#define RZB_SDR_Y_SAD_L (0x016c) -#define RZB_SDR_Y_OFT (0x0170) -#define RZB_SDR_Y_PTR_S (0x0174) -#define 
RZB_SDR_Y_PTR_E (0x0178) -#define RZB_SDR_C_BAD_H (0x017c) -#define RZB_SDR_C_BAD_L (0x0180) -#define RZB_SDR_C_SAD_H (0x0184) -#define RZB_SDR_C_SAD_L (0x0188) - -#define RZB_SDR_C_PTR_S (0x0190) -#define RZB_SDR_C_PTR_E (0x0194) - -/* Shared Bitmasks between RZA & RZB */ -#define RSZ_EN_EN BIT(0) - -#define RSZ_420_CEN BIT(1) -#define RSZ_420_YEN BIT(0) - -#define RSZ_I_VPS_MASK (0x1fff) - -#define RSZ_I_HPS_MASK (0x1fff) - -#define RSZ_O_VSZ_MASK (0x1fff) - -#define RSZ_O_HSZ_MASK (0x1ffe) - -#define RSZ_V_PHS_Y_MASK (0x3fff) - -#define RSZ_V_PHS_C_MASK (0x3fff) - -#define RSZ_V_DIF_MASK (0x3fff) - -#define RSZ_V_TYP_C BIT(1) -#define RSZ_V_TYP_Y BIT(0) - -#define RSZ_V_LPF_C_MASK (0x3f << 6) -#define RSZ_V_LPF_C_SHIFT 6 -#define RSZ_V_LPF_Y_MASK (0x3f << 0) -#define RSZ_V_LPF_Y_SHIFT 0 - -#define RSZ_H_PHS_MASK (0x3fff) - -#define RSZ_H_DIF_MASK (0x3fff) - -#define RSZ_H_TYP_C BIT(1) -#define RSZ_H_TYP_Y BIT(0) - -#define RSZ_H_LPF_C_MASK (0x3f << 6) -#define RSZ_H_LPF_C_SHIFT 6 -#define RSZ_H_LPF_Y_MASK (0x3f << 0) -#define RSZ_H_LPF_Y_SHIFT 0 - -#define RSZ_DWN_EN_DWN_EN BIT(0) - -#endif /* _OMAP4_ISS_REGS_H_ */ diff --git a/drivers/staging/media/omap4iss/iss_resizer.c b/drivers/staging/media/omap4iss/iss_resizer.c deleted file mode 100644 index 58e698ef9108..000000000000 --- a/drivers/staging/media/omap4iss/iss_resizer.c +++ /dev/null @@ -1,884 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * TI OMAP4 ISS V4L2 Driver - ISP RESIZER module - * - * Copyright (C) 2012 Texas Instruments, Inc. - * - * Author: Sergio Aguirre - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "iss.h" -#include "iss_regs.h" -#include "iss_resizer.h" - -static const unsigned int resizer_fmts[] = { - MEDIA_BUS_FMT_UYVY8_1X16, - MEDIA_BUS_FMT_YUYV8_1X16, -}; - -/* - * resizer_print_status - Print current RESIZER Module register values. - * @resizer: Pointer to ISS ISP RESIZER device. - * - * Also prints other debug information stored in the RESIZER module. 
- */ -#define RSZ_PRINT_REGISTER(iss, name)\ - dev_dbg(iss->dev, "###RSZ " #name "=0x%08x\n", \ - iss_reg_read(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_##name)) - -#define RZA_PRINT_REGISTER(iss, name)\ - dev_dbg(iss->dev, "###RZA " #name "=0x%08x\n", \ - iss_reg_read(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_##name)) - -static void resizer_print_status(struct iss_resizer_device *resizer) -{ - struct iss_device *iss = to_iss_device(resizer); - - dev_dbg(iss->dev, "-------------RESIZER Register dump-------------\n"); - - RSZ_PRINT_REGISTER(iss, SYSCONFIG); - RSZ_PRINT_REGISTER(iss, IN_FIFO_CTRL); - RSZ_PRINT_REGISTER(iss, FRACDIV); - RSZ_PRINT_REGISTER(iss, SRC_EN); - RSZ_PRINT_REGISTER(iss, SRC_MODE); - RSZ_PRINT_REGISTER(iss, SRC_FMT0); - RSZ_PRINT_REGISTER(iss, SRC_FMT1); - RSZ_PRINT_REGISTER(iss, SRC_VPS); - RSZ_PRINT_REGISTER(iss, SRC_VSZ); - RSZ_PRINT_REGISTER(iss, SRC_HPS); - RSZ_PRINT_REGISTER(iss, SRC_HSZ); - RSZ_PRINT_REGISTER(iss, DMA_RZA); - RSZ_PRINT_REGISTER(iss, DMA_RZB); - RSZ_PRINT_REGISTER(iss, DMA_STA); - RSZ_PRINT_REGISTER(iss, GCK_MMR); - RSZ_PRINT_REGISTER(iss, GCK_SDR); - RSZ_PRINT_REGISTER(iss, IRQ_RZA); - RSZ_PRINT_REGISTER(iss, IRQ_RZB); - RSZ_PRINT_REGISTER(iss, YUV_Y_MIN); - RSZ_PRINT_REGISTER(iss, YUV_Y_MAX); - RSZ_PRINT_REGISTER(iss, YUV_C_MIN); - RSZ_PRINT_REGISTER(iss, YUV_C_MAX); - RSZ_PRINT_REGISTER(iss, SEQ); - - RZA_PRINT_REGISTER(iss, EN); - RZA_PRINT_REGISTER(iss, MODE); - RZA_PRINT_REGISTER(iss, 420); - RZA_PRINT_REGISTER(iss, I_VPS); - RZA_PRINT_REGISTER(iss, I_HPS); - RZA_PRINT_REGISTER(iss, O_VSZ); - RZA_PRINT_REGISTER(iss, O_HSZ); - RZA_PRINT_REGISTER(iss, V_PHS_Y); - RZA_PRINT_REGISTER(iss, V_PHS_C); - RZA_PRINT_REGISTER(iss, V_DIF); - RZA_PRINT_REGISTER(iss, V_TYP); - RZA_PRINT_REGISTER(iss, V_LPF); - RZA_PRINT_REGISTER(iss, H_PHS); - RZA_PRINT_REGISTER(iss, H_DIF); - RZA_PRINT_REGISTER(iss, H_TYP); - RZA_PRINT_REGISTER(iss, H_LPF); - RZA_PRINT_REGISTER(iss, DWN_EN); - RZA_PRINT_REGISTER(iss, SDR_Y_BAD_H); - RZA_PRINT_REGISTER(iss, SDR_Y_BAD_L); - RZA_PRINT_REGISTER(iss, SDR_Y_SAD_H); - RZA_PRINT_REGISTER(iss, SDR_Y_SAD_L); - RZA_PRINT_REGISTER(iss, SDR_Y_OFT); - RZA_PRINT_REGISTER(iss, SDR_Y_PTR_S); - RZA_PRINT_REGISTER(iss, SDR_Y_PTR_E); - RZA_PRINT_REGISTER(iss, SDR_C_BAD_H); - RZA_PRINT_REGISTER(iss, SDR_C_BAD_L); - RZA_PRINT_REGISTER(iss, SDR_C_SAD_H); - RZA_PRINT_REGISTER(iss, SDR_C_SAD_L); - RZA_PRINT_REGISTER(iss, SDR_C_OFT); - RZA_PRINT_REGISTER(iss, SDR_C_PTR_S); - RZA_PRINT_REGISTER(iss, SDR_C_PTR_E); - - dev_dbg(iss->dev, "-----------------------------------------------\n"); -} - -/* - * resizer_enable - Enable/Disable RESIZER. - * @enable: enable flag - * - */ -static void resizer_enable(struct iss_resizer_device *resizer, u8 enable) -{ - struct iss_device *iss = to_iss_device(resizer); - - iss_reg_update(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_SRC_EN, - RSZ_SRC_EN_SRC_EN, enable ? RSZ_SRC_EN_SRC_EN : 0); - - /* TODO: Enable RSZB */ - iss_reg_update(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_EN, RSZ_EN_EN, - enable ? RSZ_EN_EN : 0); -} - -/* ----------------------------------------------------------------------------- - * Format- and pipeline-related configuration helpers - */ - -/* - * resizer_set_outaddr - Set memory address to save output image - * @resizer: Pointer to ISP RESIZER device. - * @addr: 32-bit memory address aligned on 32 byte boundary. - * - * Sets the memory address where the output will be saved. 
- */ -static void resizer_set_outaddr(struct iss_resizer_device *resizer, u32 addr) -{ - struct iss_device *iss = to_iss_device(resizer); - struct v4l2_mbus_framefmt *informat, *outformat; - - informat = &resizer->formats[RESIZER_PAD_SINK]; - outformat = &resizer->formats[RESIZER_PAD_SOURCE_MEM]; - - /* Save address split in Base Address H & L */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_Y_BAD_H, - (addr >> 16) & 0xffff); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_Y_BAD_L, - addr & 0xffff); - - /* SAD = BAD */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_Y_SAD_H, - (addr >> 16) & 0xffff); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_Y_SAD_L, - addr & 0xffff); - - /* Program UV buffer address... Hardcoded to be contiguous! */ - if ((informat->code == MEDIA_BUS_FMT_UYVY8_1X16) && - (outformat->code == MEDIA_BUS_FMT_YUYV8_1_5X8)) { - u32 c_addr = addr + resizer->video_out.bpl_value - * outformat->height; - - /* Ensure Y_BAD_L[6:0] = C_BAD_L[6:0]*/ - if ((c_addr ^ addr) & 0x7f) { - c_addr &= ~0x7f; - c_addr += 0x80; - c_addr |= addr & 0x7f; - } - - /* Save address split in Base Address H & L */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_C_BAD_H, - (c_addr >> 16) & 0xffff); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_C_BAD_L, - c_addr & 0xffff); - - /* SAD = BAD */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_C_SAD_H, - (c_addr >> 16) & 0xffff); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_C_SAD_L, - c_addr & 0xffff); - } -} - -static void resizer_configure(struct iss_resizer_device *resizer) -{ - struct iss_device *iss = to_iss_device(resizer); - struct v4l2_mbus_framefmt *informat, *outformat; - - informat = &resizer->formats[RESIZER_PAD_SINK]; - outformat = &resizer->formats[RESIZER_PAD_SOURCE_MEM]; - - /* Disable pass-through more. Despite its name, the BYPASS bit controls - * pass-through mode, not bypass mode. - */ - iss_reg_clr(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_SRC_FMT0, - RSZ_SRC_FMT0_BYPASS); - - /* Select RSZ input */ - iss_reg_update(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_SRC_FMT0, - RSZ_SRC_FMT0_SEL, - resizer->input == RESIZER_INPUT_IPIPEIF ? 
- RSZ_SRC_FMT0_SEL : 0); - - /* RSZ ignores WEN signal from IPIPE/IPIPEIF */ - iss_reg_clr(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_SRC_MODE, - RSZ_SRC_MODE_WRT); - - /* Set Resizer in free-running mode */ - iss_reg_clr(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_SRC_MODE, - RSZ_SRC_MODE_OST); - - /* Init Resizer A */ - iss_reg_clr(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_MODE, - RZA_MODE_ONE_SHOT); - - /* Set size related things now */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_SRC_VPS, 0); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_SRC_HPS, 0); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_SRC_VSZ, - informat->height - 2); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_SRC_HSZ, - informat->width - 1); - - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_I_VPS, 0); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_I_HPS, 0); - - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_O_VSZ, - outformat->height - 2); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_O_HSZ, - outformat->width - 1); - - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_V_DIF, 0x100); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_H_DIF, 0x100); - - /* Buffer output settings */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_Y_PTR_S, 0); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_Y_PTR_E, - outformat->height - 1); - - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_Y_OFT, - resizer->video_out.bpl_value); - - /* UYVY -> NV12 conversion */ - if ((informat->code == MEDIA_BUS_FMT_UYVY8_1X16) && - (outformat->code == MEDIA_BUS_FMT_YUYV8_1_5X8)) { - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_420, - RSZ_420_CEN | RSZ_420_YEN); - - /* UV Buffer output settings */ - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_C_PTR_S, - 0); - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_C_PTR_E, - outformat->height - 1); - - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_SDR_C_OFT, - resizer->video_out.bpl_value); - } else { - iss_reg_write(iss, OMAP4_ISS_MEM_ISP_RESIZER, RZA_420, 0); - } -} - -/* ----------------------------------------------------------------------------- - * Interrupt handling - */ - -static void resizer_isr_buffer(struct iss_resizer_device *resizer) -{ - struct iss_buffer *buffer; - - /* The whole resizer needs to be stopped. Disabling RZA only produces - * input FIFO overflows, most probably when the next frame is received. - */ - resizer_enable(resizer, 0); - - buffer = omap4iss_video_buffer_next(&resizer->video_out); - if (!buffer) - return; - - resizer_set_outaddr(resizer, buffer->iss_addr); - - resizer_enable(resizer, 1); -} - -/* - * omap4iss_resizer_isr - Configure resizer during interframe time. - * @resizer: Pointer to ISP RESIZER device. - * @events: RESIZER events - */ -void omap4iss_resizer_isr(struct iss_resizer_device *resizer, u32 events) -{ - struct iss_device *iss = to_iss_device(resizer); - struct iss_pipeline *pipe = - to_iss_pipeline(&resizer->subdev.entity); - - if (events & (ISP5_IRQ_RSZ_FIFO_IN_BLK_ERR | - ISP5_IRQ_RSZ_FIFO_OVF)) { - dev_dbg(iss->dev, "RSZ Err: FIFO_IN_BLK:%d, FIFO_OVF:%d\n", - events & ISP5_IRQ_RSZ_FIFO_IN_BLK_ERR ? 1 : 0, - events & ISP5_IRQ_RSZ_FIFO_OVF ? 
1 : 0); - omap4iss_pipeline_cancel_stream(pipe); - } - - if (omap4iss_module_sync_is_stopping(&resizer->wait, - &resizer->stopping)) - return; - - if (events & ISP5_IRQ_RSZ_INT_DMA) - resizer_isr_buffer(resizer); -} - -/* ----------------------------------------------------------------------------- - * ISS video operations - */ - -static int resizer_video_queue(struct iss_video *video, - struct iss_buffer *buffer) -{ - struct iss_resizer_device *resizer = container_of(video, - struct iss_resizer_device, video_out); - - if (!(resizer->output & RESIZER_OUTPUT_MEMORY)) - return -ENODEV; - - resizer_set_outaddr(resizer, buffer->iss_addr); - - /* - * If streaming was enabled before there was a buffer queued - * or underrun happened in the ISR, the hardware was not enabled - * and DMA queue flag ISS_VIDEO_DMAQUEUE_UNDERRUN is still set. - * Enable it now. - */ - if (video->dmaqueue_flags & ISS_VIDEO_DMAQUEUE_UNDERRUN) { - resizer_enable(resizer, 1); - iss_video_dmaqueue_flags_clr(video); - } - - return 0; -} - -static const struct iss_video_operations resizer_video_ops = { - .queue = resizer_video_queue, -}; - -/* ----------------------------------------------------------------------------- - * V4L2 subdev operations - */ - -/* - * resizer_set_stream - Enable/Disable streaming on the RESIZER module - * @sd: ISP RESIZER V4L2 subdevice - * @enable: Enable/disable stream - */ -static int resizer_set_stream(struct v4l2_subdev *sd, int enable) -{ - struct iss_resizer_device *resizer = v4l2_get_subdevdata(sd); - struct iss_device *iss = to_iss_device(resizer); - struct iss_video *video_out = &resizer->video_out; - int ret = 0; - - if (resizer->state == ISS_PIPELINE_STREAM_STOPPED) { - if (enable == ISS_PIPELINE_STREAM_STOPPED) - return 0; - - omap4iss_isp_subclk_enable(iss, OMAP4_ISS_ISP_SUBCLK_RSZ); - - iss_reg_set(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_GCK_MMR, - RSZ_GCK_MMR_MMR); - iss_reg_set(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_GCK_SDR, - RSZ_GCK_SDR_CORE); - - /* FIXME: Enable RSZB also */ - iss_reg_set(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_SYSCONFIG, - RSZ_SYSCONFIG_RSZA_CLK_EN); - } - - switch (enable) { - case ISS_PIPELINE_STREAM_CONTINUOUS: - - resizer_configure(resizer); - resizer_print_status(resizer); - - /* - * When outputting to memory with no buffer available, let the - * buffer queue handler start the hardware. A DMA queue flag - * ISS_VIDEO_DMAQUEUE_QUEUED will be set as soon as there is - * a buffer available. 
- */ - if (resizer->output & RESIZER_OUTPUT_MEMORY && - !(video_out->dmaqueue_flags & ISS_VIDEO_DMAQUEUE_QUEUED)) - break; - - atomic_set(&resizer->stopping, 0); - resizer_enable(resizer, 1); - iss_video_dmaqueue_flags_clr(video_out); - break; - - case ISS_PIPELINE_STREAM_STOPPED: - if (resizer->state == ISS_PIPELINE_STREAM_STOPPED) - return 0; - if (omap4iss_module_sync_idle(&sd->entity, &resizer->wait, - &resizer->stopping)) - ret = -ETIMEDOUT; - - resizer_enable(resizer, 0); - iss_reg_clr(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_SYSCONFIG, - RSZ_SYSCONFIG_RSZA_CLK_EN); - iss_reg_clr(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_GCK_SDR, - RSZ_GCK_SDR_CORE); - iss_reg_clr(iss, OMAP4_ISS_MEM_ISP_RESIZER, RSZ_GCK_MMR, - RSZ_GCK_MMR_MMR); - omap4iss_isp_subclk_disable(iss, OMAP4_ISS_ISP_SUBCLK_RSZ); - iss_video_dmaqueue_flags_clr(video_out); - break; - } - - resizer->state = enable; - return ret; -} - -static struct v4l2_mbus_framefmt * -__resizer_get_format(struct iss_resizer_device *resizer, - struct v4l2_subdev_state *sd_state, unsigned int pad, - enum v4l2_subdev_format_whence which) -{ - if (which == V4L2_SUBDEV_FORMAT_TRY) - return v4l2_subdev_state_get_format(sd_state, pad); - return &resizer->formats[pad]; -} - -/* - * resizer_try_format - Try video format on a pad - * @resizer: ISS RESIZER device - * @sd_state: V4L2 subdev state - * @pad: Pad number - * @fmt: Format - */ -static void -resizer_try_format(struct iss_resizer_device *resizer, - struct v4l2_subdev_state *sd_state, unsigned int pad, - struct v4l2_mbus_framefmt *fmt, - enum v4l2_subdev_format_whence which) -{ - u32 pixelcode; - struct v4l2_mbus_framefmt *format; - unsigned int width = fmt->width; - unsigned int height = fmt->height; - unsigned int i; - - switch (pad) { - case RESIZER_PAD_SINK: - for (i = 0; i < ARRAY_SIZE(resizer_fmts); i++) { - if (fmt->code == resizer_fmts[i]) - break; - } - - /* If not found, use UYVY as default */ - if (i >= ARRAY_SIZE(resizer_fmts)) - fmt->code = MEDIA_BUS_FMT_UYVY8_1X16; - - /* Clamp the input size. */ - fmt->width = clamp_t(u32, width, 1, 8192); - fmt->height = clamp_t(u32, height, 1, 8192); - break; - - case RESIZER_PAD_SOURCE_MEM: - pixelcode = fmt->code; - format = __resizer_get_format(resizer, sd_state, - RESIZER_PAD_SINK, - which); - memcpy(fmt, format, sizeof(*fmt)); - - if ((pixelcode == MEDIA_BUS_FMT_YUYV8_1_5X8) && - (fmt->code == MEDIA_BUS_FMT_UYVY8_1X16)) - fmt->code = pixelcode; - - /* The data formatter truncates the number of horizontal output - * pixels to a multiple of 16. To avoid clipping data, allow - * callers to request an output size bigger than the input size - * up to the nearest multiple of 16. 
- */ - fmt->width = clamp_t(u32, width, 32, (fmt->width + 15) & ~15); - fmt->width &= ~15; - fmt->height = clamp_t(u32, height, 32, fmt->height); - break; - } - - fmt->colorspace = V4L2_COLORSPACE_JPEG; - fmt->field = V4L2_FIELD_NONE; -} - -/* - * resizer_enum_mbus_code - Handle pixel format enumeration - * @sd : pointer to v4l2 subdev structure - * @sd_state: V4L2 subdev state - * @code : pointer to v4l2_subdev_mbus_code_enum structure - * return -EINVAL or zero on success - */ -static int resizer_enum_mbus_code(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_mbus_code_enum *code) -{ - struct iss_resizer_device *resizer = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt *format; - - switch (code->pad) { - case RESIZER_PAD_SINK: - if (code->index >= ARRAY_SIZE(resizer_fmts)) - return -EINVAL; - - code->code = resizer_fmts[code->index]; - break; - - case RESIZER_PAD_SOURCE_MEM: - format = __resizer_get_format(resizer, sd_state, - RESIZER_PAD_SINK, - code->which); - - if (code->index == 0) { - code->code = format->code; - break; - } - - switch (format->code) { - case MEDIA_BUS_FMT_UYVY8_1X16: - if (code->index == 1) - code->code = MEDIA_BUS_FMT_YUYV8_1_5X8; - else - return -EINVAL; - break; - default: - if (code->index != 0) - return -EINVAL; - } - - break; - - default: - return -EINVAL; - } - - return 0; -} - -static int resizer_enum_frame_size(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_frame_size_enum *fse) -{ - struct iss_resizer_device *resizer = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt format; - - if (fse->index != 0) - return -EINVAL; - - format.code = fse->code; - format.width = 1; - format.height = 1; - resizer_try_format(resizer, sd_state, fse->pad, &format, fse->which); - fse->min_width = format.width; - fse->min_height = format.height; - - if (format.code != fse->code) - return -EINVAL; - - format.code = fse->code; - format.width = -1; - format.height = -1; - resizer_try_format(resizer, sd_state, fse->pad, &format, fse->which); - fse->max_width = format.width; - fse->max_height = format.height; - - return 0; -} - -/* - * resizer_get_format - Retrieve the video format on a pad - * @sd : ISP RESIZER V4L2 subdevice - * @sd_state: V4L2 subdev state - * @fmt: Format - * - * Return 0 on success or -EINVAL if the pad is invalid or doesn't correspond - * to the format type. - */ -static int resizer_get_format(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_format *fmt) -{ - struct iss_resizer_device *resizer = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt *format; - - format = __resizer_get_format(resizer, sd_state, fmt->pad, fmt->which); - if (!format) - return -EINVAL; - - fmt->format = *format; - return 0; -} - -/* - * resizer_set_format - Set the video format on a pad - * @sd : ISP RESIZER V4L2 subdevice - * @sd_state: V4L2 subdev state - * @fmt: Format - * - * Return 0 on success or -EINVAL if the pad is invalid or doesn't correspond - * to the format type. 
- */ -static int resizer_set_format(struct v4l2_subdev *sd, - struct v4l2_subdev_state *sd_state, - struct v4l2_subdev_format *fmt) -{ - struct iss_resizer_device *resizer = v4l2_get_subdevdata(sd); - struct v4l2_mbus_framefmt *format; - - format = __resizer_get_format(resizer, sd_state, fmt->pad, fmt->which); - if (!format) - return -EINVAL; - - resizer_try_format(resizer, sd_state, fmt->pad, &fmt->format, - fmt->which); - *format = fmt->format; - - /* Propagate the format from sink to source */ - if (fmt->pad == RESIZER_PAD_SINK) { - format = __resizer_get_format(resizer, sd_state, - RESIZER_PAD_SOURCE_MEM, - fmt->which); - *format = fmt->format; - resizer_try_format(resizer, sd_state, RESIZER_PAD_SOURCE_MEM, - format, - fmt->which); - } - - return 0; -} - -static int resizer_link_validate(struct v4l2_subdev *sd, - struct media_link *link, - struct v4l2_subdev_format *source_fmt, - struct v4l2_subdev_format *sink_fmt) -{ - /* Check if the two ends match */ - if (source_fmt->format.width != sink_fmt->format.width || - source_fmt->format.height != sink_fmt->format.height) - return -EPIPE; - - if (source_fmt->format.code != sink_fmt->format.code) - return -EPIPE; - - return 0; -} - -/* - * resizer_init_formats - Initialize formats on all pads - * @sd: ISP RESIZER V4L2 subdevice - * @fh: V4L2 subdev file handle - * - * Initialize all pad formats with default values. If fh is not NULL, try - * formats are initialized on the file handle. Otherwise active formats are - * initialized on the device. - */ -static int resizer_init_formats(struct v4l2_subdev *sd, - struct v4l2_subdev_fh *fh) -{ - struct v4l2_subdev_format format; - - memset(&format, 0, sizeof(format)); - format.pad = RESIZER_PAD_SINK; - format.which = fh ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE; - format.format.code = MEDIA_BUS_FMT_UYVY8_1X16; - format.format.width = 4096; - format.format.height = 4096; - resizer_set_format(sd, fh ? fh->state : NULL, &format); - - return 0; -} - -/* V4L2 subdev video operations */ -static const struct v4l2_subdev_video_ops resizer_v4l2_video_ops = { - .s_stream = resizer_set_stream, -}; - -/* V4L2 subdev pad operations */ -static const struct v4l2_subdev_pad_ops resizer_v4l2_pad_ops = { - .enum_mbus_code = resizer_enum_mbus_code, - .enum_frame_size = resizer_enum_frame_size, - .get_fmt = resizer_get_format, - .set_fmt = resizer_set_format, - .link_validate = resizer_link_validate, -}; - -/* V4L2 subdev operations */ -static const struct v4l2_subdev_ops resizer_v4l2_ops = { - .video = &resizer_v4l2_video_ops, - .pad = &resizer_v4l2_pad_ops, -}; - -/* V4L2 subdev internal operations */ -static const struct v4l2_subdev_internal_ops resizer_v4l2_internal_ops = { - .open = resizer_init_formats, -}; - -/* ----------------------------------------------------------------------------- - * Media entity operations - */ - -/* - * resizer_link_setup - Setup RESIZER connections - * @entity: RESIZER media entity - * @local: Pad at the local end of the link - * @remote: Pad at the remote end of the link - * @flags: Link flags - * - * return -EINVAL or zero on success - */ -static int resizer_link_setup(struct media_entity *entity, - const struct media_pad *local, - const struct media_pad *remote, u32 flags) -{ - struct v4l2_subdev *sd = media_entity_to_v4l2_subdev(entity); - struct iss_resizer_device *resizer = v4l2_get_subdevdata(sd); - struct iss_device *iss = to_iss_device(resizer); - unsigned int index = local->index; - - /* FIXME: this is actually a hack! 
*/ - if (is_media_entity_v4l2_subdev(remote->entity)) - index |= 2 << 16; - - switch (index) { - case RESIZER_PAD_SINK | 2 << 16: - /* Read from IPIPE or IPIPEIF. */ - if (!(flags & MEDIA_LNK_FL_ENABLED)) { - resizer->input = RESIZER_INPUT_NONE; - break; - } - - if (resizer->input != RESIZER_INPUT_NONE) - return -EBUSY; - - if (remote->entity == &iss->ipipeif.subdev.entity) - resizer->input = RESIZER_INPUT_IPIPEIF; - else if (remote->entity == &iss->ipipe.subdev.entity) - resizer->input = RESIZER_INPUT_IPIPE; - - break; - - case RESIZER_PAD_SOURCE_MEM: - /* Write to memory */ - if (flags & MEDIA_LNK_FL_ENABLED) { - if (resizer->output & ~RESIZER_OUTPUT_MEMORY) - return -EBUSY; - resizer->output |= RESIZER_OUTPUT_MEMORY; - } else { - resizer->output &= ~RESIZER_OUTPUT_MEMORY; - } - break; - - default: - return -EINVAL; - } - - return 0; -} - -/* media operations */ -static const struct media_entity_operations resizer_media_ops = { - .link_setup = resizer_link_setup, - .link_validate = v4l2_subdev_link_validate, -}; - -/* - * resizer_init_entities - Initialize V4L2 subdev and media entity - * @resizer: ISS ISP RESIZER module - * - * Return 0 on success and a negative error code on failure. - */ -static int resizer_init_entities(struct iss_resizer_device *resizer) -{ - struct v4l2_subdev *sd = &resizer->subdev; - struct media_pad *pads = resizer->pads; - struct media_entity *me = &sd->entity; - int ret; - - resizer->input = RESIZER_INPUT_NONE; - - v4l2_subdev_init(sd, &resizer_v4l2_ops); - sd->internal_ops = &resizer_v4l2_internal_ops; - strscpy(sd->name, "OMAP4 ISS ISP resizer", sizeof(sd->name)); - sd->grp_id = BIT(16); /* group ID for iss subdevs */ - v4l2_set_subdevdata(sd, resizer); - sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE; - - pads[RESIZER_PAD_SINK].flags = MEDIA_PAD_FL_SINK; - pads[RESIZER_PAD_SOURCE_MEM].flags = MEDIA_PAD_FL_SOURCE; - - me->ops = &resizer_media_ops; - ret = media_entity_pads_init(me, RESIZER_PADS_NUM, pads); - if (ret < 0) - return ret; - - resizer_init_formats(sd, NULL); - - resizer->video_out.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - resizer->video_out.ops = &resizer_video_ops; - resizer->video_out.iss = to_iss_device(resizer); - resizer->video_out.capture_mem = PAGE_ALIGN(4096 * 4096) * 3; - resizer->video_out.bpl_alignment = 32; - resizer->video_out.bpl_zero_padding = 1; - resizer->video_out.bpl_max = 0x1ffe0; - - return omap4iss_video_init(&resizer->video_out, "ISP resizer a"); -} - -void omap4iss_resizer_unregister_entities(struct iss_resizer_device *resizer) -{ - v4l2_device_unregister_subdev(&resizer->subdev); - omap4iss_video_unregister(&resizer->video_out); -} - -int omap4iss_resizer_register_entities(struct iss_resizer_device *resizer, - struct v4l2_device *vdev) -{ - int ret; - - /* Register the subdev and video node. */ - ret = v4l2_device_register_subdev(vdev, &resizer->subdev); - if (ret < 0) - goto error; - - ret = omap4iss_video_register(&resizer->video_out, vdev); - if (ret < 0) - goto error; - - return 0; - -error: - omap4iss_resizer_unregister_entities(resizer); - return ret; -} - -/* ----------------------------------------------------------------------------- - * ISP RESIZER initialisation and cleanup - */ - -/* - * omap4iss_resizer_init - RESIZER module initialization. - * @iss: Device pointer specific to the OMAP4 ISS. - * - * TODO: Get the initialisation values from platform data. - * - * Return 0 on success or a negative error code otherwise. 
- */ -int omap4iss_resizer_init(struct iss_device *iss) -{ - struct iss_resizer_device *resizer = &iss->resizer; - - resizer->state = ISS_PIPELINE_STREAM_STOPPED; - init_waitqueue_head(&resizer->wait); - - return resizer_init_entities(resizer); -} - -/* - * omap4iss_resizer_create_links() - RESIZER pads links creation - * @iss: Pointer to ISS device - * - * return negative error code or zero on success - */ -int omap4iss_resizer_create_links(struct iss_device *iss) -{ - struct iss_resizer_device *resizer = &iss->resizer; - - /* Connect the RESIZER subdev to the video node. */ - return media_create_pad_link(&resizer->subdev.entity, - RESIZER_PAD_SOURCE_MEM, - &resizer->video_out.video.entity, 0, 0); -} - -/* - * omap4iss_resizer_cleanup - RESIZER module cleanup. - * @iss: Device pointer specific to the OMAP4 ISS. - */ -void omap4iss_resizer_cleanup(struct iss_device *iss) -{ - struct iss_resizer_device *resizer = &iss->resizer; - - media_entity_cleanup(&resizer->subdev.entity); -} diff --git a/drivers/staging/media/omap4iss/iss_resizer.h b/drivers/staging/media/omap4iss/iss_resizer.h deleted file mode 100644 index cb937fccc21f..000000000000 --- a/drivers/staging/media/omap4iss/iss_resizer.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * TI OMAP4 ISS V4L2 Driver - ISP RESIZER module - * - * Copyright (C) 2012 Texas Instruments, Inc. - * - * Author: Sergio Aguirre - */ - -#ifndef OMAP4_ISS_RESIZER_H -#define OMAP4_ISS_RESIZER_H - -#include "iss_video.h" - -enum resizer_input_entity { - RESIZER_INPUT_NONE, - RESIZER_INPUT_IPIPE, - RESIZER_INPUT_IPIPEIF -}; - -#define RESIZER_OUTPUT_MEMORY BIT(0) - -/* Sink and source RESIZER pads */ -#define RESIZER_PAD_SINK 0 -#define RESIZER_PAD_SOURCE_MEM 1 -#define RESIZER_PADS_NUM 2 - -/* - * struct iss_resizer_device - Structure for the RESIZER module to store its own - * information - * @subdev: V4L2 subdevice - * @pads: Sink and source media entity pads - * @formats: Active video formats - * @input: Active input - * @output: Active outputs - * @video_out: Output video node - * @error: A hardware error occurred during capture - * @state: Streaming state - * @wait: Wait queue used to stop the module - * @stopping: Stopping state - */ -struct iss_resizer_device { - struct v4l2_subdev subdev; - struct media_pad pads[RESIZER_PADS_NUM]; - struct v4l2_mbus_framefmt formats[RESIZER_PADS_NUM]; - - enum resizer_input_entity input; - unsigned int output; - struct iss_video video_out; - unsigned int error; - - enum iss_pipeline_stream_state state; - wait_queue_head_t wait; - atomic_t stopping; -}; - -struct iss_device; - -int omap4iss_resizer_init(struct iss_device *iss); -int omap4iss_resizer_create_links(struct iss_device *iss); -void omap4iss_resizer_cleanup(struct iss_device *iss); -int omap4iss_resizer_register_entities(struct iss_resizer_device *resizer, - struct v4l2_device *vdev); -void omap4iss_resizer_unregister_entities(struct iss_resizer_device *resizer); - -int omap4iss_resizer_busy(struct iss_resizer_device *resizer); -void omap4iss_resizer_isr(struct iss_resizer_device *resizer, u32 events); -void omap4iss_resizer_restore_context(struct iss_device *iss); -void omap4iss_resizer_max_rate(struct iss_resizer_device *resizer, - unsigned int *max_rate); - -#endif /* OMAP4_ISS_RESIZER_H */ diff --git a/drivers/staging/media/omap4iss/iss_video.c b/drivers/staging/media/omap4iss/iss_video.c deleted file mode 100644 index 22fa4d6cae10..000000000000 --- a/drivers/staging/media/omap4iss/iss_video.c +++ /dev/null @@ -1,1274 +0,0 
@@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * TI OMAP4 ISS V4L2 Driver - Generic video node - * - * Copyright (C) 2012 Texas Instruments, Inc. - * - * Author: Sergio Aguirre - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "iss_video.h" -#include "iss.h" - -/* ----------------------------------------------------------------------------- - * Helper functions - */ - -static struct iss_format_info formats[] = { - { MEDIA_BUS_FMT_Y8_1X8, MEDIA_BUS_FMT_Y8_1X8, - MEDIA_BUS_FMT_Y8_1X8, MEDIA_BUS_FMT_Y8_1X8, - V4L2_PIX_FMT_GREY, 8, }, - { MEDIA_BUS_FMT_Y10_1X10, MEDIA_BUS_FMT_Y10_1X10, - MEDIA_BUS_FMT_Y10_1X10, MEDIA_BUS_FMT_Y8_1X8, - V4L2_PIX_FMT_Y10, 10, }, - { MEDIA_BUS_FMT_Y12_1X12, MEDIA_BUS_FMT_Y10_1X10, - MEDIA_BUS_FMT_Y12_1X12, MEDIA_BUS_FMT_Y8_1X8, - V4L2_PIX_FMT_Y12, 12, }, - { MEDIA_BUS_FMT_SBGGR8_1X8, MEDIA_BUS_FMT_SBGGR8_1X8, - MEDIA_BUS_FMT_SBGGR8_1X8, MEDIA_BUS_FMT_SBGGR8_1X8, - V4L2_PIX_FMT_SBGGR8, 8, }, - { MEDIA_BUS_FMT_SGBRG8_1X8, MEDIA_BUS_FMT_SGBRG8_1X8, - MEDIA_BUS_FMT_SGBRG8_1X8, MEDIA_BUS_FMT_SGBRG8_1X8, - V4L2_PIX_FMT_SGBRG8, 8, }, - { MEDIA_BUS_FMT_SGRBG8_1X8, MEDIA_BUS_FMT_SGRBG8_1X8, - MEDIA_BUS_FMT_SGRBG8_1X8, MEDIA_BUS_FMT_SGRBG8_1X8, - V4L2_PIX_FMT_SGRBG8, 8, }, - { MEDIA_BUS_FMT_SRGGB8_1X8, MEDIA_BUS_FMT_SRGGB8_1X8, - MEDIA_BUS_FMT_SRGGB8_1X8, MEDIA_BUS_FMT_SRGGB8_1X8, - V4L2_PIX_FMT_SRGGB8, 8, }, - { MEDIA_BUS_FMT_SGRBG10_DPCM8_1X8, MEDIA_BUS_FMT_SGRBG10_DPCM8_1X8, - MEDIA_BUS_FMT_SGRBG10_1X10, 0, - V4L2_PIX_FMT_SGRBG10DPCM8, 8, }, - { MEDIA_BUS_FMT_SBGGR10_1X10, MEDIA_BUS_FMT_SBGGR10_1X10, - MEDIA_BUS_FMT_SBGGR10_1X10, MEDIA_BUS_FMT_SBGGR8_1X8, - V4L2_PIX_FMT_SBGGR10, 10, }, - { MEDIA_BUS_FMT_SGBRG10_1X10, MEDIA_BUS_FMT_SGBRG10_1X10, - MEDIA_BUS_FMT_SGBRG10_1X10, MEDIA_BUS_FMT_SGBRG8_1X8, - V4L2_PIX_FMT_SGBRG10, 10, }, - { MEDIA_BUS_FMT_SGRBG10_1X10, MEDIA_BUS_FMT_SGRBG10_1X10, - MEDIA_BUS_FMT_SGRBG10_1X10, MEDIA_BUS_FMT_SGRBG8_1X8, - V4L2_PIX_FMT_SGRBG10, 10, }, - { MEDIA_BUS_FMT_SRGGB10_1X10, MEDIA_BUS_FMT_SRGGB10_1X10, - MEDIA_BUS_FMT_SRGGB10_1X10, MEDIA_BUS_FMT_SRGGB8_1X8, - V4L2_PIX_FMT_SRGGB10, 10, }, - { MEDIA_BUS_FMT_SBGGR12_1X12, MEDIA_BUS_FMT_SBGGR10_1X10, - MEDIA_BUS_FMT_SBGGR12_1X12, MEDIA_BUS_FMT_SBGGR8_1X8, - V4L2_PIX_FMT_SBGGR12, 12, }, - { MEDIA_BUS_FMT_SGBRG12_1X12, MEDIA_BUS_FMT_SGBRG10_1X10, - MEDIA_BUS_FMT_SGBRG12_1X12, MEDIA_BUS_FMT_SGBRG8_1X8, - V4L2_PIX_FMT_SGBRG12, 12, }, - { MEDIA_BUS_FMT_SGRBG12_1X12, MEDIA_BUS_FMT_SGRBG10_1X10, - MEDIA_BUS_FMT_SGRBG12_1X12, MEDIA_BUS_FMT_SGRBG8_1X8, - V4L2_PIX_FMT_SGRBG12, 12, }, - { MEDIA_BUS_FMT_SRGGB12_1X12, MEDIA_BUS_FMT_SRGGB10_1X10, - MEDIA_BUS_FMT_SRGGB12_1X12, MEDIA_BUS_FMT_SRGGB8_1X8, - V4L2_PIX_FMT_SRGGB12, 12, }, - { MEDIA_BUS_FMT_UYVY8_1X16, MEDIA_BUS_FMT_UYVY8_1X16, - MEDIA_BUS_FMT_UYVY8_1X16, 0, - V4L2_PIX_FMT_UYVY, 16, }, - { MEDIA_BUS_FMT_YUYV8_1X16, MEDIA_BUS_FMT_YUYV8_1X16, - MEDIA_BUS_FMT_YUYV8_1X16, 0, - V4L2_PIX_FMT_YUYV, 16, }, - { MEDIA_BUS_FMT_YUYV8_1_5X8, MEDIA_BUS_FMT_YUYV8_1_5X8, - MEDIA_BUS_FMT_YUYV8_1_5X8, 0, - V4L2_PIX_FMT_NV12, 8, }, -}; - -const struct iss_format_info * -omap4iss_video_format_info(u32 code) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(formats); ++i) { - if (formats[i].code == code) - return &formats[i]; - } - - return NULL; -} - -/* - * iss_video_mbus_to_pix - Convert v4l2_mbus_framefmt to v4l2_pix_format - * @video: ISS video instance - * @mbus: v4l2_mbus_framefmt format (input) - * @pix: v4l2_pix_format format (output) - * - * Fill the output pix structure with information 
from the input mbus format. - * The bytesperline and sizeimage fields are computed from the requested bytes - * per line value in the pix format and information from the video instance. - * - * Return the number of padding bytes at end of line. - */ -static unsigned int iss_video_mbus_to_pix(const struct iss_video *video, - const struct v4l2_mbus_framefmt *mbus, - struct v4l2_pix_format *pix) -{ - unsigned int bpl = pix->bytesperline; - unsigned int min_bpl; - unsigned int i; - - memset(pix, 0, sizeof(*pix)); - pix->width = mbus->width; - pix->height = mbus->height; - - /* - * Skip the last format in the loop so that it will be selected if no - * match is found. - */ - for (i = 0; i < ARRAY_SIZE(formats) - 1; ++i) { - if (formats[i].code == mbus->code) - break; - } - - min_bpl = pix->width * ALIGN(formats[i].bpp, 8) / 8; - - /* - * Clamp the requested bytes per line value. If the maximum bytes per - * line value is zero, the module doesn't support user configurable line - * sizes. Override the requested value with the minimum in that case. - */ - if (video->bpl_max) - bpl = clamp(bpl, min_bpl, video->bpl_max); - else - bpl = min_bpl; - - if (!video->bpl_zero_padding || bpl != min_bpl) - bpl = ALIGN(bpl, video->bpl_alignment); - - pix->pixelformat = formats[i].pixelformat; - pix->bytesperline = bpl; - pix->sizeimage = pix->bytesperline * pix->height; - pix->colorspace = mbus->colorspace; - pix->field = mbus->field; - - /* FIXME: Special case for NV12! We should make this nicer... */ - if (pix->pixelformat == V4L2_PIX_FMT_NV12) - pix->sizeimage += (pix->bytesperline * pix->height) / 2; - - return bpl - min_bpl; -} - -static void iss_video_pix_to_mbus(const struct v4l2_pix_format *pix, - struct v4l2_mbus_framefmt *mbus) -{ - unsigned int i; - - memset(mbus, 0, sizeof(*mbus)); - mbus->width = pix->width; - mbus->height = pix->height; - - /* - * Skip the last format in the loop so that it will be selected if no - * match is found. - */ - for (i = 0; i < ARRAY_SIZE(formats) - 1; ++i) { - if (formats[i].pixelformat == pix->pixelformat) - break; - } - - mbus->code = formats[i].code; - mbus->colorspace = pix->colorspace; - mbus->field = pix->field; -} - -static struct v4l2_subdev * -iss_video_remote_subdev(struct iss_video *video, u32 *pad) -{ - struct media_pad *remote; - - remote = media_pad_remote_pad_first(&video->pad); - - if (!remote || !is_media_entity_v4l2_subdev(remote->entity)) - return NULL; - - if (pad) - *pad = remote->index; - - return media_entity_to_v4l2_subdev(remote->entity); -} - -/* Return a pointer to the ISS video instance at the far end of the pipeline. 
*/ -static struct iss_video * -iss_video_far_end(struct iss_video *video, struct iss_pipeline *pipe) -{ - struct media_pipeline_entity_iter iter; - struct media_entity *entity; - struct iss_video *far_end = NULL; - int ret; - - ret = media_pipeline_entity_iter_init(&pipe->pipe, &iter); - if (ret) - return ERR_PTR(-ENOMEM); - - media_pipeline_for_each_entity(&pipe->pipe, &iter, entity) { - struct iss_video *other; - - if (entity == &video->video.entity) - continue; - - if (!is_media_entity_v4l2_video_device(entity)) - continue; - - other = to_iss_video(media_entity_to_video_device(entity)); - if (other->type != video->type) { - far_end = other; - break; - } - } - - media_pipeline_entity_iter_cleanup(&iter); - - return far_end; -} - -static int -__iss_video_get_format(struct iss_video *video, - struct v4l2_mbus_framefmt *format) -{ - struct v4l2_subdev_format fmt = { - .which = V4L2_SUBDEV_FORMAT_ACTIVE, - }; - struct v4l2_subdev *subdev; - u32 pad; - int ret; - - subdev = iss_video_remote_subdev(video, &pad); - if (!subdev) - return -EINVAL; - - fmt.pad = pad; - - mutex_lock(&video->mutex); - ret = v4l2_subdev_call(subdev, pad, get_fmt, NULL, &fmt); - mutex_unlock(&video->mutex); - - if (ret) - return ret; - - *format = fmt.format; - return 0; -} - -static int -iss_video_check_format(struct iss_video *video, struct iss_video_fh *vfh) -{ - struct v4l2_mbus_framefmt format; - struct v4l2_pix_format pixfmt; - int ret; - - ret = __iss_video_get_format(video, &format); - if (ret < 0) - return ret; - - pixfmt.bytesperline = 0; - ret = iss_video_mbus_to_pix(video, &format, &pixfmt); - - if (vfh->format.fmt.pix.pixelformat != pixfmt.pixelformat || - vfh->format.fmt.pix.height != pixfmt.height || - vfh->format.fmt.pix.width != pixfmt.width || - vfh->format.fmt.pix.bytesperline != pixfmt.bytesperline || - vfh->format.fmt.pix.sizeimage != pixfmt.sizeimage) - return -EINVAL; - - return ret; -} - -/* ----------------------------------------------------------------------------- - * Video queue operations - */ - -static int iss_video_queue_setup(struct vb2_queue *vq, - unsigned int *count, unsigned int *num_planes, - unsigned int sizes[], - struct device *alloc_devs[]) -{ - struct iss_video_fh *vfh = vb2_get_drv_priv(vq); - struct iss_video *video = vfh->video; - - /* Revisit multi-planar support for NV12 */ - *num_planes = 1; - - sizes[0] = vfh->format.fmt.pix.sizeimage; - if (sizes[0] == 0) - return -EINVAL; - - *count = min(*count, video->capture_mem / PAGE_ALIGN(sizes[0])); - - return 0; -} - -static void iss_video_buf_cleanup(struct vb2_buffer *vb) -{ - struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); - struct iss_buffer *buffer = container_of(vbuf, struct iss_buffer, vb); - - if (buffer->iss_addr) - buffer->iss_addr = 0; -} - -static int iss_video_buf_prepare(struct vb2_buffer *vb) -{ - struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); - struct iss_video_fh *vfh = vb2_get_drv_priv(vb->vb2_queue); - struct iss_buffer *buffer = container_of(vbuf, struct iss_buffer, vb); - struct iss_video *video = vfh->video; - unsigned long size = vfh->format.fmt.pix.sizeimage; - dma_addr_t addr; - - if (vb2_plane_size(vb, 0) < size) - return -ENOBUFS; - - addr = vb2_dma_contig_plane_dma_addr(vb, 0); - if (!IS_ALIGNED(addr, 32)) { - dev_dbg(video->iss->dev, - "Buffer address must be aligned to 32 bytes boundary.\n"); - return -EINVAL; - } - - vb2_set_plane_payload(vb, 0, size); - buffer->iss_addr = addr; - return 0; -} - -static void iss_video_buf_queue(struct vb2_buffer *vb) -{ - struct vb2_v4l2_buffer 
*vbuf = to_vb2_v4l2_buffer(vb); - struct iss_video_fh *vfh = vb2_get_drv_priv(vb->vb2_queue); - struct iss_video *video = vfh->video; - struct iss_buffer *buffer = container_of(vbuf, struct iss_buffer, vb); - struct iss_pipeline *pipe = to_iss_pipeline(&video->video.entity); - unsigned long flags; - bool empty; - - spin_lock_irqsave(&video->qlock, flags); - - /* - * Mark the buffer is faulty and give it back to the queue immediately - * if the video node has registered an error. vb2 will perform the same - * check when preparing the buffer, but that is inherently racy, so we - * need to handle the race condition with an authoritative check here. - */ - if (unlikely(video->error)) { - vb2_buffer_done(vb, VB2_BUF_STATE_ERROR); - spin_unlock_irqrestore(&video->qlock, flags); - return; - } - - empty = list_empty(&video->dmaqueue); - list_add_tail(&buffer->list, &video->dmaqueue); - - spin_unlock_irqrestore(&video->qlock, flags); - - if (empty) { - enum iss_pipeline_state state; - unsigned int start; - - if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) - state = ISS_PIPELINE_QUEUE_OUTPUT; - else - state = ISS_PIPELINE_QUEUE_INPUT; - - spin_lock_irqsave(&pipe->lock, flags); - pipe->state |= state; - video->ops->queue(video, buffer); - video->dmaqueue_flags |= ISS_VIDEO_DMAQUEUE_QUEUED; - - start = iss_pipeline_ready(pipe); - if (start) - pipe->state |= ISS_PIPELINE_STREAM; - spin_unlock_irqrestore(&pipe->lock, flags); - - if (start) - omap4iss_pipeline_set_stream(pipe, - ISS_PIPELINE_STREAM_SINGLESHOT); - } -} - -static const struct vb2_ops iss_video_vb2ops = { - .queue_setup = iss_video_queue_setup, - .buf_prepare = iss_video_buf_prepare, - .buf_queue = iss_video_buf_queue, - .buf_cleanup = iss_video_buf_cleanup, -}; - -/* - * omap4iss_video_buffer_next - Complete the current buffer and return the next - * @video: ISS video object - * - * Remove the current video buffer from the DMA queue and fill its timestamp, - * field count and state fields before waking up its completion handler. - * - * For capture video nodes, the buffer state is set to VB2_BUF_STATE_DONE if no - * error has been flagged in the pipeline, or to VB2_BUF_STATE_ERROR otherwise. - * - * The DMA queue is expected to contain at least one buffer. - * - * Return a pointer to the next buffer in the DMA queue, or NULL if the queue is - * empty. - */ -struct iss_buffer *omap4iss_video_buffer_next(struct iss_video *video) -{ - struct iss_pipeline *pipe = to_iss_pipeline(&video->video.entity); - enum iss_pipeline_state state; - struct iss_buffer *buf; - unsigned long flags; - - spin_lock_irqsave(&video->qlock, flags); - if (WARN_ON(list_empty(&video->dmaqueue))) { - spin_unlock_irqrestore(&video->qlock, flags); - return NULL; - } - - buf = list_first_entry(&video->dmaqueue, struct iss_buffer, - list); - list_del(&buf->list); - spin_unlock_irqrestore(&video->qlock, flags); - - buf->vb.vb2_buf.timestamp = ktime_get_ns(); - - /* - * Do frame number propagation only if this is the output video node. - * Frame number either comes from the CSI receivers or it gets - * incremented here if H3A is not active. - * Note: There is no guarantee that the output buffer will finish - * first, so the input number might lag behind by 1 in some cases. - */ - if (video == pipe->output && !pipe->do_propagation) - buf->vb.sequence = - atomic_inc_return(&pipe->frame_number); - else - buf->vb.sequence = atomic_read(&pipe->frame_number); - - vb2_buffer_done(&buf->vb.vb2_buf, pipe->error ? 
- VB2_BUF_STATE_ERROR : VB2_BUF_STATE_DONE); - pipe->error = false; - - spin_lock_irqsave(&video->qlock, flags); - if (list_empty(&video->dmaqueue)) { - spin_unlock_irqrestore(&video->qlock, flags); - if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) - state = ISS_PIPELINE_QUEUE_OUTPUT - | ISS_PIPELINE_STREAM; - else - state = ISS_PIPELINE_QUEUE_INPUT - | ISS_PIPELINE_STREAM; - - spin_lock_irqsave(&pipe->lock, flags); - pipe->state &= ~state; - if (video->pipe.stream_state == ISS_PIPELINE_STREAM_CONTINUOUS) - video->dmaqueue_flags |= ISS_VIDEO_DMAQUEUE_UNDERRUN; - spin_unlock_irqrestore(&pipe->lock, flags); - return NULL; - } - - if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && pipe->input) { - spin_lock(&pipe->lock); - pipe->state &= ~ISS_PIPELINE_STREAM; - spin_unlock(&pipe->lock); - } - - buf = list_first_entry(&video->dmaqueue, struct iss_buffer, - list); - spin_unlock_irqrestore(&video->qlock, flags); - buf->vb.vb2_buf.state = VB2_BUF_STATE_ACTIVE; - return buf; -} - -/* - * omap4iss_video_cancel_stream - Cancel stream on a video node - * @video: ISS video object - * - * Cancelling a stream mark all buffers on the video node as erroneous and makes - * sure no new buffer can be queued. - */ -void omap4iss_video_cancel_stream(struct iss_video *video) -{ - unsigned long flags; - - spin_lock_irqsave(&video->qlock, flags); - - while (!list_empty(&video->dmaqueue)) { - struct iss_buffer *buf; - - buf = list_first_entry(&video->dmaqueue, struct iss_buffer, - list); - list_del(&buf->list); - vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); - } - - vb2_queue_error(video->queue); - video->error = true; - - spin_unlock_irqrestore(&video->qlock, flags); -} - -/* ----------------------------------------------------------------------------- - * V4L2 ioctls - */ - -static int -iss_video_querycap(struct file *file, void *fh, struct v4l2_capability *cap) -{ - struct iss_video *video = video_drvdata(file); - - strscpy(cap->driver, ISS_VIDEO_DRIVER_NAME, sizeof(cap->driver)); - strscpy(cap->card, video->video.name, sizeof(cap->card)); - strscpy(cap->bus_info, "media", sizeof(cap->bus_info)); - cap->capabilities = V4L2_CAP_DEVICE_CAPS | V4L2_CAP_STREAMING - | V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT; - - return 0; -} - -static int -iss_video_enum_format(struct file *file, void *fh, struct v4l2_fmtdesc *f) -{ - struct iss_video *video = video_drvdata(file); - struct v4l2_mbus_framefmt format; - unsigned int index = f->index; - unsigned int i; - int ret; - - if (f->type != video->type) - return -EINVAL; - - ret = __iss_video_get_format(video, &format); - if (ret < 0) - return ret; - - for (i = 0; i < ARRAY_SIZE(formats); ++i) { - const struct iss_format_info *info = &formats[i]; - - if (format.code != info->code) - continue; - - if (index == 0) { - f->pixelformat = info->pixelformat; - return 0; - } - - index--; - } - - return -EINVAL; -} - -static int -iss_video_get_format(struct file *file, void *fh, struct v4l2_format *format) -{ - struct iss_video_fh *vfh = to_iss_video_fh(fh); - struct iss_video *video = video_drvdata(file); - - if (format->type != video->type) - return -EINVAL; - - mutex_lock(&video->mutex); - *format = vfh->format; - mutex_unlock(&video->mutex); - - return 0; -} - -static int -iss_video_set_format(struct file *file, void *fh, struct v4l2_format *format) -{ - struct iss_video_fh *vfh = to_iss_video_fh(fh); - struct iss_video *video = video_drvdata(file); - struct v4l2_mbus_framefmt fmt; - - if (format->type != video->type) - return -EINVAL; - - mutex_lock(&video->mutex); 
- - /* - * Fill the bytesperline and sizeimage fields by converting to media bus - * format and back to pixel format. - */ - iss_video_pix_to_mbus(&format->fmt.pix, &fmt); - iss_video_mbus_to_pix(video, &fmt, &format->fmt.pix); - - vfh->format = *format; - - mutex_unlock(&video->mutex); - return 0; -} - -static int -iss_video_try_format(struct file *file, void *fh, struct v4l2_format *format) -{ - struct iss_video *video = video_drvdata(file); - struct v4l2_subdev_format fmt = { - .which = V4L2_SUBDEV_FORMAT_ACTIVE, - }; - struct v4l2_subdev *subdev; - u32 pad; - int ret; - - if (format->type != video->type) - return -EINVAL; - - subdev = iss_video_remote_subdev(video, &pad); - if (!subdev) - return -EINVAL; - - iss_video_pix_to_mbus(&format->fmt.pix, &fmt.format); - - fmt.pad = pad; - ret = v4l2_subdev_call(subdev, pad, get_fmt, NULL, &fmt); - if (ret) - return ret; - - iss_video_mbus_to_pix(video, &fmt.format, &format->fmt.pix); - return 0; -} - -static int -iss_video_get_selection(struct file *file, void *fh, struct v4l2_selection *sel) -{ - struct iss_video *video = video_drvdata(file); - struct v4l2_subdev_format format = { - .which = V4L2_SUBDEV_FORMAT_ACTIVE, - }; - struct v4l2_subdev *subdev; - struct v4l2_subdev_selection sdsel = { - .which = V4L2_SUBDEV_FORMAT_ACTIVE, - .target = sel->target, - }; - u32 pad; - int ret; - - switch (sel->target) { - case V4L2_SEL_TGT_CROP: - case V4L2_SEL_TGT_CROP_BOUNDS: - case V4L2_SEL_TGT_CROP_DEFAULT: - if (video->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) - return -EINVAL; - break; - case V4L2_SEL_TGT_COMPOSE: - case V4L2_SEL_TGT_COMPOSE_BOUNDS: - case V4L2_SEL_TGT_COMPOSE_DEFAULT: - if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) - return -EINVAL; - break; - default: - return -EINVAL; - } - subdev = iss_video_remote_subdev(video, &pad); - if (!subdev) - return -EINVAL; - - /* - * Try the get selection operation first and fallback to get format if - * not implemented. - */ - sdsel.pad = pad; - ret = v4l2_subdev_call(subdev, pad, get_selection, NULL, &sdsel); - if (!ret) - sel->r = sdsel.r; - if (ret != -ENOIOCTLCMD) - return ret; - - format.pad = pad; - ret = v4l2_subdev_call(subdev, pad, get_fmt, NULL, &format); - if (ret < 0) - return ret == -ENOIOCTLCMD ? -ENOTTY : ret; - - sel->r.left = 0; - sel->r.top = 0; - sel->r.width = format.format.width; - sel->r.height = format.format.height; - - return 0; -} - -static int -iss_video_set_selection(struct file *file, void *fh, struct v4l2_selection *sel) -{ - struct iss_video *video = video_drvdata(file); - struct v4l2_subdev *subdev; - struct v4l2_subdev_selection sdsel = { - .which = V4L2_SUBDEV_FORMAT_ACTIVE, - .target = sel->target, - .flags = sel->flags, - .r = sel->r, - }; - u32 pad; - int ret; - - switch (sel->target) { - case V4L2_SEL_TGT_CROP: - if (video->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) - return -EINVAL; - break; - case V4L2_SEL_TGT_COMPOSE: - if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) - return -EINVAL; - break; - default: - return -EINVAL; - } - subdev = iss_video_remote_subdev(video, &pad); - if (!subdev) - return -EINVAL; - - sdsel.pad = pad; - mutex_lock(&video->mutex); - ret = v4l2_subdev_call(subdev, pad, set_selection, NULL, &sdsel); - mutex_unlock(&video->mutex); - if (!ret) - sel->r = sdsel.r; - - return ret == -ENOIOCTLCMD ? 
-ENOTTY : ret; -} - -static int -iss_video_get_param(struct file *file, void *fh, struct v4l2_streamparm *a) -{ - struct iss_video_fh *vfh = to_iss_video_fh(fh); - struct iss_video *video = video_drvdata(file); - - if (video->type != V4L2_BUF_TYPE_VIDEO_OUTPUT || - video->type != a->type) - return -EINVAL; - - memset(a, 0, sizeof(*a)); - a->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; - a->parm.output.capability = V4L2_CAP_TIMEPERFRAME; - a->parm.output.timeperframe = vfh->timeperframe; - - return 0; -} - -static int -iss_video_set_param(struct file *file, void *fh, struct v4l2_streamparm *a) -{ - struct iss_video_fh *vfh = to_iss_video_fh(fh); - struct iss_video *video = video_drvdata(file); - - if (video->type != V4L2_BUF_TYPE_VIDEO_OUTPUT || - video->type != a->type) - return -EINVAL; - - if (a->parm.output.timeperframe.denominator == 0) - a->parm.output.timeperframe.denominator = 1; - - vfh->timeperframe = a->parm.output.timeperframe; - - return 0; -} - -static int -iss_video_reqbufs(struct file *file, void *fh, struct v4l2_requestbuffers *rb) -{ - struct iss_video_fh *vfh = to_iss_video_fh(fh); - - return vb2_reqbufs(&vfh->queue, rb); -} - -static int -iss_video_querybuf(struct file *file, void *fh, struct v4l2_buffer *b) -{ - struct iss_video_fh *vfh = to_iss_video_fh(fh); - - return vb2_querybuf(&vfh->queue, b); -} - -static int -iss_video_qbuf(struct file *file, void *fh, struct v4l2_buffer *b) -{ - struct iss_video *video = video_drvdata(file); - struct iss_video_fh *vfh = to_iss_video_fh(fh); - - return vb2_qbuf(&vfh->queue, video->video.v4l2_dev->mdev, b); -} - -static int -iss_video_expbuf(struct file *file, void *fh, struct v4l2_exportbuffer *e) -{ - struct iss_video_fh *vfh = to_iss_video_fh(fh); - - return vb2_expbuf(&vfh->queue, e); -} - -static int -iss_video_dqbuf(struct file *file, void *fh, struct v4l2_buffer *b) -{ - struct iss_video_fh *vfh = to_iss_video_fh(fh); - - return vb2_dqbuf(&vfh->queue, b, file->f_flags & O_NONBLOCK); -} - -/* - * Stream management - * - * Every ISS pipeline has a single input and a single output. The input can be - * either a sensor or a video node. The output is always a video node. - * - * As every pipeline has an output video node, the ISS video objects at the - * pipeline output stores the pipeline state. It tracks the streaming state of - * both the input and output, as well as the availability of buffers. - * - * In sensor-to-memory mode, frames are always available at the pipeline input. - * Starting the sensor usually requires I2C transfers and must be done in - * interruptible context. The pipeline is started and stopped synchronously - * to the stream on/off commands. All modules in the pipeline will get their - * subdev set stream handler called. The module at the end of the pipeline must - * delay starting the hardware until buffers are available at its output. - * - * In memory-to-memory mode, starting/stopping the stream requires - * synchronization between the input and output. ISS modules can't be stopped - * in the middle of a frame, and at least some of the modules seem to become - * busy as soon as they're started, even if they don't receive a frame start - * event. For that reason frames need to be processed in single-shot mode. The - * driver needs to wait until a frame is completely processed and written to - * memory before restarting the pipeline for the next frame. Pipelined - * processing might be possible but requires more testing. 
- * - * Stream start must be delayed until buffers are available at both the input - * and output. The pipeline must be started in the vb2 queue callback with - * the buffers queue spinlock held. The modules subdev set stream operation must - * not sleep. - */ -static int -iss_video_streamon(struct file *file, void *fh, enum v4l2_buf_type type) -{ - struct iss_video_fh *vfh = to_iss_video_fh(fh); - struct iss_video *video = video_drvdata(file); - struct media_device *mdev = video->video.entity.graph_obj.mdev; - struct media_pipeline_pad_iter iter; - enum iss_pipeline_state state; - struct iss_pipeline *pipe; - struct iss_video *far_end; - struct media_pad *pad; - unsigned long flags; - int ret; - - if (type != video->type) - return -EINVAL; - - mutex_lock(&video->stream_lock); - - /* - * Start streaming on the pipeline. No link touching an entity in the - * pipeline can be activated or deactivated once streaming is started. - */ - pipe = to_iss_pipeline(&video->video.entity) ? : &video->pipe; - pipe->external = NULL; - pipe->external_rate = 0; - pipe->external_bpp = 0; - - ret = media_entity_enum_init(&pipe->ent_enum, mdev); - if (ret) - goto err_entity_enum_init; - - if (video->iss->pdata->set_constraints) - video->iss->pdata->set_constraints(video->iss, true); - - ret = video_device_pipeline_start(&video->video, &pipe->pipe); - if (ret < 0) - goto err_media_pipeline_start; - - media_pipeline_for_each_pad(&pipe->pipe, &iter, pad) - media_entity_enum_set(&pipe->ent_enum, pad->entity); - - /* - * Verify that the currently configured format matches the output of - * the connected subdev. - */ - ret = iss_video_check_format(video, vfh); - if (ret < 0) - goto err_iss_video_check_format; - - video->bpl_padding = ret; - video->bpl_value = vfh->format.fmt.pix.bytesperline; - - /* - * Find the ISS video node connected at the far end of the pipeline and - * update the pipeline. - */ - far_end = iss_video_far_end(video, pipe); - if (IS_ERR(far_end)) { - ret = PTR_ERR(far_end); - goto err_iss_video_check_format; - } - - if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) { - state = ISS_PIPELINE_STREAM_OUTPUT | ISS_PIPELINE_IDLE_OUTPUT; - pipe->input = far_end; - pipe->output = video; - } else { - if (!far_end) { - ret = -EPIPE; - goto err_iss_video_check_format; - } - - state = ISS_PIPELINE_STREAM_INPUT | ISS_PIPELINE_IDLE_INPUT; - pipe->input = video; - pipe->output = far_end; - } - - spin_lock_irqsave(&pipe->lock, flags); - pipe->state &= ~ISS_PIPELINE_STREAM; - pipe->state |= state; - spin_unlock_irqrestore(&pipe->lock, flags); - - /* - * Set the maximum time per frame as the value requested by userspace. - * This is a soft limit that can be overridden if the hardware doesn't - * support the request limit. - */ - if (video->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) - pipe->max_timeperframe = vfh->timeperframe; - - video->queue = &vfh->queue; - INIT_LIST_HEAD(&video->dmaqueue); - video->error = false; - atomic_set(&pipe->frame_number, -1); - - ret = vb2_streamon(&vfh->queue, type); - if (ret < 0) - goto err_iss_video_check_format; - - /* - * In sensor-to-memory mode, the stream can be started synchronously - * to the stream on command. In memory-to-memory mode, it will be - * started when buffers are queued on both the input and output. 
- */ - if (!pipe->input) { - unsigned long flags; - - ret = omap4iss_pipeline_set_stream(pipe, - ISS_PIPELINE_STREAM_CONTINUOUS); - if (ret < 0) - goto err_omap4iss_set_stream; - spin_lock_irqsave(&video->qlock, flags); - if (list_empty(&video->dmaqueue)) - video->dmaqueue_flags |= ISS_VIDEO_DMAQUEUE_UNDERRUN; - spin_unlock_irqrestore(&video->qlock, flags); - } - - mutex_unlock(&video->stream_lock); - - return 0; - -err_omap4iss_set_stream: - vb2_streamoff(&vfh->queue, type); -err_iss_video_check_format: - video_device_pipeline_stop(&video->video); -err_media_pipeline_start: - if (video->iss->pdata->set_constraints) - video->iss->pdata->set_constraints(video->iss, false); - video->queue = NULL; - -err_entity_enum_init: - media_entity_enum_cleanup(&pipe->ent_enum); - - mutex_unlock(&video->stream_lock); - - return ret; -} - -static int -iss_video_streamoff(struct file *file, void *fh, enum v4l2_buf_type type) -{ - struct iss_video_fh *vfh = to_iss_video_fh(fh); - struct iss_video *video = video_drvdata(file); - struct iss_pipeline *pipe = to_iss_pipeline(&video->video.entity); - enum iss_pipeline_state state; - unsigned long flags; - - if (type != video->type) - return -EINVAL; - - mutex_lock(&video->stream_lock); - - if (!vb2_is_streaming(&vfh->queue)) - goto done; - - /* Update the pipeline state. */ - if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) - state = ISS_PIPELINE_STREAM_OUTPUT - | ISS_PIPELINE_QUEUE_OUTPUT; - else - state = ISS_PIPELINE_STREAM_INPUT - | ISS_PIPELINE_QUEUE_INPUT; - - spin_lock_irqsave(&pipe->lock, flags); - pipe->state &= ~state; - spin_unlock_irqrestore(&pipe->lock, flags); - - /* Stop the stream. */ - omap4iss_pipeline_set_stream(pipe, ISS_PIPELINE_STREAM_STOPPED); - vb2_streamoff(&vfh->queue, type); - video->queue = NULL; - - media_entity_enum_cleanup(&pipe->ent_enum); - - if (video->iss->pdata->set_constraints) - video->iss->pdata->set_constraints(video->iss, false); - video_device_pipeline_stop(&video->video); - -done: - mutex_unlock(&video->stream_lock); - return 0; -} - -static int -iss_video_enum_input(struct file *file, void *fh, struct v4l2_input *input) -{ - if (input->index > 0) - return -EINVAL; - - strscpy(input->name, "camera", sizeof(input->name)); - input->type = V4L2_INPUT_TYPE_CAMERA; - - return 0; -} - -static int -iss_video_g_input(struct file *file, void *fh, unsigned int *input) -{ - *input = 0; - - return 0; -} - -static int -iss_video_s_input(struct file *file, void *fh, unsigned int input) -{ - return input == 0 ? 
0 : -EINVAL; -} - -static const struct v4l2_ioctl_ops iss_video_ioctl_ops = { - .vidioc_querycap = iss_video_querycap, - .vidioc_enum_fmt_vid_cap = iss_video_enum_format, - .vidioc_g_fmt_vid_cap = iss_video_get_format, - .vidioc_s_fmt_vid_cap = iss_video_set_format, - .vidioc_try_fmt_vid_cap = iss_video_try_format, - .vidioc_g_fmt_vid_out = iss_video_get_format, - .vidioc_s_fmt_vid_out = iss_video_set_format, - .vidioc_try_fmt_vid_out = iss_video_try_format, - .vidioc_g_selection = iss_video_get_selection, - .vidioc_s_selection = iss_video_set_selection, - .vidioc_g_parm = iss_video_get_param, - .vidioc_s_parm = iss_video_set_param, - .vidioc_reqbufs = iss_video_reqbufs, - .vidioc_querybuf = iss_video_querybuf, - .vidioc_qbuf = iss_video_qbuf, - .vidioc_expbuf = iss_video_expbuf, - .vidioc_dqbuf = iss_video_dqbuf, - .vidioc_streamon = iss_video_streamon, - .vidioc_streamoff = iss_video_streamoff, - .vidioc_enum_input = iss_video_enum_input, - .vidioc_g_input = iss_video_g_input, - .vidioc_s_input = iss_video_s_input, -}; - -/* ----------------------------------------------------------------------------- - * V4L2 file operations - */ - -static int iss_video_open(struct file *file) -{ - struct iss_video *video = video_drvdata(file); - struct iss_video_fh *handle; - struct vb2_queue *q; - int ret = 0; - - handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) - return -ENOMEM; - - v4l2_fh_init(&handle->vfh, &video->video); - v4l2_fh_add(&handle->vfh); - - /* If this is the first user, initialise the pipeline. */ - if (!omap4iss_get(video->iss)) { - ret = -EBUSY; - goto done; - } - - ret = v4l2_pipeline_pm_get(&video->video.entity); - if (ret < 0) { - omap4iss_put(video->iss); - goto done; - } - - q = &handle->queue; - - q->type = video->type; - q->io_modes = VB2_MMAP | VB2_DMABUF; - q->drv_priv = handle; - q->ops = &iss_video_vb2ops; - q->mem_ops = &vb2_dma_contig_memops; - q->buf_struct_size = sizeof(struct iss_buffer); - q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; - q->dev = video->iss->dev; - - ret = vb2_queue_init(q); - if (ret) { - omap4iss_put(video->iss); - goto done; - } - - memset(&handle->format, 0, sizeof(handle->format)); - handle->format.type = video->type; - handle->timeperframe.denominator = 1; - - handle->video = video; - file->private_data = &handle->vfh; - -done: - if (ret < 0) { - v4l2_fh_del(&handle->vfh); - v4l2_fh_exit(&handle->vfh); - kfree(handle); - } - - return ret; -} - -static int iss_video_release(struct file *file) -{ - struct iss_video *video = video_drvdata(file); - struct v4l2_fh *vfh = file->private_data; - struct iss_video_fh *handle = to_iss_video_fh(vfh); - - /* Disable streaming and free the buffers queue resources. 
*/ - iss_video_streamoff(file, vfh, video->type); - - v4l2_pipeline_pm_put(&video->video.entity); - - /* Release the videobuf2 queue */ - vb2_queue_release(&handle->queue); - - v4l2_fh_del(vfh); - v4l2_fh_exit(vfh); - kfree(handle); - file->private_data = NULL; - - omap4iss_put(video->iss); - - return 0; -} - -static __poll_t iss_video_poll(struct file *file, poll_table *wait) -{ - struct iss_video_fh *vfh = to_iss_video_fh(file->private_data); - - return vb2_poll(&vfh->queue, file, wait); -} - -static int iss_video_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct iss_video_fh *vfh = to_iss_video_fh(file->private_data); - - return vb2_mmap(&vfh->queue, vma); -} - -static const struct v4l2_file_operations iss_video_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = video_ioctl2, - .open = iss_video_open, - .release = iss_video_release, - .poll = iss_video_poll, - .mmap = iss_video_mmap, -}; - -/* ----------------------------------------------------------------------------- - * ISS video core - */ - -static const struct iss_video_operations iss_video_dummy_ops = { -}; - -int omap4iss_video_init(struct iss_video *video, const char *name) -{ - const char *direction; - int ret; - - switch (video->type) { - case V4L2_BUF_TYPE_VIDEO_CAPTURE: - direction = "output"; - video->pad.flags = MEDIA_PAD_FL_SINK; - break; - case V4L2_BUF_TYPE_VIDEO_OUTPUT: - direction = "input"; - video->pad.flags = MEDIA_PAD_FL_SOURCE; - break; - - default: - return -EINVAL; - } - - ret = media_entity_pads_init(&video->video.entity, 1, &video->pad); - if (ret < 0) - return ret; - - spin_lock_init(&video->qlock); - mutex_init(&video->mutex); - atomic_set(&video->active, 0); - - spin_lock_init(&video->pipe.lock); - mutex_init(&video->stream_lock); - - /* Initialize the video device. */ - if (!video->ops) - video->ops = &iss_video_dummy_ops; - - video->video.fops = &iss_video_fops; - snprintf(video->video.name, sizeof(video->video.name), - "OMAP4 ISS %s %s", name, direction); - video->video.vfl_type = VFL_TYPE_VIDEO; - video->video.release = video_device_release_empty; - video->video.ioctl_ops = &iss_video_ioctl_ops; - video->pipe.stream_state = ISS_PIPELINE_STREAM_STOPPED; - - video_set_drvdata(&video->video, video); - - return 0; -} - -void omap4iss_video_cleanup(struct iss_video *video) -{ - media_entity_cleanup(&video->video.entity); - mutex_destroy(&video->stream_lock); - mutex_destroy(&video->mutex); -} - -int omap4iss_video_register(struct iss_video *video, struct v4l2_device *vdev) -{ - int ret; - - video->video.v4l2_dev = vdev; - if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) - video->video.device_caps = V4L2_CAP_VIDEO_CAPTURE; - else - video->video.device_caps = V4L2_CAP_VIDEO_OUTPUT; - video->video.device_caps |= V4L2_CAP_STREAMING; - - ret = video_register_device(&video->video, VFL_TYPE_VIDEO, -1); - if (ret < 0) - dev_err(video->iss->dev, - "could not register video device (%d)\n", ret); - - return ret; -} - -void omap4iss_video_unregister(struct iss_video *video) -{ - video_unregister_device(&video->video); -} diff --git a/drivers/staging/media/omap4iss/iss_video.h b/drivers/staging/media/omap4iss/iss_video.h deleted file mode 100644 index 19668d28b682..000000000000 --- a/drivers/staging/media/omap4iss/iss_video.h +++ /dev/null @@ -1,203 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * TI OMAP4 ISS V4L2 Driver - Generic video node - * - * Copyright (C) 2012 Texas Instruments, Inc. 
- * - * Author: Sergio Aguirre - */ - -#ifndef OMAP4_ISS_VIDEO_H -#define OMAP4_ISS_VIDEO_H - -#include -#include -#include -#include -#include -#include - -#define ISS_VIDEO_DRIVER_NAME "issvideo" - -struct iss_device; -struct iss_video; -struct v4l2_mbus_framefmt; -struct v4l2_pix_format; - -/* - * struct iss_format_info - ISS media bus format information - * @code: V4L2 media bus format code - * @truncated: V4L2 media bus format code for the same format truncated to 10 - * bits. Identical to @code if the format is 10 bits wide or less. - * @uncompressed: V4L2 media bus format code for the corresponding uncompressed - * format. Identical to @code if the format is not DPCM compressed. - * @flavor: V4L2 media bus format code for the same pixel layout but - * shifted to be 8 bits per pixel. =0 if format is not shiftable. - * @pixelformat: V4L2 pixel format FCC identifier - * @bpp: Bits per pixel - */ -struct iss_format_info { - u32 code; - u32 truncated; - u32 uncompressed; - u32 flavor; - u32 pixelformat; - unsigned int bpp; -}; - -enum iss_pipeline_stream_state { - ISS_PIPELINE_STREAM_STOPPED = 0, - ISS_PIPELINE_STREAM_CONTINUOUS = 1, - ISS_PIPELINE_STREAM_SINGLESHOT = 2, -}; - -enum iss_pipeline_state { - /* The stream has been started on the input video node. */ - ISS_PIPELINE_STREAM_INPUT = BIT(0), - /* The stream has been started on the output video node. */ - ISS_PIPELINE_STREAM_OUTPUT = BIT(1), - /* At least one buffer is queued on the input video node. */ - ISS_PIPELINE_QUEUE_INPUT = BIT(2), - /* At least one buffer is queued on the output video node. */ - ISS_PIPELINE_QUEUE_OUTPUT = BIT(3), - /* The input entity is idle, ready to be started. */ - ISS_PIPELINE_IDLE_INPUT = BIT(4), - /* The output entity is idle, ready to be started. */ - ISS_PIPELINE_IDLE_OUTPUT = BIT(5), - /* The pipeline is currently streaming. */ - ISS_PIPELINE_STREAM = BIT(6), -}; - -/* - * struct iss_pipeline - An OMAP4 ISS hardware pipeline - * @ent_enum: Entities in the pipeline - * @error: A hardware error occurred during capture - */ -struct iss_pipeline { - struct media_pipeline pipe; - spinlock_t lock; /* Pipeline state and queue flags */ - unsigned int state; - enum iss_pipeline_stream_state stream_state; - struct iss_video *input; - struct iss_video *output; - struct media_entity_enum ent_enum; - atomic_t frame_number; - bool do_propagation; /* of frame number */ - bool error; - struct v4l2_fract max_timeperframe; - struct v4l2_subdev *external; - unsigned int external_rate; - int external_bpp; -}; - -static inline struct iss_pipeline *to_iss_pipeline(struct media_entity *entity) -{ - struct media_pipeline *pipe = media_entity_pipeline(entity); - - if (!pipe) - return NULL; - - return container_of(pipe, struct iss_pipeline, pipe); -} - -static inline int iss_pipeline_ready(struct iss_pipeline *pipe) -{ - return pipe->state == (ISS_PIPELINE_STREAM_INPUT | - ISS_PIPELINE_STREAM_OUTPUT | - ISS_PIPELINE_QUEUE_INPUT | - ISS_PIPELINE_QUEUE_OUTPUT | - ISS_PIPELINE_IDLE_INPUT | - ISS_PIPELINE_IDLE_OUTPUT); -} - -/* - * struct iss_buffer - ISS buffer - * @buffer: ISS video buffer - * @iss_addr: Physical address of the buffer. 
- */ -struct iss_buffer { - /* common v4l buffer stuff -- must be first */ - struct vb2_v4l2_buffer vb; - struct list_head list; - dma_addr_t iss_addr; -}; - -#define to_iss_buffer(buf) container_of(buf, struct iss_buffer, vb) - -enum iss_video_dmaqueue_flags { - /* Set if DMA queue becomes empty when ISS_PIPELINE_STREAM_CONTINUOUS */ - ISS_VIDEO_DMAQUEUE_UNDERRUN = BIT(0), - /* Set when queuing buffer to an empty DMA queue */ - ISS_VIDEO_DMAQUEUE_QUEUED = BIT(1), -}; - -#define iss_video_dmaqueue_flags_clr(video) \ - ({ (video)->dmaqueue_flags = 0; }) - -/* - * struct iss_video_operations - ISS video operations - * @queue: Resume streaming when a buffer is queued. Called on VIDIOC_QBUF - * if there was no buffer previously queued. - */ -struct iss_video_operations { - int (*queue)(struct iss_video *video, struct iss_buffer *buffer); -}; - -struct iss_video { - struct video_device video; - enum v4l2_buf_type type; - struct media_pad pad; - - struct mutex mutex; /* format and crop settings */ - atomic_t active; - - struct iss_device *iss; - - unsigned int capture_mem; - unsigned int bpl_alignment; /* alignment value */ - unsigned int bpl_zero_padding; /* whether the alignment is optional */ - unsigned int bpl_max; /* maximum bytes per line value */ - unsigned int bpl_value; /* bytes per line value */ - unsigned int bpl_padding; /* padding at end of line */ - - /* Pipeline state */ - struct iss_pipeline pipe; - struct mutex stream_lock; /* pipeline and stream states */ - bool error; - - /* Video buffers queue */ - struct vb2_queue *queue; - spinlock_t qlock; /* protects dmaqueue and error */ - struct list_head dmaqueue; - enum iss_video_dmaqueue_flags dmaqueue_flags; - - const struct iss_video_operations *ops; -}; - -#define to_iss_video(vdev) container_of(vdev, struct iss_video, video) - -struct iss_video_fh { - struct v4l2_fh vfh; - struct iss_video *video; - struct vb2_queue queue; - struct v4l2_format format; - struct v4l2_fract timeperframe; -}; - -#define to_iss_video_fh(fh) container_of(fh, struct iss_video_fh, vfh) -#define iss_video_queue_to_iss_video_fh(q) \ - container_of(q, struct iss_video_fh, queue) - -int omap4iss_video_init(struct iss_video *video, const char *name); -void omap4iss_video_cleanup(struct iss_video *video); -int omap4iss_video_register(struct iss_video *video, - struct v4l2_device *vdev); -void omap4iss_video_unregister(struct iss_video *video); -struct iss_buffer *omap4iss_video_buffer_next(struct iss_video *video); -void omap4iss_video_cancel_stream(struct iss_video *video); -struct media_pad *omap4iss_video_remote_pad(struct iss_video *video); - -const struct iss_format_info * -omap4iss_video_format_info(u32 code); - -#endif /* OMAP4_ISS_VIDEO_H */ diff --git a/include/linux/platform_data/media/omap4iss.h b/include/linux/platform_data/media/omap4iss.h deleted file mode 100644 index 2a511a8fcda7..000000000000 --- a/include/linux/platform_data/media/omap4iss.h +++ /dev/null @@ -1,66 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ARCH_ARM_PLAT_OMAP4_ISS_H -#define ARCH_ARM_PLAT_OMAP4_ISS_H - -#include - -struct iss_device; - -enum iss_interface_type { - ISS_INTERFACE_CSI2A_PHY1, - ISS_INTERFACE_CSI2B_PHY2, -}; - -/** - * struct iss_csiphy_lane: CSI2 lane position and polarity - * @pos: position of the lane - * @pol: polarity of the lane - */ -struct iss_csiphy_lane { - u8 pos; - u8 pol; -}; - -#define ISS_CSIPHY1_NUM_DATA_LANES 4 -#define ISS_CSIPHY2_NUM_DATA_LANES 1 - -/** - * struct iss_csiphy_lanes_cfg - CSI2 lane configuration - * @data: 
Configuration of one or two data lanes - * @clk: Clock lane configuration - */ -struct iss_csiphy_lanes_cfg { - struct iss_csiphy_lane data[ISS_CSIPHY1_NUM_DATA_LANES]; - struct iss_csiphy_lane clk; -}; - -/** - * struct iss_csi2_platform_data - CSI2 interface platform data - * @crc: Enable the cyclic redundancy check - * @vpclk_div: Video port output clock control - */ -struct iss_csi2_platform_data { - unsigned crc:1; - unsigned vpclk_div:2; - struct iss_csiphy_lanes_cfg lanecfg; -}; - -struct iss_subdev_i2c_board_info { - struct i2c_board_info *board_info; - int i2c_adapter_id; -}; - -struct iss_v4l2_subdevs_group { - struct iss_subdev_i2c_board_info *subdevs; - enum iss_interface_type interface; - union { - struct iss_csi2_platform_data csi2; - } bus; /* gcc < 4.6.0 chokes on anonymous union initializers */ -}; - -struct iss_platform_data { - struct iss_v4l2_subdevs_group *subdevs; - void (*set_constraints)(struct iss_device *iss, bool enable); -}; - -#endif -- cgit v1.2.3 From ec763c234d7f60c5bce0fa2611ba79f5be1af76b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 4 Oct 2024 15:10:28 -0700 Subject: Revert "rtnetlink: add guard for RTNL" This reverts commit 464eb03c4a7cfb32cb3324249193cf6bb5b35152. Once we have a per-netns RTNL, we won't use guard(rtnl). Also, there's no users for now. $ grep -rnI "guard(rtnl" || true $ Suggested-by: Eric Dumazet Link: https://lore.kernel.org/netdev/CANn89i+KoYzUH+VPLdGmLABYf5y4TW0hrM4UAeQQJ9AREty0iw@mail.gmail.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index a7da7dfc06a2..cdfc897f1e3c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -7,7 +7,6 @@ #include #include #include -#include #include extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -47,8 +46,6 @@ extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); -DEFINE_LOCK_GUARD_0(rtnl, rtnl_lock(), rtnl_unlock()) - extern wait_queue_head_t netdev_unregistering_wq; extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; -- cgit v1.2.3 From 76aed95319da25d6884dff01d5f0149e4b542f96 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 4 Oct 2024 15:10:29 -0700 Subject: rtnetlink: Add per-netns RTNL. The goal is to break RTNL down into per-netns mutex. This patch adds per-netns mutex and its helper functions, rtnl_net_lock() and rtnl_net_unlock(). rtnl_net_lock() acquires the global RTNL and per-netns RTNL mutex, and rtnl_net_unlock() releases them. We will replace 800+ rtnl_lock() with rtnl_net_lock() and finally removes rtnl_lock() in rtnl_net_lock(). When we need to nest per-netns RTNL mutex, we will use __rtnl_net_lock(), and its locking order is defined by rtnl_net_lock_cmp_fn() as follows: 1. init_net is first 2. netns address ascending order Note that the conversion will be done under CONFIG_DEBUG_NET_SMALL_RTNL with LOCKDEP so that we can carefully add the extra mutex without slowing down RTNL operations during conversion. 
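As an illustration of the intended calling convention (not part of this patch; the function names and the example netns arguments are hypothetical), a converted RTNL scope and a nested two-netns scope would look roughly like this:

#include <linux/rtnetlink.h>
#include <net/net_namespace.h>

/* Hypothetical handler: the body stands in for code that today runs
 * under plain rtnl_lock()/rtnl_unlock(). */
static void example_change_link(struct net *net)
{
        rtnl_net_lock(net);     /* global RTNL, then net->rtnl_mutex */

        /* ... mutate state belonging to @net ... */

        rtnl_net_unlock(net);   /* per-netns mutex, then global RTNL */
}

/* When two netns must be held at once, take the global RTNL first and
 * nest the per-netns mutexes in the order checked by
 * rtnl_net_lock_cmp_fn(): init_net first, otherwise ascending netns
 * address. Here @src is assumed to order before @dst. */
static void example_move_link(struct net *src, struct net *dst)
{
        rtnl_lock();
        __rtnl_net_lock(src);
        __rtnl_net_lock(dst);

        /* ... */

        __rtnl_net_unlock(dst);
        __rtnl_net_unlock(src);
        rtnl_unlock();
}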
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 21 ++++++++++++++++ include/net/net_namespace.h | 4 ++++ net/Kconfig.debug | 15 ++++++++++++ net/core/net_namespace.c | 6 +++++ net/core/rtnetlink.c | 58 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 104 insertions(+) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index cdfc897f1e3c..edd840a49989 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -92,6 +92,27 @@ static inline bool lockdep_rtnl_is_held(void) #define rcu_replace_pointer_rtnl(rp, p) \ rcu_replace_pointer(rp, p, lockdep_rtnl_is_held()) +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL +void __rtnl_net_lock(struct net *net); +void __rtnl_net_unlock(struct net *net); +void rtnl_net_lock(struct net *net); +void rtnl_net_unlock(struct net *net); +int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); +#else +static inline void __rtnl_net_lock(struct net *net) {} +static inline void __rtnl_net_unlock(struct net *net) {} + +static inline void rtnl_net_lock(struct net *net) +{ + rtnl_lock(); +} + +static inline void rtnl_net_unlock(struct net *net) +{ + rtnl_unlock(); +} +#endif + static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) { return rtnl_dereference(dev->ingress_queue); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index e67b483cc8bb..873c0f9fdac6 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -188,6 +188,10 @@ struct net { #if IS_ENABLED(CONFIG_SMC) struct netns_smc smc; #endif +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL + /* Move to a better place when the config guard is removed. */ + struct mutex rtnl_mutex; +#endif } __randomize_layout; #include diff --git a/net/Kconfig.debug b/net/Kconfig.debug index 5e3fffe707dd..277fab8c4d77 100644 --- a/net/Kconfig.debug +++ b/net/Kconfig.debug @@ -24,3 +24,18 @@ config DEBUG_NET help Enable extra sanity checks in networking. This is mostly used by fuzzers, but is safe to select. + +config DEBUG_NET_SMALL_RTNL + bool "Add extra per-netns mutex inside RTNL" + depends on DEBUG_KERNEL && NET && LOCK_DEBUGGING_SUPPORT + select PROVE_LOCKING + default n + help + rtnl_lock() is being replaced with rtnl_net_lock() that + acquires the global RTNL and a small per-netns RTNL mutex. + + During the conversion, rtnl_net_lock() just adds an extra + mutex in every RTNL scope and slows down the operations. + + Once the conversion completes, rtnl_lock() will be removed + and rtnetlink will gain per-netns scalability. 
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index e39479f1c9a4..105e3cd26763 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -334,6 +334,12 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_ idr_init(&net->netns_ids); spin_lock_init(&net->nsid_lock); mutex_init(&net->ipv4.ra_mutex); + +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL + mutex_init(&net->rtnl_mutex); + lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL); +#endif + preinit_net_sysctl(net); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 682d8d3127db..46567fa54e42 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -179,6 +179,64 @@ bool lockdep_rtnl_is_held(void) EXPORT_SYMBOL(lockdep_rtnl_is_held); #endif /* #ifdef CONFIG_PROVE_LOCKING */ +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL +void __rtnl_net_lock(struct net *net) +{ + ASSERT_RTNL(); + + mutex_lock(&net->rtnl_mutex); +} +EXPORT_SYMBOL(__rtnl_net_lock); + +void __rtnl_net_unlock(struct net *net) +{ + ASSERT_RTNL(); + + mutex_unlock(&net->rtnl_mutex); +} +EXPORT_SYMBOL(__rtnl_net_unlock); + +void rtnl_net_lock(struct net *net) +{ + rtnl_lock(); + __rtnl_net_lock(net); +} +EXPORT_SYMBOL(rtnl_net_lock); + +void rtnl_net_unlock(struct net *net) +{ + __rtnl_net_unlock(net); + rtnl_unlock(); +} +EXPORT_SYMBOL(rtnl_net_unlock); + +static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b) +{ + if (net_eq(net_a, net_b)) + return 0; + + /* always init_net first */ + if (net_eq(net_a, &init_net)) + return -1; + + if (net_eq(net_b, &init_net)) + return 1; + + /* otherwise lock in ascending order */ + return net_a < net_b ? -1 : 1; +} + +int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) +{ + const struct net *net_a, *net_b; + + net_a = container_of(a, struct net, rtnl_mutex.dep_map); + net_b = container_of(b, struct net, rtnl_mutex.dep_map); + + return rtnl_net_cmp_locks(net_a, net_b); +} +#endif + static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; static inline int rtm_msgindex(int msgtype) -- cgit v1.2.3 From 844e5e7e656d3a7a904fd5607f8491d6fd01db8e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 4 Oct 2024 15:10:30 -0700 Subject: rtnetlink: Add assertion helpers for per-netns RTNL. Once an RTNL scope is converted with rtnl_net_lock(), we will replace RTNL helper functions inside the scope with the following per-netns alternatives: ASSERT_RTNL() -> ASSERT_RTNL_NET(net) rcu_dereference_rtnl(p) -> rcu_dereference_rtnl_net(net, p) Note that the per-netns helpers are equivalent to the conventional helpers unless CONFIG_DEBUG_NET_SMALL_RTNL is enabled. 
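A rough sketch of the substitution inside an already converted rtnl_net_lock() scope; the structure, the function and the caller's locking context below are made up for illustration and are not part of this patch:

#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>

struct example_entry {
        struct rcu_head rcu;
        /* ... */
};

struct example_table {
        struct example_entry __rcu *active;
};

/* Caller is assumed to hold rtnl_net_lock(net). */
static void example_replace(struct net *net, struct example_table *tbl,
                            struct example_entry *new_entry)
{
        struct example_entry *old;

        ASSERT_RTNL_NET(net);           /* was: ASSERT_RTNL() */

        /* was: rcu_replace_pointer_rtnl(tbl->active, new_entry) */
        old = rcu_replace_pointer_rtnl_net(net, tbl->active, new_entry);
        if (old)
                kfree_rcu(old, rcu);
}

With CONFIG_DEBUG_NET_SMALL_RTNL disabled, both lines compile to exactly the conventional RTNL checks, so the conversion carries no cost on production configs.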
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 45 +++++++++++++++++++++++++++++++++++++++++---- net/core/rtnetlink.c | 12 ++++++++++++ 2 files changed, 53 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index edd840a49989..8468a4ce8510 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -51,6 +51,10 @@ extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; +#define ASSERT_RTNL() \ + WARN_ONCE(!rtnl_is_locked(), \ + "RTNL: assertion failed at %s (%d)\n", __FILE__, __LINE__) + #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); #else @@ -98,6 +102,22 @@ void __rtnl_net_unlock(struct net *net); void rtnl_net_lock(struct net *net); void rtnl_net_unlock(struct net *net); int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); + +bool rtnl_net_is_locked(struct net *net); + +#define ASSERT_RTNL_NET(net) \ + WARN_ONCE(!rtnl_net_is_locked(net), \ + "RTNL_NET: assertion failed at %s (%d)\n", \ + __FILE__, __LINE__) + +bool lockdep_rtnl_net_is_held(struct net *net); + +#define rcu_dereference_rtnl_net(net, p) \ + rcu_dereference_check(p, lockdep_rtnl_net_is_held(net)) +#define rtnl_net_dereference(net, p) \ + rcu_dereference_protected(p, lockdep_rtnl_net_is_held(net)) +#define rcu_replace_pointer_rtnl_net(net, rp, p) \ + rcu_replace_pointer(rp, p, lockdep_rtnl_net_is_held(net)) #else static inline void __rtnl_net_lock(struct net *net) {} static inline void __rtnl_net_unlock(struct net *net) {} @@ -111,6 +131,27 @@ static inline void rtnl_net_unlock(struct net *net) { rtnl_unlock(); } + +static inline void ASSERT_RTNL_NET(struct net *net) +{ + ASSERT_RTNL(); +} + +static inline void *rcu_dereference_rtnl_net(struct net *net, void *p) +{ + return rcu_dereference_rtnl(p); +} + +static inline void *rtnl_net_dereference(struct net *net, void *p) +{ + return rtnl_dereference(p); +} + +static inline void *rcu_replace_pointer_rtnl_net(struct net *net, + void *rp, void *p) +{ + return rcu_replace_pointer_rtnl(rp, p); +} #endif static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) @@ -140,10 +181,6 @@ void rtnetlink_init(void); void __rtnl_unlock(void); void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); -#define ASSERT_RTNL() \ - WARN_ONCE(!rtnl_is_locked(), \ - "RTNL: assertion failed at %s (%d)\n", __FILE__, __LINE__) - extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 46567fa54e42..6d68247aea70 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -235,6 +235,18 @@ int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map * return rtnl_net_cmp_locks(net_a, net_b); } + +bool rtnl_net_is_locked(struct net *net) +{ + return rtnl_is_locked() && mutex_is_locked(&net->rtnl_mutex); +} +EXPORT_SYMBOL(rtnl_net_is_locked); + +bool lockdep_rtnl_net_is_held(struct net *net) +{ + return lockdep_rtnl_is_held() && lockdep_is_held(&net->rtnl_mutex); +} +EXPORT_SYMBOL(lockdep_rtnl_net_is_held); #endif static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; -- cgit v1.2.3 From a8f2cdd27d114ed6c3354a0e39502e6d56215804 Mon Sep 17 00:00:00 2001 From: Dmitry Perchanov Date: Mon, 26 Aug 2024 16:04:23 +0300 Subject: media: v4l: Add luma 16-bit interlaced pixel 
format The formats added by this patch are: V4L2_PIX_FMT_Y16I Interlaced lumina format primary use in RealSense Depth cameras with stereo stream for left and right image sensors. Signed-off-by: Dmitry Perchanov Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/568efbd75290e286b8ad9e7347b5f43745121020.camel@intel.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- .../userspace-api/media/v4l/pixfmt-y16i.rst | 73 ++++++++++++++++++++++ .../userspace-api/media/v4l/yuv-formats.rst | 1 + drivers/media/v4l2-core/v4l2-ioctl.c | 1 + include/uapi/linux/videodev2.h | 1 + 4 files changed, 76 insertions(+) create mode 100644 Documentation/userspace-api/media/v4l/pixfmt-y16i.rst (limited to 'include') diff --git a/Documentation/userspace-api/media/v4l/pixfmt-y16i.rst b/Documentation/userspace-api/media/v4l/pixfmt-y16i.rst new file mode 100644 index 000000000000..74ba9e910a38 --- /dev/null +++ b/Documentation/userspace-api/media/v4l/pixfmt-y16i.rst @@ -0,0 +1,73 @@ +.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later + +.. _V4L2-PIX-FMT-Y16I: + +************************** +V4L2_PIX_FMT_Y16I ('Y16I') +************************** + +Interleaved grey-scale image, e.g. from a stereo-pair + + +Description +=========== + +This is a grey-scale image with a depth of 16 bits per pixel, but with pixels +from 2 sources interleaved and unpacked. Each pixel is stored in a 16-bit word +in the little-endian order. The first pixel is from the left source. + +**Pixel unpacked representation.** +Left/Right pixels 16-bit unpacked - 16-bit for each interleaved pixel. + +.. flat-table:: + :header-rows: 0 + :stub-columns: 0 + + * - Y'\ :sub:`0L[7:0]` + - Y'\ :sub:`0L[15:8]` + - Y'\ :sub:`0R[7:0]` + - Y'\ :sub:`0R[15:8]` + +**Byte Order.** +Each cell is one byte. + +.. flat-table:: + :header-rows: 0 + :stub-columns: 0 + + * - start + 0: + - Y'\ :sub:`00Llow` + - Y'\ :sub:`00Lhigh` + - Y'\ :sub:`00Rlow` + - Y'\ :sub:`00Rhigh` + - Y'\ :sub:`01Llow` + - Y'\ :sub:`01Lhigh` + - Y'\ :sub:`01Rlow` + - Y'\ :sub:`01Rhigh` + * - start + 8: + - Y'\ :sub:`10Llow` + - Y'\ :sub:`10Lhigh` + - Y'\ :sub:`10Rlow` + - Y'\ :sub:`10Rhigh` + - Y'\ :sub:`11Llow` + - Y'\ :sub:`11Lhigh` + - Y'\ :sub:`11Rlow` + - Y'\ :sub:`11Rhigh` + * - start + 16: + - Y'\ :sub:`20Llow` + - Y'\ :sub:`20Lhigh` + - Y'\ :sub:`20Rlow` + - Y'\ :sub:`20Rhigh` + - Y'\ :sub:`21Llow` + - Y'\ :sub:`21Lhigh` + - Y'\ :sub:`21Rlow` + - Y'\ :sub:`21Rhigh` + * - start + 24: + - Y'\ :sub:`30Llow` + - Y'\ :sub:`30Lhigh` + - Y'\ :sub:`30Rlow` + - Y'\ :sub:`30Rhigh` + - Y'\ :sub:`31Llow` + - Y'\ :sub:`31Lhigh` + - Y'\ :sub:`31Rlow` + - Y'\ :sub:`31Rhigh` diff --git a/Documentation/userspace-api/media/v4l/yuv-formats.rst b/Documentation/userspace-api/media/v4l/yuv-formats.rst index 24b34cdfa6fe..78ee406d7647 100644 --- a/Documentation/userspace-api/media/v4l/yuv-formats.rst +++ b/Documentation/userspace-api/media/v4l/yuv-formats.rst @@ -269,5 +269,6 @@ image. 
pixfmt-yuv-luma pixfmt-y8i pixfmt-y12i + pixfmt-y16i pixfmt-uv8 pixfmt-m420 diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index e14db67be97c..b9a3c6b20282 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1327,6 +1327,7 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt) case V4L2_PIX_FMT_Y14P: descr = "14-bit Greyscale (MIPI Packed)"; break; case V4L2_PIX_FMT_Y8I: descr = "Interleaved 8-bit Greyscale"; break; case V4L2_PIX_FMT_Y12I: descr = "Interleaved 12-bit Greyscale"; break; + case V4L2_PIX_FMT_Y16I: descr = "Interleaved 16-bit Greyscale"; break; case V4L2_PIX_FMT_Z16: descr = "16-bit Depth"; break; case V4L2_PIX_FMT_INZI: descr = "Planar 10:16 Greyscale Depth"; break; case V4L2_PIX_FMT_CNF4: descr = "4-bit Depth Confidence (Packed)"; break; diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 27239cb64065..21a8aa575ea3 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -798,6 +798,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_S5C_UYVY_JPG v4l2_fourcc('S', '5', 'C', 'I') /* S5C73M3 interleaved UYVY/JPEG */ #define V4L2_PIX_FMT_Y8I v4l2_fourcc('Y', '8', 'I', ' ') /* Greyscale 8-bit L/R interleaved */ #define V4L2_PIX_FMT_Y12I v4l2_fourcc('Y', '1', '2', 'I') /* Greyscale 12-bit L/R interleaved */ +#define V4L2_PIX_FMT_Y16I v4l2_fourcc('Y', '1', '6', 'I') /* Greyscale 16-bit L/R interleaved */ #define V4L2_PIX_FMT_Z16 v4l2_fourcc('Z', '1', '6', ' ') /* Depth data 16-bit */ #define V4L2_PIX_FMT_MT21C v4l2_fourcc('M', 'T', '2', '1') /* Mediatek compressed block mode */ #define V4L2_PIX_FMT_MM21 v4l2_fourcc('M', 'M', '2', '1') /* Mediatek 8-bit block mode, two non-contiguous planes */ -- cgit v1.2.3 From 55b834873e800a5809787ce6aedcb70ee49a596f Mon Sep 17 00:00:00 2001 From: Dmitry Perchanov Date: Mon, 26 Aug 2024 16:05:04 +0300 Subject: media: uvcvideo: Add luma 16-bit interlaced pixel format The formats added by this patch are: UVC_GUID_FORMAT_Y16I Interlaced lumina format primary use in RealSense Depth cameras with stereo stream for left and right image sensors. 
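For reference only, a user-space sketch of unpacking one captured V4L2_PIX_FMT_Y16I frame into separate left and right 16-bit grey planes; the function name, a little-endian host and a padding-free bytesperline are all assumptions, not part of this patch:

#include <stddef.h>
#include <stdint.h>

static void y16i_split(const uint16_t *y16i, uint16_t *left, uint16_t *right,
                       size_t width, size_t height)
{
        size_t n = width * height;

        for (size_t i = 0; i < n; i++) {
                left[i]  = y16i[2 * i];         /* first sample: left sensor */
                right[i] = y16i[2 * i + 1];     /* second sample: right sensor */
        }
}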
Signed-off-by: Dmitry Perchanov Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/a717a912035b0a0f82b2f35719cca0c5269e995f.camel@intel.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- drivers/media/common/uvc.c | 4 ++++ include/linux/usb/uvc.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/drivers/media/common/uvc.c b/drivers/media/common/uvc.c index c54c2268fee6..027498d37464 100644 --- a/drivers/media/common/uvc.c +++ b/drivers/media/common/uvc.c @@ -120,6 +120,10 @@ static const struct uvc_format_desc uvc_fmts[] = { .guid = UVC_GUID_FORMAT_Y12I, .fcc = V4L2_PIX_FMT_Y12I, }, + { + .guid = UVC_GUID_FORMAT_Y16I, + .fcc = V4L2_PIX_FMT_Y16I, + }, { .guid = UVC_GUID_FORMAT_Z16, .fcc = V4L2_PIX_FMT_Z16, diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index 88d96095bcb1..1c16be20c966 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -118,6 +118,9 @@ #define UVC_GUID_FORMAT_Y12I \ { 'Y', '1', '2', 'I', 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y16I \ + { 'Y', '1', '6', 'I', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} #define UVC_GUID_FORMAT_Z16 \ { 'Z', '1', '6', ' ', 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} -- cgit v1.2.3 From a7e742e416bc331c0f409d6a3630471896e696d3 Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 18 Sep 2024 20:05:39 +0200 Subject: media: uvcvideo: Add support for the D3DFMT_R5G6B5 pixmap type This media format is used by the NXP Semiconductors 1fc9:009b chipset, used by the Kaiweets KTI-W02 infrared camera. Signed-off-by: David Given Reviewed-by: Ricardo Ribalda Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20240918180540.10830-1-dg@cowlark.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- drivers/media/common/uvc.c | 4 ++++ include/linux/usb/uvc.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/drivers/media/common/uvc.c b/drivers/media/common/uvc.c index 027498d37464..1ad4604474ac 100644 --- a/drivers/media/common/uvc.c +++ b/drivers/media/common/uvc.c @@ -96,6 +96,10 @@ static const struct uvc_format_desc uvc_fmts[] = { .guid = UVC_GUID_FORMAT_RGBP, .fcc = V4L2_PIX_FMT_RGB565, }, + { + .guid = UVC_GUID_FORMAT_D3DFMT_R5G6B5, + .fcc = V4L2_PIX_FMT_RGB565, + }, { .guid = UVC_GUID_FORMAT_BGR3, .fcc = V4L2_PIX_FMT_BGR24, diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index 1c16be20c966..bce95153e5a6 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -143,6 +143,9 @@ #define UVC_GUID_FORMAT_D3DFMT_L8 \ {0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_D3DFMT_R5G6B5 \ + {0x7b, 0xeb, 0x36, 0xe4, 0x4f, 0x52, 0xce, 0x11, \ + 0x9f, 0x53, 0x00, 0x20, 0xaf, 0x0b, 0xa7, 0x70} #define UVC_GUID_FORMAT_KSMEDIA_L8_IR \ {0x32, 0x00, 0x00, 0x00, 0x02, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} -- cgit v1.2.3 From 36e69b160705b65bf136c2fb6a1194447eeb8478 Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Sun, 29 Sep 2024 11:21:16 +0200 Subject: driver core: Add device probe log helper dev_warn_probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some drivers can still provide their functionality to a certain extent even when some of their resource acquisitions eventually fail. 
In such cases, emitting errors isn't the desired action, but warnings should be emitted instead. To solve this, introduce dev_warn_probe() as a new device probe log helper, which behaves identically as the already existing dev_err_probe(), while it produces warnings instead of errors. The intended use is with the resources that are actually optional for a particular driver. While there, copyedit the kerneldoc for dev_err_probe() a bit, to simplify its wording a bit, and reuse it as the kerneldoc for dev_warn_probe(), with the necessary wording adjustments, of course. Signed-off-by: Dragan Simic Tested-by: Hélène Vulquin Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2be0a28538bb2a3d1bcc91e2ca1f2d0dc09146d9.1727601608.git.dsimic@manjaro.org Signed-off-by: Mark Brown --- drivers/base/core.c | 129 +++++++++++++++++++++++++++++++++++---------- include/linux/dev_printk.h | 1 + 2 files changed, 102 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/base/core.c b/drivers/base/core.c index a4c853411a6b..a84a7b952cfd 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4980,6 +4980,49 @@ define_dev_printk_level(_dev_info, KERN_INFO); #endif +static void __dev_probe_failed(const struct device *dev, int err, bool fatal, + const char *fmt, va_list vargsp) +{ + struct va_format vaf; + va_list vargs; + + /* + * On x86_64 and possibly on other architectures, va_list is actually a + * size-1 array containing a structure. As a result, function parameter + * vargsp decays from T[1] to T*, and &vargsp has type T** rather than + * T(*)[1], which is expected by its assignment to vaf.va below. + * + * One standard way to solve this mess is by creating a copy in a local + * variable of type va_list and then using a pointer to that local copy + * instead, which is the approach employed here. + */ + va_copy(vargs, vargsp); + + vaf.fmt = fmt; + vaf.va = &vargs; + + switch (err) { + case -EPROBE_DEFER: + device_set_deferred_probe_reason(dev, &vaf); + dev_dbg(dev, "error %pe: %pV", ERR_PTR(err), &vaf); + break; + + case -ENOMEM: + /* Don't print anything on -ENOMEM, there's already enough output */ + break; + + default: + /* Log fatal final failures as errors, otherwise produce warnings */ + if (fatal) + dev_err(dev, "error %pe: %pV", ERR_PTR(err), &vaf); + else + dev_warn(dev, "error %pe: %pV", ERR_PTR(err), &vaf); + break; + } + + va_end(vargs); +} + /** * dev_err_probe - probe error check and log helper * @dev: the pointer to the struct device @@ -4992,7 +5035,7 @@ define_dev_printk_level(_dev_info, KERN_INFO); * -EPROBE_DEFER and propagate error upwards. * In case of -EPROBE_DEFER it sets also defer probe reason, which can be * checked later by reading devices_deferred debugfs attribute. - * It replaces code sequence:: + * It replaces the following code sequence:: * * if (err != -EPROBE_DEFER) * dev_err(dev, ...); @@ -5004,47 +5047,77 @@ define_dev_printk_level(_dev_info, KERN_INFO); * * return dev_err_probe(dev, err, ...); * - * Using this helper in your probe function is totally fine even if @err is - * known to never be -EPROBE_DEFER. + * Using this helper in your probe function is totally fine even if @err + * is known to never be -EPROBE_DEFER. * The benefit compared to a normal dev_err() is the standardized format - * of the error code, it being emitted symbolically (i.e. you get "EAGAIN" - * instead of "-35") and the fact that the error code is returned which allows - * more compact error paths. + * of the error code, which is emitted symbolically (i.e. 
you get "EAGAIN" + * instead of "-35"), and having the error code returned allows more + * compact error paths. * * Returns @err. */ int dev_err_probe(const struct device *dev, int err, const char *fmt, ...) { - struct va_format vaf; - va_list args; + va_list vargs; - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; + va_start(vargs, fmt); - switch (err) { - case -EPROBE_DEFER: - device_set_deferred_probe_reason(dev, &vaf); - dev_dbg(dev, "error %pe: %pV", ERR_PTR(err), &vaf); - break; + /* Use dev_err() for logging when err doesn't equal -EPROBE_DEFER */ + __dev_probe_failed(dev, err, true, fmt, vargs); - case -ENOMEM: - /* - * We don't print anything on -ENOMEM, there is already enough - * output. - */ - break; + va_end(vargs); - default: - dev_err(dev, "error %pe: %pV", ERR_PTR(err), &vaf); - break; - } + return err; +} +EXPORT_SYMBOL_GPL(dev_err_probe); - va_end(args); +/** + * dev_warn_probe - probe error check and log helper + * @dev: the pointer to the struct device + * @err: error value to test + * @fmt: printf-style format string + * @...: arguments as specified in the format string + * + * This helper implements common pattern present in probe functions for error + * checking: print debug or warning message depending if the error value is + * -EPROBE_DEFER and propagate error upwards. + * In case of -EPROBE_DEFER it sets also defer probe reason, which can be + * checked later by reading devices_deferred debugfs attribute. + * It replaces the following code sequence:: + * + * if (err != -EPROBE_DEFER) + * dev_warn(dev, ...); + * else + * dev_dbg(dev, ...); + * return err; + * + * with:: + * + * return dev_warn_probe(dev, err, ...); + * + * Using this helper in your probe function is totally fine even if @err + * is known to never be -EPROBE_DEFER. + * The benefit compared to a normal dev_warn() is the standardized format + * of the error code, which is emitted symbolically (i.e. you get "EAGAIN" + * instead of "-35"), and having the error code returned allows more + * compact error paths. + * + * Returns @err. + */ +int dev_warn_probe(const struct device *dev, int err, const char *fmt, ...) +{ + va_list vargs; + + va_start(vargs, fmt); + + /* Use dev_warn() for logging when err doesn't equal -EPROBE_DEFER */ + __dev_probe_failed(dev, err, false, fmt, vargs); + + va_end(vargs); return err; } -EXPORT_SYMBOL_GPL(dev_err_probe); +EXPORT_SYMBOL_GPL(dev_warn_probe); static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) { diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h index ca32b5bb28eb..eb2094e43050 100644 --- a/include/linux/dev_printk.h +++ b/include/linux/dev_printk.h @@ -276,6 +276,7 @@ do { \ dev_driver_string(dev), dev_name(dev), ## arg) __printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); +__printf(3, 4) int dev_warn_probe(const struct device *dev, int err, const char *fmt, ...); /* Simple helper for dev_err_probe() when ERR_PTR() is to be returned. */ #define dev_err_ptr_probe(dev, ___err, fmt, ...) \ -- cgit v1.2.3 From 0b028ff7e70ecbe5240ad92e36a664af5cf7f382 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 6 Oct 2024 23:55:11 +0100 Subject: auxdisplay: Remove unused functions cfag12864b_getrate() and cfag12864b_isenabled() were both added in commit 70e840499aae ("[PATCH] drivers: add LCD support") but never used. Remove them. Signed-off-by: Dr. 
David Alan Gilbert Reviewed-by: Geert Uytterhoeven Acked-by: Miguel Ojeda Signed-off-by: Andy Shevchenko --- drivers/auxdisplay/cfag12864b.c | 12 ------------ include/linux/cfag12864b.h | 17 ----------------- 2 files changed, 29 deletions(-) (limited to 'include') diff --git a/drivers/auxdisplay/cfag12864b.c b/drivers/auxdisplay/cfag12864b.c index 6526aa51fb1d..e1a94ae3eb0c 100644 --- a/drivers/auxdisplay/cfag12864b.c +++ b/drivers/auxdisplay/cfag12864b.c @@ -37,11 +37,6 @@ module_param(cfag12864b_rate, uint, 0444); MODULE_PARM_DESC(cfag12864b_rate, "Refresh rate (hertz)"); -unsigned int cfag12864b_getrate(void) -{ - return cfag12864b_rate; -} - /* * cfag12864b Commands * @@ -249,11 +244,6 @@ void cfag12864b_disable(void) mutex_unlock(&cfag12864b_mutex); } -unsigned char cfag12864b_isenabled(void) -{ - return cfag12864b_updating; -} - static void cfag12864b_update(struct work_struct *work) { unsigned char c; @@ -293,10 +283,8 @@ static void cfag12864b_update(struct work_struct *work) */ EXPORT_SYMBOL_GPL(cfag12864b_buffer); -EXPORT_SYMBOL_GPL(cfag12864b_getrate); EXPORT_SYMBOL_GPL(cfag12864b_enable); EXPORT_SYMBOL_GPL(cfag12864b_disable); -EXPORT_SYMBOL_GPL(cfag12864b_isenabled); /* * Is the module inited? diff --git a/include/linux/cfag12864b.h b/include/linux/cfag12864b.h index 6617d9c68d86..83e6613d12ae 100644 --- a/include/linux/cfag12864b.h +++ b/include/linux/cfag12864b.h @@ -27,13 +27,6 @@ */ extern unsigned char * cfag12864b_buffer; -/* - * Get the refresh rate of the LCD - * - * Returns the refresh rate (hertz). - */ -extern unsigned int cfag12864b_getrate(void); - /* * Enable refreshing * @@ -49,16 +42,6 @@ extern unsigned char cfag12864b_enable(void); */ extern void cfag12864b_disable(void); -/* - * Is enabled refreshing? (is anyone using the module?) - * - * Returns 0 if refreshing is not enabled (anyone is using it), - * or != 0 if refreshing is enabled (someone is using it). - * - * Useful for buffer read-only modules. - */ -extern unsigned char cfag12864b_isenabled(void); - /* * Is the module inited? */ -- cgit v1.2.3 From 581434654e01ec79dd02c21448ac84e2ce2d1a64 Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Tue, 8 Oct 2024 11:24:58 +0000 Subject: workqueue: Adjust WQ_MAX_ACTIVE from 512 to 2048 WQ_MAX_ACTIVE is currently set to 512, which was established approximately 15 years ago. However, with the significant increase in machine sizes and capabilities, the previous default of 256 concurrent work items is no longer sufficient. Therefore, increase WQ_MAX_ACTIVE to 2048, which also raises WQ_DFL_ACTIVE to 1024. Signed-off-by: Chen Ridong Signed-off-by: Tejun Heo --- Documentation/core-api/workqueue.rst | 4 ++-- include/linux/workqueue.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst index 2b813f80ce39..e295835fc116 100644 --- a/Documentation/core-api/workqueue.rst +++ b/Documentation/core-api/workqueue.rst @@ -245,8 +245,8 @@ CPU which can be assigned to the work items of a wq. For example, with at the same time per CPU. This is always a per-CPU attribute, even for unbound workqueues. -The maximum limit for ``@max_active`` is 512 and the default value used -when 0 is specified is 256. These values are chosen sufficiently high +The maximum limit for ``@max_active`` is 2048 and the default value used +when 0 is specified is 1024. These values are chosen sufficiently high such that they are not the limiting factor while providing protection in runaway cases.
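As a rough illustration of what these limits govern (this sketch is not part of the patch; the queue name, work handler and module hooks are hypothetical), a driver can pass an explicit max_active to alloc_workqueue(); passing 0 instead selects WQ_DFL_ACTIVE, which this change raises to 1024, and anything above WQ_MAX_ACTIVE (now 2048) is clamped.

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;	/* hypothetical queue, for illustration only */

static void example_work_fn(struct work_struct *work)
{
	/* process one queued item */
}

static DECLARE_WORK(example_work, example_work_fn);

static int __init example_init(void)
{
	/* At most 64 work items from this queue may run concurrently per CPU;
	 * 0 here would mean "use WQ_DFL_ACTIVE", i.e. 1024 after this patch. */
	example_wq = alloc_workqueue("example_wq", 0, 64);
	if (!example_wq)
		return -ENOMEM;

	queue_work(example_wq, &example_work);
	return 0;
}

static void __exit example_exit(void)
{
	/* drains pending work before freeing the queue */
	destroy_workqueue(example_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");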
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 59c2695e12e7..b0dc957c3e56 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -412,7 +412,7 @@ enum wq_flags { }; enum wq_consts { - WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ + WQ_MAX_ACTIVE = 2048, /* I like 2048, better ideas? */ WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, -- cgit v1.2.3 From aefa398d93d5db7c555be78a605ff015357f127d Mon Sep 17 00:00:00 2001 From: Joshua Hahn Date: Wed, 2 Oct 2024 11:47:16 -0700 Subject: cgroup/rstat: Tracking cgroup-level niced CPU time Cgroup-level CPU statistics currently include time spent on user/system processes, but do not include niced CPU time (despite it already being tracked). This patch exposes niced CPU time to userspace, allowing users to better understand their hardware limits and facilitating more informed workload distribution. A new field 'ntime' is added to struct cgroup_base_stat, rather than to struct task_cputime, to minimize the footprint. Signed-off-by: Joshua Hahn Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 1 + kernel/cgroup/rstat.c | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 47ae4c4d924c..0a80ef9191a6 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -327,6 +327,7 @@ struct cgroup_base_stat { #ifdef CONFIG_SCHED_CORE u64 forceidle_sum; #endif + u64 ntime; }; /* diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index a06b45272411..5877974ece92 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -444,6 +444,7 @@ static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat, #ifdef CONFIG_SCHED_CORE dst_bstat->forceidle_sum += src_bstat->forceidle_sum; #endif + dst_bstat->ntime += src_bstat->ntime; } static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat, @@ -455,6 +456,7 @@ static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat, #ifdef CONFIG_SCHED_CORE dst_bstat->forceidle_sum -= src_bstat->forceidle_sum; #endif + dst_bstat->ntime -= src_bstat->ntime; } static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu) @@ -534,8 +536,10 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp, rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); switch (index) { - case CPUTIME_USER: case CPUTIME_NICE: + rstatc->bstat.ntime += delta_exec; + fallthrough; + case CPUTIME_USER: rstatc->bstat.cputime.utime += delta_exec; break; case CPUTIME_SYSTEM: @@ -591,6 +595,7 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat) #ifdef CONFIG_SCHED_CORE bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE]; #endif + bstat->ntime += cpustat[CPUTIME_NICE]; } } @@ -608,13 +613,14 @@ static void cgroup_force_idle_show(struct seq_file *seq, struct cgroup_base_stat void cgroup_base_stat_cputime_show(struct seq_file *seq) { struct cgroup *cgrp = seq_css(seq)->cgroup; - u64 usage, utime, stime; + u64 usage, utime, stime, ntime; if (cgroup_parent(cgrp)) { cgroup_rstat_flush_hold(cgrp); usage = cgrp->bstat.cputime.sum_exec_runtime; cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, &utime, &stime); + ntime = cgrp->bstat.ntime; cgroup_rstat_flush_release(cgrp); } else { /* cgrp->bstat of root is not actually used, reuse it */ @@ -622,16 +628,19 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq) usage = cgrp->bstat.cputime.sum_exec_runtime; utime =
cgrp->bstat.cputime.utime; stime = cgrp->bstat.cputime.stime; + ntime = cgrp->bstat.ntime; } do_div(usage, NSEC_PER_USEC); do_div(utime, NSEC_PER_USEC); do_div(stime, NSEC_PER_USEC); + do_div(ntime, NSEC_PER_USEC); seq_printf(seq, "usage_usec %llu\n" - "user_usec %llu\n" - "system_usec %llu\n", - usage, utime, stime); + "user_usec %llu\n" + "system_usec %llu\n" + "nice_usec %llu\n", + usage, utime, stime, ntime); cgroup_force_idle_show(seq, &cgrp->bstat); } -- cgit v1.2.3 From 49e4154f4b16345da5e219b23ed9737a6e735bc1 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 11 Sep 2024 09:00:26 +0800 Subject: tracing: Remove TRACE_EVENT_FL_FILTERED logic After commit dcb0b5575d24 ("tracing: Remove TRACE_EVENT_FL_USE_CALL_FILTER logic"), no one's going to set the TRACE_EVENT_FL_FILTERED or change the call->filter, so remove related logic. Link: https://lore.kernel.org/20240911010026.2302849-1-zhengyejian@huaweicloud.com Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 4 ---- kernel/trace/trace.c | 44 ++++++++---------------------------- kernel/trace/trace.h | 4 ---- kernel/trace/trace_branch.c | 4 +--- kernel/trace/trace_events.c | 2 -- kernel/trace/trace_functions_graph.c | 8 ++----- kernel/trace/trace_hwlat.c | 4 +--- kernel/trace/trace_mmiotrace.c | 8 ++----- kernel/trace/trace_osnoise.c | 12 +++------- kernel/trace/trace_sched_wakeup.c | 8 ++----- 10 files changed, 20 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 42bedcddd511..f8f2e52653df 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -326,7 +326,6 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, void trace_event_buffer_commit(struct trace_event_buffer *fbuffer); enum { - TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_CAP_ANY_BIT, TRACE_EVENT_FL_NO_SET_FILTER_BIT, TRACE_EVENT_FL_IGNORE_ENABLE_BIT, @@ -341,7 +340,6 @@ enum { /* * Event flags: - * FILTERED - The event has a filter attached * CAP_ANY - Any user can enable for perf * NO_SET_FILTER - Set when filter has error and is to be ignored * IGNORE_ENABLE - For trace internal events, do not enable with debugfs file @@ -356,7 +354,6 @@ enum { * to a tracepoint yet, then it is cleared when it is. */ enum { - TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), @@ -381,7 +378,6 @@ struct trace_event_call { }; struct trace_event event; char *print_fmt; - struct event_filter *filter; /* * Static events can disappear with modules, * where as dynamic ones need their own ref count. 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1c69ca1f1088..bdb776e6ceb9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -593,19 +593,6 @@ int tracing_check_open_get_tr(struct trace_array *tr) return 0; } -int call_filter_check_discard(struct trace_event_call *call, void *rec, - struct trace_buffer *buffer, - struct ring_buffer_event *event) -{ - if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && - !filter_match_preds(call->filter, rec)) { - __trace_event_discard_commit(buffer, event); - return 1; - } - - return 0; -} - /** * trace_find_filtered_pid - check if a pid exists in a filtered_pid list * @filtered_pids: The list of pids to check @@ -2889,7 +2876,6 @@ void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned int trace_ctx) { - struct trace_event_call *call = &event_function; struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; @@ -2902,11 +2888,9 @@ trace_function(struct trace_array *tr, unsigned long ip, unsigned long entry->ip = ip; entry->parent_ip = parent_ip; - if (!call_filter_check_discard(call, entry, buffer, event)) { - if (static_branch_unlikely(&trace_function_exports_enabled)) - ftrace_exports(event, TRACE_EXPORT_FUNCTION); - __buffer_unlock_commit(buffer, event); - } + if (static_branch_unlikely(&trace_function_exports_enabled)) + ftrace_exports(event, TRACE_EXPORT_FUNCTION); + __buffer_unlock_commit(buffer, event); } #ifdef CONFIG_STACKTRACE @@ -2932,7 +2916,6 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, unsigned int trace_ctx, int skip, struct pt_regs *regs) { - struct trace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; unsigned int size, nr_entries; struct ftrace_stack *fstack; @@ -2986,8 +2969,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, memcpy(&entry->caller, fstack->calls, flex_array_size(entry, caller, nr_entries)); - if (!call_filter_check_discard(call, entry, buffer, event)) - __buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); out: /* Again, don't let gcc optimize things here */ @@ -3060,7 +3042,6 @@ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, unsigned int trace_ctx) { - struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; struct userstack_entry *entry; @@ -3094,8 +3075,7 @@ ftrace_trace_userstack(struct trace_array *tr, memset(&entry->caller, 0, sizeof(entry->caller)); stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES); - if (!call_filter_check_discard(call, entry, buffer, event)) - __buffer_unlock_commit(buffer, event); + __buffer_unlock_commit(buffer, event); out_drop_count: __this_cpu_dec(user_stack_count); @@ -3264,7 +3244,6 @@ static void trace_printk_start_stop_comm(int enabled) */ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { - struct trace_event_call *call = &event_bprint; struct ring_buffer_event *event; struct trace_buffer *buffer; struct trace_array *tr = READ_ONCE(printk_trace); @@ -3308,10 +3287,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) entry->fmt = fmt; memcpy(entry->buf, tbuffer, sizeof(u32) * len); - if (!call_filter_check_discard(call, entry, buffer, event)) { - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); - } + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); out: 
ring_buffer_nest_end(buffer); @@ -3331,7 +3308,6 @@ static int __trace_array_vprintk(struct trace_buffer *buffer, unsigned long ip, const char *fmt, va_list args) { - struct trace_event_call *call = &event_print; struct ring_buffer_event *event; int len = 0, size; struct print_entry *entry; @@ -3366,10 +3342,8 @@ __trace_array_vprintk(struct trace_buffer *buffer, entry->ip = ip; memcpy(&entry->buf, tbuffer, len + 1); - if (!call_filter_check_discard(call, entry, buffer, event)) { - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); - } + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); out: ring_buffer_nest_end(buffer); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c866991b9c78..638f452eec10 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1429,10 +1429,6 @@ struct trace_subsystem_dir { int nr_events; }; -extern int call_filter_check_discard(struct trace_event_call *call, void *rec, - struct trace_buffer *buffer, - struct ring_buffer_event *event); - void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index aa63548873c3..6d08a5523ce0 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -30,7 +30,6 @@ static struct trace_array *branch_tracer; static void probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) { - struct trace_event_call *call = &event_branch; struct trace_array *tr = branch_tracer; struct trace_buffer *buffer; struct trace_array_cpu *data; @@ -80,8 +79,7 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) entry->line = f->data.line; entry->correct = val == expect; - if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit_nostack(buffer, event); + trace_buffer_unlock_commit_nostack(buffer, event); out: current->trace_recursion &= ~TRACE_BRANCH_BIT; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 7266ec2a4eea..77e68efbd43e 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3149,8 +3149,6 @@ static void __trace_remove_event_call(struct trace_event_call *call) { event_remove(call); trace_destroy_fields(call); - free_event_filter(call->filter); - call->filter = NULL; } static int probe_remove_event_call(struct trace_event_call *call) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index a569daaac4c4..ab57ec78ca04 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -102,7 +102,6 @@ int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned int trace_ctx) { - struct trace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; struct trace_buffer *buffer = tr->array_buffer.buffer; struct ftrace_graph_ent_entry *entry; @@ -113,8 +112,7 @@ int __trace_graph_entry(struct trace_array *tr, return 0; entry = ring_buffer_event_data(event); entry->graph_ent = *trace; - if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit_nostack(buffer, event); + trace_buffer_unlock_commit_nostack(buffer, event); return 1; } @@ -223,7 +221,6 @@ void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned int trace_ctx) { - struct trace_event_call *call = &event_funcgraph_exit; 
struct ring_buffer_event *event; struct trace_buffer *buffer = tr->array_buffer.buffer; struct ftrace_graph_ret_entry *entry; @@ -234,8 +231,7 @@ void __trace_graph_return(struct trace_array *tr, return; entry = ring_buffer_event_data(event); entry->ret = *trace; - if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit_nostack(buffer, event); + trace_buffer_unlock_commit_nostack(buffer, event); } void trace_graph_return(struct ftrace_graph_ret *trace, diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 3bd6071441ad..b65353ec2837 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -130,7 +130,6 @@ static bool hwlat_busy; static void trace_hwlat_sample(struct hwlat_sample *sample) { struct trace_array *tr = hwlat_trace; - struct trace_event_call *call = &event_hwlat; struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct hwlat_entry *entry; @@ -148,8 +147,7 @@ static void trace_hwlat_sample(struct hwlat_sample *sample) entry->nmi_count = sample->nmi_count; entry->count = sample->count; - if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit_nostack(buffer, event); + trace_buffer_unlock_commit_nostack(buffer, event); } /* Macros to encapsulate the time capturing infrastructure */ diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 64e77b513697..ba5858866b2f 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -294,7 +294,6 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_rw *rw) { - struct trace_event_call *call = &event_mmiotrace_rw; struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; @@ -310,8 +309,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->rw = *rw; - if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } void mmio_trace_rw(struct mmiotrace_rw *rw) @@ -325,7 +323,6 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_map *map) { - struct trace_event_call *call = &event_mmiotrace_map; struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; @@ -341,8 +338,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->map = *map; - if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } void mmio_trace_mapping(struct mmiotrace_map *map) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index a50ed23bee77..b9f96c77527d 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -499,7 +499,6 @@ static void print_osnoise_headers(struct seq_file *s) static void __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) { - struct trace_event_call *call = &event_osnoise; struct ring_buffer_event *event; struct osnoise_entry *entry; @@ -517,8 +516,7 @@ __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffe entry->softirq_count = sample->softirq_count; entry->thread_count = 
sample->thread_count; - if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit_nostack(buffer, event); + trace_buffer_unlock_commit_nostack(buffer, event); } /* @@ -578,7 +576,6 @@ static void print_timerlat_headers(struct seq_file *s) static void __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) { - struct trace_event_call *call = &event_osnoise; struct ring_buffer_event *event; struct timerlat_entry *entry; @@ -591,8 +588,7 @@ __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buf entry->context = sample->context; entry->timer_latency = sample->timer_latency; - if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit_nostack(buffer, event); + trace_buffer_unlock_commit_nostack(buffer, event); } /* @@ -654,7 +650,6 @@ static void timerlat_save_stack(int skip) static void __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size) { - struct trace_event_call *call = &event_osnoise; struct ring_buffer_event *event; struct stack_entry *entry; @@ -668,8 +663,7 @@ __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, u memcpy(&entry->caller, fstack->calls, size); entry->size = fstack->nr_entries; - if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit_nostack(buffer, event); + trace_buffer_unlock_commit_nostack(buffer, event); } /* diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index ae2ace5e515a..d6c7f18daa15 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -378,7 +378,6 @@ tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *next, unsigned int trace_ctx) { - struct trace_event_call *call = &event_context_switch; struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -396,8 +395,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_state = task_state_index(next); entry->next_cpu = task_cpu(next); - if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } static void @@ -406,7 +404,6 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *curr, unsigned int trace_ctx) { - struct trace_event_call *call = &event_wakeup; struct ring_buffer_event *event; struct ctx_switch_entry *entry; struct trace_buffer *buffer = tr->array_buffer.buffer; @@ -424,8 +421,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_state = task_state_index(wakee); entry->next_cpu = task_cpu(wakee); - if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } static void notrace -- cgit v1.2.3 From 02f220b5267042d0de649614eec84ded8aeecb4f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 20:26:55 +0200 Subject: wifi: ipw2x00/lib80211: move remaining lib80211 into libipw There's already much code in libipw that used to be shared with more drivers, but now with the prior cleanups, those old Intel ipw2x00 drivers are also the only ones using whatever is now left of lib80211. Move lib80211 entirely into libipw. 
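Since the crypto registry now lives inside the driver, here is a minimal sketch of how an algorithm hooks into the relocated API via struct libipw_crypto_ops and libipw_register_crypto_ops(), mirroring what the NULL, WEP, TKIP and CCMP ciphers do further down in this patch. This sketch is not part of the patch; the "DEMO" cipher and all demo_* names are made up for illustration.

#include <linux/module.h>
#include <linux/slab.h>
#include "libipw.h"

/* per-key private state for the hypothetical "DEMO" cipher */
struct demo_crypt_data {
	int key_idx;
};

static void *demo_crypt_init(int keyidx)
{
	struct demo_crypt_data *priv = kzalloc(sizeof(*priv), GFP_ATOMIC);

	if (priv)
		priv->key_idx = keyidx;
	return priv;
}

static void demo_crypt_deinit(void *priv)
{
	kfree(priv);
}

static const struct libipw_crypto_ops demo_crypt_ops = {
	.name	= "DEMO",
	.init	= demo_crypt_init,
	.deinit	= demo_crypt_deinit,
	.owner	= THIS_MODULE,
};

/* registration against the in-driver registry, which before this patch
 * was done through lib80211_register_crypto_ops() in net/wireless */
static int __init demo_crypt_register(void)
{
	return libipw_register_crypto_ops(&demo_crypt_ops);
}

static void demo_crypt_unregister(void)
{
	libipw_unregister_crypto_ops(&demo_crypt_ops);
}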
Link: https://patch.msgid.link/20241007202707.915ef7b9e7c7.Ib9876d2fe3c90f11d6df458b16d0b7d4bf551a8d@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/ipw2x00/Kconfig | 6 - drivers/net/wireless/intel/ipw2x00/Makefile | 6 +- drivers/net/wireless/intel/ipw2x00/ipw2100.c | 7 +- drivers/net/wireless/intel/ipw2x00/ipw2200.c | 4 +- drivers/net/wireless/intel/ipw2x00/ipw2200.h | 2 - drivers/net/wireless/intel/ipw2x00/libipw.h | 101 ++- drivers/net/wireless/intel/ipw2x00/libipw_crypto.c | 246 +++++++ .../wireless/intel/ipw2x00/libipw_crypto_ccmp.c | 438 ++++++++++++ .../wireless/intel/ipw2x00/libipw_crypto_tkip.c | 728 ++++++++++++++++++++ .../net/wireless/intel/ipw2x00/libipw_crypto_wep.c | 247 +++++++ drivers/net/wireless/intel/ipw2x00/libipw_module.c | 36 +- drivers/net/wireless/intel/ipw2x00/libipw_rx.c | 9 +- drivers/net/wireless/intel/ipw2x00/libipw_tx.c | 4 +- drivers/net/wireless/intel/ipw2x00/libipw_wx.c | 43 +- include/net/lib80211.h | 122 ---- net/wireless/Kconfig | 33 - net/wireless/Makefile | 4 - net/wireless/lib80211.c | 257 ------- net/wireless/lib80211_crypt_ccmp.c | 448 ------------- net/wireless/lib80211_crypt_tkip.c | 738 --------------------- net/wireless/lib80211_crypt_wep.c | 256 ------- 21 files changed, 1825 insertions(+), 1910 deletions(-) create mode 100644 drivers/net/wireless/intel/ipw2x00/libipw_crypto.c create mode 100644 drivers/net/wireless/intel/ipw2x00/libipw_crypto_ccmp.c create mode 100644 drivers/net/wireless/intel/ipw2x00/libipw_crypto_tkip.c create mode 100644 drivers/net/wireless/intel/ipw2x00/libipw_crypto_wep.c delete mode 100644 include/net/lib80211.h delete mode 100644 net/wireless/lib80211.c delete mode 100644 net/wireless/lib80211_crypt_ccmp.c delete mode 100644 net/wireless/lib80211_crypt_tkip.c delete mode 100644 net/wireless/lib80211_crypt_wep.c (limited to 'include') diff --git a/drivers/net/wireless/intel/ipw2x00/Kconfig b/drivers/net/wireless/intel/ipw2x00/Kconfig index 1650d5865aa0..d9c042772399 100644 --- a/drivers/net/wireless/intel/ipw2x00/Kconfig +++ b/drivers/net/wireless/intel/ipw2x00/Kconfig @@ -10,7 +10,6 @@ config IPW2100 select WEXT_SPY select WEXT_PRIV select FW_LOADER - select LIB80211 select LIBIPW help A driver for the Intel PRO/Wireless 2100 Network @@ -72,7 +71,6 @@ config IPW2200 select WEXT_SPY select WEXT_PRIV select FW_LOADER - select LIB80211 select LIBIPW help A driver for the Intel PRO/Wireless 2200BG and 2915ABG Network @@ -162,10 +160,6 @@ config LIBIPW select CRYPTO select CRYPTO_MICHAEL_MIC select CRC32 - select LIB80211 - select LIB80211_CRYPT_WEP - select LIB80211_CRYPT_TKIP - select LIB80211_CRYPT_CCMP help This option enables the hardware independent IEEE 802.11 networking stack. This component is deprecated in favor of the diff --git a/drivers/net/wireless/intel/ipw2x00/Makefile b/drivers/net/wireless/intel/ipw2x00/Makefile index e1ec50359dff..60c5faccbe15 100644 --- a/drivers/net/wireless/intel/ipw2x00/Makefile +++ b/drivers/net/wireless/intel/ipw2x00/Makefile @@ -12,4 +12,8 @@ libipw-objs := \ libipw_tx.o \ libipw_rx.o \ libipw_wx.o \ - libipw_geo.o + libipw_geo.o \ + libipw_crypto.o \ + libipw_crypto_ccmp.o \ + libipw_crypto_tkip.o \ + libipw_crypto_wep.o diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index b6636002c7d2..a89e06c1b8ee 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -148,9 +148,6 @@ that only one external action is invoked at a time. 
#include #include #include - -#include - #include "ipw2100.h" #include "ipw.h" @@ -7571,7 +7568,7 @@ static int ipw2100_wx_set_auth(struct net_device *dev, struct ipw2100_priv *priv = libipw_priv(dev); struct libipw_device *ieee = priv->ieee; struct iw_param *param = &wrqu->param; - struct lib80211_crypt_data *crypt; + struct libipw_crypt_data *crypt; unsigned long flags; int ret = 0; @@ -7663,7 +7660,7 @@ static int ipw2100_wx_get_auth(struct net_device *dev, { struct ipw2100_priv *priv = libipw_priv(dev); struct libipw_device *ieee = priv->ieee; - struct lib80211_crypt_data *crypt; + struct libipw_crypt_data *crypt; struct iw_param *param = &wrqu->param; switch (param->flags & IW_AUTH_INDEX) { diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c index eed9ef17bc29..f4fd1fc784b7 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c @@ -6549,7 +6549,7 @@ static int ipw_wx_set_auth(struct net_device *dev, struct ipw_priv *priv = libipw_priv(dev); struct libipw_device *ieee = priv->ieee; struct iw_param *param = &wrqu->param; - struct lib80211_crypt_data *crypt; + struct libipw_crypt_data *crypt; unsigned long flags; int ret = 0; @@ -6648,7 +6648,7 @@ static int ipw_wx_get_auth(struct net_device *dev, { struct ipw_priv *priv = libipw_priv(dev); struct libipw_device *ieee = priv->ieee; - struct lib80211_crypt_data *crypt; + struct libipw_crypt_data *crypt; struct iw_param *param = &wrqu->param; switch (param->flags & IW_AUTH_INDEX) { diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.h b/drivers/net/wireless/intel/ipw2x00/ipw2200.h index 8ebf09121e17..46f119123b49 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.h +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.h @@ -31,8 +31,6 @@ #include #include #include - -#include #include #define DRV_NAME "ipw2200" diff --git a/drivers/net/wireless/intel/ipw2x00/libipw.h b/drivers/net/wireless/intel/ipw2x00/libipw.h index bad080d33c07..bc727c99ff3c 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw.h +++ b/drivers/net/wireless/intel/ipw2x00/libipw.h @@ -25,8 +25,6 @@ #include /* ARRAY_SIZE */ #include #include - -#include #include #define LIBIPW_VERSION "git-1.1.13" @@ -699,6 +697,76 @@ struct libipw_geo { struct libipw_channel a[LIBIPW_52GHZ_CHANNELS]; }; +#define NUM_WEP_KEYS 4 + +enum { + IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0), +}; + +struct module; + +struct libipw_crypto_ops { + const char *name; + struct list_head list; + + /* init new crypto context (e.g., allocate private data space, + * select IV, etc.); returns NULL on failure or pointer to allocated + * private data on success */ + void *(*init) (int keyidx); + + /* deinitialize crypto context and free allocated private data */ + void (*deinit) (void *priv); + + /* encrypt/decrypt return < 0 on error or >= 0 on success. The return + * value from decrypt_mpdu is passed as the keyidx value for + * decrypt_msdu. skb must have enough head and tail room for the + * encryption; if not, error will be returned; these functions are + * called for all MPDUs (i.e., fragments). + */ + int (*encrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); + int (*decrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); + + /* These functions are called for full MSDUs, i.e. full frames. + * These can be NULL if full MSDU operations are not needed. 
*/ + int (*encrypt_msdu) (struct sk_buff * skb, int hdr_len, void *priv); + int (*decrypt_msdu) (struct sk_buff * skb, int keyidx, int hdr_len, + void *priv); + + int (*set_key) (void *key, int len, u8 * seq, void *priv); + int (*get_key) (void *key, int len, u8 * seq, void *priv); + + /* procfs handler for printing out key information and possible + * statistics */ + void (*print_stats) (struct seq_file *m, void *priv); + + /* Crypto specific flag get/set for configuration settings */ + unsigned long (*get_flags) (void *priv); + unsigned long (*set_flags) (unsigned long flags, void *priv); + + /* maximum number of bytes added by encryption; encrypt buf is + * allocated with extra_prefix_len bytes, copy of in_buf, and + * extra_postfix_len; encrypt need not use all this space, but + * the result must start at the beginning of the buffer and correct + * length must be returned */ + int extra_mpdu_prefix_len, extra_mpdu_postfix_len; + int extra_msdu_prefix_len, extra_msdu_postfix_len; + + struct module *owner; +}; + +struct libipw_crypt_info { + char *name; + /* Most clients will already have a lock, + so just point to that. */ + spinlock_t *lock; + + struct libipw_crypt_data *crypt[NUM_WEP_KEYS]; + int tx_keyidx; /* default TX key index (crypt[tx_keyidx]) */ + struct list_head crypt_deinit_list; + struct timer_list crypt_deinit_timer; + int crypt_quiesced; +}; + struct libipw_device { struct net_device *dev; struct wireless_dev wdev; @@ -751,7 +819,7 @@ struct libipw_device { size_t wpa_ie_len; u8 *wpa_ie; - struct lib80211_crypt_info crypt_info; + struct libipw_crypt_info crypt_info; int bcrx_sta_key; /* use individual keys to override default keys even * with RX of broad/multicast frames */ @@ -988,4 +1056,31 @@ static inline int libipw_get_scans(struct libipw_device *ieee) return ieee->scans; } +struct libipw_crypt_data { + struct list_head list; /* delayed deletion list */ + const struct libipw_crypto_ops *ops; + void *priv; + atomic_t refcnt; +}; + +int libipw_crypt_info_init(struct libipw_crypt_info *info, char *name, + spinlock_t *lock); +void libipw_crypt_info_free(struct libipw_crypt_info *info); +int libipw_register_crypto_ops(const struct libipw_crypto_ops *ops); +int libipw_unregister_crypto_ops(const struct libipw_crypto_ops *ops); +const struct libipw_crypto_ops *libipw_get_crypto_ops(const char *name); +void libipw_crypt_delayed_deinit(struct libipw_crypt_info *info, + struct libipw_crypt_data **crypt); + +/* must be called in the listed order */ +int libipw_crypto_init(void); +int libipw_crypto_ccmp_init(void); +int libipw_crypto_tkip_init(void); +int libipw_crypto_wep_init(void); + +void libipw_crypto_wep_exit(void); +void libipw_crypto_tkip_exit(void); +void libipw_crypto_ccmp_exit(void); +void libipw_crypto_exit(void); + #endif /* LIBIPW_H */ diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_crypto.c b/drivers/net/wireless/intel/ipw2x00/libipw_crypto.c new file mode 100644 index 000000000000..32639e0e8430 --- /dev/null +++ b/drivers/net/wireless/intel/ipw2x00/libipw_crypto.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * libipw -- common bits for IPW drivers + * + * Copyright(c) 2008 John W. 
Linville + * + * Portions copied from old ieee80211 component, w/ original copyright + * notices below: + * + * Host AP crypto routines + * + * Copyright (c) 2002-2003, Jouni Malinen + * Portions Copyright (C) 2004, Intel Corporation + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include "libipw.h" + +struct libipw_crypto_alg { + struct list_head list; + const struct libipw_crypto_ops *ops; +}; + +static LIST_HEAD(libipw_crypto_algs); +static DEFINE_SPINLOCK(libipw_crypto_lock); + +static void libipw_crypt_deinit_entries(struct libipw_crypt_info *info, + int force); +static void libipw_crypt_quiescing(struct libipw_crypt_info *info); +static void libipw_crypt_deinit_handler(struct timer_list *t); + +int libipw_crypt_info_init(struct libipw_crypt_info *info, char *name, + spinlock_t *lock) +{ + memset(info, 0, sizeof(*info)); + + info->name = name; + info->lock = lock; + + INIT_LIST_HEAD(&info->crypt_deinit_list); + timer_setup(&info->crypt_deinit_timer, libipw_crypt_deinit_handler, + 0); + + return 0; +} +EXPORT_SYMBOL(libipw_crypt_info_init); + +void libipw_crypt_info_free(struct libipw_crypt_info *info) +{ + int i; + + libipw_crypt_quiescing(info); + del_timer_sync(&info->crypt_deinit_timer); + libipw_crypt_deinit_entries(info, 1); + + for (i = 0; i < NUM_WEP_KEYS; i++) { + struct libipw_crypt_data *crypt = info->crypt[i]; + if (crypt) { + if (crypt->ops) { + crypt->ops->deinit(crypt->priv); + module_put(crypt->ops->owner); + } + kfree(crypt); + info->crypt[i] = NULL; + } + } +} +EXPORT_SYMBOL(libipw_crypt_info_free); + +static void libipw_crypt_deinit_entries(struct libipw_crypt_info *info, + int force) +{ + struct libipw_crypt_data *entry, *next; + unsigned long flags; + + spin_lock_irqsave(info->lock, flags); + list_for_each_entry_safe(entry, next, &info->crypt_deinit_list, list) { + if (atomic_read(&entry->refcnt) != 0 && !force) + continue; + + list_del(&entry->list); + + if (entry->ops) { + entry->ops->deinit(entry->priv); + module_put(entry->ops->owner); + } + kfree(entry); + } + spin_unlock_irqrestore(info->lock, flags); +} + +/* After this, crypt_deinit_list won't accept new members */ +static void libipw_crypt_quiescing(struct libipw_crypt_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(info->lock, flags); + info->crypt_quiesced = 1; + spin_unlock_irqrestore(info->lock, flags); +} + +static void libipw_crypt_deinit_handler(struct timer_list *t) +{ + struct libipw_crypt_info *info = from_timer(info, t, + crypt_deinit_timer); + unsigned long flags; + + libipw_crypt_deinit_entries(info, 0); + + spin_lock_irqsave(info->lock, flags); + if (!list_empty(&info->crypt_deinit_list) && !info->crypt_quiesced) { + printk(KERN_DEBUG "%s: entries remaining in delayed crypt " + "deletion list\n", info->name); + info->crypt_deinit_timer.expires = jiffies + HZ; + add_timer(&info->crypt_deinit_timer); + } + spin_unlock_irqrestore(info->lock, flags); +} + +void libipw_crypt_delayed_deinit(struct libipw_crypt_info *info, + struct libipw_crypt_data **crypt) +{ + struct libipw_crypt_data *tmp; + unsigned long flags; + + if (*crypt == NULL) + return; + + tmp = *crypt; + *crypt = NULL; + + /* must not run ops->deinit() while there may be pending encrypt or + * decrypt operations. Use a list of delayed deinits to avoid needing + * locking. 
*/ + + spin_lock_irqsave(info->lock, flags); + if (!info->crypt_quiesced) { + list_add(&tmp->list, &info->crypt_deinit_list); + if (!timer_pending(&info->crypt_deinit_timer)) { + info->crypt_deinit_timer.expires = jiffies + HZ; + add_timer(&info->crypt_deinit_timer); + } + } + spin_unlock_irqrestore(info->lock, flags); +} +EXPORT_SYMBOL(libipw_crypt_delayed_deinit); + +int libipw_register_crypto_ops(const struct libipw_crypto_ops *ops) +{ + unsigned long flags; + struct libipw_crypto_alg *alg; + + alg = kzalloc(sizeof(*alg), GFP_KERNEL); + if (alg == NULL) + return -ENOMEM; + + alg->ops = ops; + + spin_lock_irqsave(&libipw_crypto_lock, flags); + list_add(&alg->list, &libipw_crypto_algs); + spin_unlock_irqrestore(&libipw_crypto_lock, flags); + + printk(KERN_DEBUG "libipw_crypt: registered algorithm '%s'\n", + ops->name); + + return 0; +} +EXPORT_SYMBOL(libipw_register_crypto_ops); + +int libipw_unregister_crypto_ops(const struct libipw_crypto_ops *ops) +{ + struct libipw_crypto_alg *alg; + unsigned long flags; + + spin_lock_irqsave(&libipw_crypto_lock, flags); + list_for_each_entry(alg, &libipw_crypto_algs, list) { + if (alg->ops == ops) + goto found; + } + spin_unlock_irqrestore(&libipw_crypto_lock, flags); + return -EINVAL; + + found: + printk(KERN_DEBUG "libipw_crypt: unregistered algorithm '%s'\n", + ops->name); + list_del(&alg->list); + spin_unlock_irqrestore(&libipw_crypto_lock, flags); + kfree(alg); + return 0; +} +EXPORT_SYMBOL(libipw_unregister_crypto_ops); + +const struct libipw_crypto_ops *libipw_get_crypto_ops(const char *name) +{ + struct libipw_crypto_alg *alg; + unsigned long flags; + + spin_lock_irqsave(&libipw_crypto_lock, flags); + list_for_each_entry(alg, &libipw_crypto_algs, list) { + if (strcmp(alg->ops->name, name) == 0) + goto found; + } + spin_unlock_irqrestore(&libipw_crypto_lock, flags); + return NULL; + + found: + spin_unlock_irqrestore(&libipw_crypto_lock, flags); + return alg->ops; +} +EXPORT_SYMBOL(libipw_get_crypto_ops); + +static void *libipw_crypt_null_init(int keyidx) +{ + return (void *)1; +} + +static void libipw_crypt_null_deinit(void *priv) +{ +} + +static const struct libipw_crypto_ops libipw_crypt_null = { + .name = "NULL", + .init = libipw_crypt_null_init, + .deinit = libipw_crypt_null_deinit, + .owner = THIS_MODULE, +}; + +int __init libipw_crypto_init(void) +{ + return libipw_register_crypto_ops(&libipw_crypt_null); +} + +void libipw_crypto_exit(void) +{ + libipw_unregister_crypto_ops(&libipw_crypt_null); + BUG_ON(!list_empty(&libipw_crypto_algs)); +} diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_crypto_ccmp.c b/drivers/net/wireless/intel/ipw2x00/libipw_crypto_ccmp.c new file mode 100644 index 000000000000..bf900d8c8ad3 --- /dev/null +++ b/drivers/net/wireless/intel/ipw2x00/libipw_crypto_ccmp.c @@ -0,0 +1,438 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * libipw crypt: host-based CCMP encryption implementation for libipw + * + * Copyright (c) 2003-2004, Jouni Malinen + * Copyright (c) 2008, John W. 
Linville + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libipw.h" + +#define AES_BLOCK_LEN 16 +#define CCMP_HDR_LEN 8 +#define CCMP_MIC_LEN 8 +#define CCMP_TK_LEN 16 +#define CCMP_PN_LEN 6 + +struct libipw_ccmp_data { + u8 key[CCMP_TK_LEN]; + int key_set; + + u8 tx_pn[CCMP_PN_LEN]; + u8 rx_pn[CCMP_PN_LEN]; + + u32 dot11RSNAStatsCCMPFormatErrors; + u32 dot11RSNAStatsCCMPReplays; + u32 dot11RSNAStatsCCMPDecryptErrors; + + int key_idx; + + struct crypto_aead *tfm; + + /* scratch buffers for virt_to_page() (crypto API) */ + u8 tx_aad[2 * AES_BLOCK_LEN]; + u8 rx_aad[2 * AES_BLOCK_LEN]; +}; + +static void *libipw_ccmp_init(int key_idx) +{ + struct libipw_ccmp_data *priv; + + priv = kzalloc(sizeof(*priv), GFP_ATOMIC); + if (priv == NULL) + goto fail; + priv->key_idx = key_idx; + + priv->tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->tfm)) { + priv->tfm = NULL; + goto fail; + } + + return priv; + + fail: + if (priv) { + if (priv->tfm) + crypto_free_aead(priv->tfm); + kfree(priv); + } + + return NULL; +} + +static void libipw_ccmp_deinit(void *priv) +{ + struct libipw_ccmp_data *_priv = priv; + if (_priv && _priv->tfm) + crypto_free_aead(_priv->tfm); + kfree(priv); +} + +static int ccmp_init_iv_and_aad(const struct ieee80211_hdr *hdr, + const u8 *pn, u8 *iv, u8 *aad) +{ + u8 *pos, qc = 0; + size_t aad_len; + int a4_included, qc_included; + + a4_included = ieee80211_has_a4(hdr->frame_control); + qc_included = ieee80211_is_data_qos(hdr->frame_control); + + aad_len = 22; + if (a4_included) + aad_len += 6; + if (qc_included) { + pos = (u8 *) & hdr->addr4; + if (a4_included) + pos += 6; + qc = *pos & 0x0f; + aad_len += 2; + } + + /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC + * mode authentication are not allowed to collide, yet both are derived + * from the same vector. We only set L := 1 here to indicate that the + * data size can be represented in (L+1) bytes. The CCM layer will take + * care of storing the data length in the top (L+1) bytes and setting + * and clearing the other bits as is required to derive the two IVs. + */ + iv[0] = 0x1; + + /* Nonce: QC | A2 | PN */ + iv[1] = qc; + memcpy(iv + 2, hdr->addr2, ETH_ALEN); + memcpy(iv + 8, pn, CCMP_PN_LEN); + + /* AAD: + * FC with bits 4..6 and 11..13 masked to zero; 14 is always one + * A1 | A2 | A3 + * SC with bits 4..15 (seq#) masked to zero + * A4 (if present) + * QC (if present) + */ + pos = (u8 *) hdr; + aad[0] = pos[0] & 0x8f; + aad[1] = pos[1] & 0xc7; + memcpy(aad + 2, &hdr->addrs, 3 * ETH_ALEN); + pos = (u8 *) & hdr->seq_ctrl; + aad[20] = pos[0] & 0x0f; + aad[21] = 0; /* all bits masked */ + memset(aad + 22, 0, 8); + if (a4_included) + memcpy(aad + 22, hdr->addr4, ETH_ALEN); + if (qc_included) { + aad[a4_included ? 
28 : 22] = qc; + /* rest of QC masked */ + } + return aad_len; +} + +static int libipw_ccmp_hdr(struct sk_buff *skb, int hdr_len, + u8 *aeskey, int keylen, void *priv) +{ + struct libipw_ccmp_data *key = priv; + int i; + u8 *pos; + + if (skb_headroom(skb) < CCMP_HDR_LEN || skb->len < hdr_len) + return -1; + + if (aeskey != NULL && keylen >= CCMP_TK_LEN) + memcpy(aeskey, key->key, CCMP_TK_LEN); + + pos = skb_push(skb, CCMP_HDR_LEN); + memmove(pos, pos + CCMP_HDR_LEN, hdr_len); + pos += hdr_len; + + i = CCMP_PN_LEN - 1; + while (i >= 0) { + key->tx_pn[i]++; + if (key->tx_pn[i] != 0) + break; + i--; + } + + *pos++ = key->tx_pn[5]; + *pos++ = key->tx_pn[4]; + *pos++ = 0; + *pos++ = (key->key_idx << 6) | (1 << 5) /* Ext IV included */ ; + *pos++ = key->tx_pn[3]; + *pos++ = key->tx_pn[2]; + *pos++ = key->tx_pn[1]; + *pos++ = key->tx_pn[0]; + + return CCMP_HDR_LEN; +} + +static int libipw_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct libipw_ccmp_data *key = priv; + struct ieee80211_hdr *hdr; + struct aead_request *req; + struct scatterlist sg[2]; + u8 *aad = key->tx_aad; + u8 iv[AES_BLOCK_LEN]; + int len, data_len, aad_len; + int ret; + + if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len) + return -1; + + data_len = skb->len - hdr_len; + len = libipw_ccmp_hdr(skb, hdr_len, NULL, 0, priv); + if (len < 0) + return -1; + + req = aead_request_alloc(key->tfm, GFP_ATOMIC); + if (!req) + return -ENOMEM; + + hdr = (struct ieee80211_hdr *)skb->data; + aad_len = ccmp_init_iv_and_aad(hdr, key->tx_pn, iv, aad); + + skb_put(skb, CCMP_MIC_LEN); + + sg_init_table(sg, 2); + sg_set_buf(&sg[0], aad, aad_len); + sg_set_buf(&sg[1], skb->data + hdr_len + CCMP_HDR_LEN, + data_len + CCMP_MIC_LEN); + + aead_request_set_callback(req, 0, NULL, NULL); + aead_request_set_ad(req, aad_len); + aead_request_set_crypt(req, sg, sg, data_len, iv); + + ret = crypto_aead_encrypt(req); + aead_request_free(req); + + return ret; +} + +/* + * deal with seq counter wrapping correctly. 
+ * refer to timer_after() for jiffies wrapping handling + */ +static inline int ccmp_replay_check(u8 *pn_n, u8 *pn_o) +{ + u32 iv32_n, iv16_n; + u32 iv32_o, iv16_o; + + iv32_n = (pn_n[0] << 24) | (pn_n[1] << 16) | (pn_n[2] << 8) | pn_n[3]; + iv16_n = (pn_n[4] << 8) | pn_n[5]; + + iv32_o = (pn_o[0] << 24) | (pn_o[1] << 16) | (pn_o[2] << 8) | pn_o[3]; + iv16_o = (pn_o[4] << 8) | pn_o[5]; + + if ((s32)iv32_n - (s32)iv32_o < 0 || + (iv32_n == iv32_o && iv16_n <= iv16_o)) + return 1; + return 0; +} + +static int libipw_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct libipw_ccmp_data *key = priv; + u8 keyidx, *pos; + struct ieee80211_hdr *hdr; + struct aead_request *req; + struct scatterlist sg[2]; + u8 *aad = key->rx_aad; + u8 iv[AES_BLOCK_LEN]; + u8 pn[6]; + int aad_len, ret; + size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN; + + if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) { + key->dot11RSNAStatsCCMPFormatErrors++; + return -1; + } + + hdr = (struct ieee80211_hdr *)skb->data; + pos = skb->data + hdr_len; + keyidx = pos[3]; + if (!(keyidx & (1 << 5))) { + net_dbg_ratelimited("CCMP: received packet without ExtIV flag from %pM\n", + hdr->addr2); + key->dot11RSNAStatsCCMPFormatErrors++; + return -2; + } + keyidx >>= 6; + if (key->key_idx != keyidx) { + net_dbg_ratelimited("CCMP: RX tkey->key_idx=%d frame keyidx=%d\n", + key->key_idx, keyidx); + return -6; + } + if (!key->key_set) { + net_dbg_ratelimited("CCMP: received packet from %pM with keyid=%d that does not have a configured key\n", + hdr->addr2, keyidx); + return -3; + } + + pn[0] = pos[7]; + pn[1] = pos[6]; + pn[2] = pos[5]; + pn[3] = pos[4]; + pn[4] = pos[1]; + pn[5] = pos[0]; + pos += 8; + + if (ccmp_replay_check(pn, key->rx_pn)) { +#ifdef CONFIG_LIBIPW_DEBUG + net_dbg_ratelimited("CCMP: replay detected: STA=%pM previous PN %02x%02x%02x%02x%02x%02x received PN %02x%02x%02x%02x%02x%02x\n", + hdr->addr2, + key->rx_pn[0], key->rx_pn[1], key->rx_pn[2], + key->rx_pn[3], key->rx_pn[4], key->rx_pn[5], + pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); +#endif + key->dot11RSNAStatsCCMPReplays++; + return -4; + } + + req = aead_request_alloc(key->tfm, GFP_ATOMIC); + if (!req) + return -ENOMEM; + + aad_len = ccmp_init_iv_and_aad(hdr, pn, iv, aad); + + sg_init_table(sg, 2); + sg_set_buf(&sg[0], aad, aad_len); + sg_set_buf(&sg[1], pos, data_len); + + aead_request_set_callback(req, 0, NULL, NULL); + aead_request_set_ad(req, aad_len); + aead_request_set_crypt(req, sg, sg, data_len, iv); + + ret = crypto_aead_decrypt(req); + aead_request_free(req); + + if (ret) { + net_dbg_ratelimited("CCMP: decrypt failed: STA=%pM (%d)\n", + hdr->addr2, ret); + key->dot11RSNAStatsCCMPDecryptErrors++; + return -5; + } + + memcpy(key->rx_pn, pn, CCMP_PN_LEN); + + /* Remove hdr and MIC */ + memmove(skb->data + CCMP_HDR_LEN, skb->data, hdr_len); + skb_pull(skb, CCMP_HDR_LEN); + skb_trim(skb, skb->len - CCMP_MIC_LEN); + + return keyidx; +} + +static int libipw_ccmp_set_key(void *key, int len, u8 * seq, void *priv) +{ + struct libipw_ccmp_data *data = priv; + int keyidx; + struct crypto_aead *tfm = data->tfm; + + keyidx = data->key_idx; + memset(data, 0, sizeof(*data)); + data->key_idx = keyidx; + data->tfm = tfm; + if (len == CCMP_TK_LEN) { + memcpy(data->key, key, CCMP_TK_LEN); + data->key_set = 1; + if (seq) { + data->rx_pn[0] = seq[5]; + data->rx_pn[1] = seq[4]; + data->rx_pn[2] = seq[3]; + data->rx_pn[3] = seq[2]; + data->rx_pn[4] = seq[1]; + data->rx_pn[5] = seq[0]; + } + if (crypto_aead_setauthsize(data->tfm, CCMP_MIC_LEN) || + 
crypto_aead_setkey(data->tfm, data->key, CCMP_TK_LEN)) + return -1; + } else if (len == 0) + data->key_set = 0; + else + return -1; + + return 0; +} + +static int libipw_ccmp_get_key(void *key, int len, u8 * seq, void *priv) +{ + struct libipw_ccmp_data *data = priv; + + if (len < CCMP_TK_LEN) + return -1; + + if (!data->key_set) + return 0; + memcpy(key, data->key, CCMP_TK_LEN); + + if (seq) { + seq[0] = data->tx_pn[5]; + seq[1] = data->tx_pn[4]; + seq[2] = data->tx_pn[3]; + seq[3] = data->tx_pn[2]; + seq[4] = data->tx_pn[1]; + seq[5] = data->tx_pn[0]; + } + + return CCMP_TK_LEN; +} + +static void libipw_ccmp_print_stats(struct seq_file *m, void *priv) +{ + struct libipw_ccmp_data *ccmp = priv; + + seq_printf(m, + "key[%d] alg=CCMP key_set=%d " + "tx_pn=%02x%02x%02x%02x%02x%02x " + "rx_pn=%02x%02x%02x%02x%02x%02x " + "format_errors=%d replays=%d decrypt_errors=%d\n", + ccmp->key_idx, ccmp->key_set, + ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2], + ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5], + ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2], + ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5], + ccmp->dot11RSNAStatsCCMPFormatErrors, + ccmp->dot11RSNAStatsCCMPReplays, + ccmp->dot11RSNAStatsCCMPDecryptErrors); +} + +static const struct libipw_crypto_ops libipw_crypt_ccmp = { + .name = "CCMP", + .init = libipw_ccmp_init, + .deinit = libipw_ccmp_deinit, + .encrypt_mpdu = libipw_ccmp_encrypt, + .decrypt_mpdu = libipw_ccmp_decrypt, + .encrypt_msdu = NULL, + .decrypt_msdu = NULL, + .set_key = libipw_ccmp_set_key, + .get_key = libipw_ccmp_get_key, + .print_stats = libipw_ccmp_print_stats, + .extra_mpdu_prefix_len = CCMP_HDR_LEN, + .extra_mpdu_postfix_len = CCMP_MIC_LEN, + .owner = THIS_MODULE, +}; + +int __init libipw_crypto_ccmp_init(void) +{ + return libipw_register_crypto_ops(&libipw_crypt_ccmp); +} + +void libipw_crypto_ccmp_exit(void) +{ + libipw_unregister_crypto_ops(&libipw_crypt_ccmp); +} diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_crypto_tkip.c b/drivers/net/wireless/intel/ipw2x00/libipw_crypto_tkip.c new file mode 100644 index 000000000000..32288697da4f --- /dev/null +++ b/drivers/net/wireless/intel/ipw2x00/libipw_crypto_tkip.c @@ -0,0 +1,728 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * libipw crypt: host-based TKIP encryption implementation for libipw + * + * Copyright (c) 2003-2004, Jouni Malinen + * Copyright (c) 2008, John W. 
Linville + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libipw.h" + +#define TKIP_HDR_LEN 8 + +struct libipw_tkip_data { +#define TKIP_KEY_LEN 32 + u8 key[TKIP_KEY_LEN]; + int key_set; + + u32 tx_iv32; + u16 tx_iv16; + u16 tx_ttak[5]; + int tx_phase1_done; + + u32 rx_iv32; + u16 rx_iv16; + u16 rx_ttak[5]; + int rx_phase1_done; + u32 rx_iv32_new; + u16 rx_iv16_new; + + u32 dot11RSNAStatsTKIPReplays; + u32 dot11RSNAStatsTKIPICVErrors; + u32 dot11RSNAStatsTKIPLocalMICFailures; + + int key_idx; + + struct arc4_ctx rx_ctx_arc4; + struct arc4_ctx tx_ctx_arc4; + struct crypto_shash *rx_tfm_michael; + struct crypto_shash *tx_tfm_michael; + + /* scratch buffers for virt_to_page() (crypto API) */ + u8 rx_hdr[16], tx_hdr[16]; + + unsigned long flags; +}; + +static unsigned long libipw_tkip_set_flags(unsigned long flags, void *priv) +{ + struct libipw_tkip_data *_priv = priv; + unsigned long old_flags = _priv->flags; + _priv->flags = flags; + return old_flags; +} + +static unsigned long libipw_tkip_get_flags(void *priv) +{ + struct libipw_tkip_data *_priv = priv; + return _priv->flags; +} + +static void *libipw_tkip_init(int key_idx) +{ + struct libipw_tkip_data *priv; + + if (fips_enabled) + return NULL; + + priv = kzalloc(sizeof(*priv), GFP_ATOMIC); + if (priv == NULL) + goto fail; + + priv->key_idx = key_idx; + + priv->tx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0); + if (IS_ERR(priv->tx_tfm_michael)) { + priv->tx_tfm_michael = NULL; + goto fail; + } + + priv->rx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0); + if (IS_ERR(priv->rx_tfm_michael)) { + priv->rx_tfm_michael = NULL; + goto fail; + } + + return priv; + + fail: + if (priv) { + crypto_free_shash(priv->tx_tfm_michael); + crypto_free_shash(priv->rx_tfm_michael); + kfree(priv); + } + + return NULL; +} + +static void libipw_tkip_deinit(void *priv) +{ + struct libipw_tkip_data *_priv = priv; + if (_priv) { + crypto_free_shash(_priv->tx_tfm_michael); + crypto_free_shash(_priv->rx_tfm_michael); + } + kfree_sensitive(priv); +} + +static inline u16 RotR1(u16 val) +{ + return (val >> 1) | (val << 15); +} + +static inline u8 Lo8(u16 val) +{ + return val & 0xff; +} + +static inline u8 Hi8(u16 val) +{ + return val >> 8; +} + +static inline u16 Lo16(u32 val) +{ + return val & 0xffff; +} + +static inline u16 Hi16(u32 val) +{ + return val >> 16; +} + +static inline u16 Mk16(u8 hi, u8 lo) +{ + return lo | (((u16) hi) << 8); +} + +static inline u16 Mk16_le(__le16 * v) +{ + return le16_to_cpu(*v); +} + +static const u16 Sbox[256] = { + 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154, + 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A, + 0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B, + 0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B, + 0x75C2, 0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 0x7E41, 0xF502, 0x834F, + 0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F, + 0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5, + 0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F, + 0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB, + 0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397, + 0xA6F5, 0xB968, 0x0000, 0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED, + 0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A, + 0xBB6B, 0xC52A, 
0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194, + 0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3, + 0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104, + 0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D, + 0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39, + 0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695, + 0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83, + 0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76, + 0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4, + 0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B, + 0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0, + 0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018, + 0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751, + 0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85, + 0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12, + 0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9, + 0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7, + 0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A, + 0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8, + 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A, +}; + +static inline u16 _S_(u16 v) +{ + u16 t = Sbox[Hi8(v)]; + return Sbox[Lo8(v)] ^ ((t << 8) | (t >> 8)); +} + +#define PHASE1_LOOP_COUNT 8 + +static void tkip_mixing_phase1(u16 * TTAK, const u8 * TK, const u8 * TA, + u32 IV32) +{ + int i, j; + + /* Initialize the 80-bit TTAK from TSC (IV32) and TA[0..5] */ + TTAK[0] = Lo16(IV32); + TTAK[1] = Hi16(IV32); + TTAK[2] = Mk16(TA[1], TA[0]); + TTAK[3] = Mk16(TA[3], TA[2]); + TTAK[4] = Mk16(TA[5], TA[4]); + + for (i = 0; i < PHASE1_LOOP_COUNT; i++) { + j = 2 * (i & 1); + TTAK[0] += _S_(TTAK[4] ^ Mk16(TK[1 + j], TK[0 + j])); + TTAK[1] += _S_(TTAK[0] ^ Mk16(TK[5 + j], TK[4 + j])); + TTAK[2] += _S_(TTAK[1] ^ Mk16(TK[9 + j], TK[8 + j])); + TTAK[3] += _S_(TTAK[2] ^ Mk16(TK[13 + j], TK[12 + j])); + TTAK[4] += _S_(TTAK[3] ^ Mk16(TK[1 + j], TK[0 + j])) + i; + } +} + +static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK, + u16 IV16) +{ + /* Make temporary area overlap WEP seed so that the final copy can be + * avoided on little endian hosts. 
*/ + u16 *PPK = (u16 *) & WEPSeed[4]; + + /* Step 1 - make copy of TTAK and bring in TSC */ + PPK[0] = TTAK[0]; + PPK[1] = TTAK[1]; + PPK[2] = TTAK[2]; + PPK[3] = TTAK[3]; + PPK[4] = TTAK[4]; + PPK[5] = TTAK[4] + IV16; + + /* Step 2 - 96-bit bijective mixing using S-box */ + PPK[0] += _S_(PPK[5] ^ Mk16_le((__le16 *) & TK[0])); + PPK[1] += _S_(PPK[0] ^ Mk16_le((__le16 *) & TK[2])); + PPK[2] += _S_(PPK[1] ^ Mk16_le((__le16 *) & TK[4])); + PPK[3] += _S_(PPK[2] ^ Mk16_le((__le16 *) & TK[6])); + PPK[4] += _S_(PPK[3] ^ Mk16_le((__le16 *) & TK[8])); + PPK[5] += _S_(PPK[4] ^ Mk16_le((__le16 *) & TK[10])); + + PPK[0] += RotR1(PPK[5] ^ Mk16_le((__le16 *) & TK[12])); + PPK[1] += RotR1(PPK[0] ^ Mk16_le((__le16 *) & TK[14])); + PPK[2] += RotR1(PPK[1]); + PPK[3] += RotR1(PPK[2]); + PPK[4] += RotR1(PPK[3]); + PPK[5] += RotR1(PPK[4]); + + /* Step 3 - bring in last of TK bits, assign 24-bit WEP IV value + * WEPSeed[0..2] is transmitted as WEP IV */ + WEPSeed[0] = Hi8(IV16); + WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F; + WEPSeed[2] = Lo8(IV16); + WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((__le16 *) & TK[0])) >> 1); + +#ifdef __BIG_ENDIAN + { + int i; + for (i = 0; i < 6; i++) + PPK[i] = (PPK[i] << 8) | (PPK[i] >> 8); + } +#endif +} + +static int libipw_tkip_hdr(struct sk_buff *skb, int hdr_len, + u8 * rc4key, int keylen, void *priv) +{ + struct libipw_tkip_data *tkey = priv; + u8 *pos; + struct ieee80211_hdr *hdr; + + hdr = (struct ieee80211_hdr *)skb->data; + + if (skb_headroom(skb) < TKIP_HDR_LEN || skb->len < hdr_len) + return -1; + + if (rc4key == NULL || keylen < 16) + return -1; + + if (!tkey->tx_phase1_done) { + tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2, + tkey->tx_iv32); + tkey->tx_phase1_done = 1; + } + tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16); + + pos = skb_push(skb, TKIP_HDR_LEN); + memmove(pos, pos + TKIP_HDR_LEN, hdr_len); + pos += hdr_len; + + *pos++ = *rc4key; + *pos++ = *(rc4key + 1); + *pos++ = *(rc4key + 2); + *pos++ = (tkey->key_idx << 6) | (1 << 5) /* Ext IV included */ ; + *pos++ = tkey->tx_iv32 & 0xff; + *pos++ = (tkey->tx_iv32 >> 8) & 0xff; + *pos++ = (tkey->tx_iv32 >> 16) & 0xff; + *pos++ = (tkey->tx_iv32 >> 24) & 0xff; + + tkey->tx_iv16++; + if (tkey->tx_iv16 == 0) { + tkey->tx_phase1_done = 0; + tkey->tx_iv32++; + } + + return TKIP_HDR_LEN; +} + +static int libipw_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct libipw_tkip_data *tkey = priv; + int len; + u8 rc4key[16], *pos, *icv; + u32 crc; + + if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + net_dbg_ratelimited("TKIP countermeasures: dropped TX packet to %pM\n", + hdr->addr1); + return -1; + } + + if (skb_tailroom(skb) < 4 || skb->len < hdr_len) + return -1; + + len = skb->len - hdr_len; + pos = skb->data + hdr_len; + + if ((libipw_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0) + return -1; + + crc = ~crc32_le(~0, pos, len); + icv = skb_put(skb, 4); + icv[0] = crc; + icv[1] = crc >> 8; + icv[2] = crc >> 16; + icv[3] = crc >> 24; + + arc4_setkey(&tkey->tx_ctx_arc4, rc4key, 16); + arc4_crypt(&tkey->tx_ctx_arc4, pos, pos, len + 4); + + return 0; +} + +/* + * deal with seq counter wrapping correctly. 
+ * refer to timer_after() for jiffies wrapping handling + */ +static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n, + u32 iv32_o, u16 iv16_o) +{ + if ((s32)iv32_n - (s32)iv32_o < 0 || + (iv32_n == iv32_o && iv16_n <= iv16_o)) + return 1; + return 0; +} + +static int libipw_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct libipw_tkip_data *tkey = priv; + u8 rc4key[16]; + u8 keyidx, *pos; + u32 iv32; + u16 iv16; + struct ieee80211_hdr *hdr; + u8 icv[4]; + u32 crc; + int plen; + + hdr = (struct ieee80211_hdr *)skb->data; + + if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { + net_dbg_ratelimited("TKIP countermeasures: dropped received packet from %pM\n", + hdr->addr2); + return -1; + } + + if (skb->len < hdr_len + TKIP_HDR_LEN + 4) + return -1; + + pos = skb->data + hdr_len; + keyidx = pos[3]; + if (!(keyidx & (1 << 5))) { + net_dbg_ratelimited("TKIP: received packet without ExtIV flag from %pM\n", + hdr->addr2); + return -2; + } + keyidx >>= 6; + if (tkey->key_idx != keyidx) { + net_dbg_ratelimited("TKIP: RX tkey->key_idx=%d frame keyidx=%d\n", + tkey->key_idx, keyidx); + return -6; + } + if (!tkey->key_set) { + net_dbg_ratelimited("TKIP: received packet from %pM with keyid=%d that does not have a configured key\n", + hdr->addr2, keyidx); + return -3; + } + iv16 = (pos[0] << 8) | pos[2]; + iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24); + pos += TKIP_HDR_LEN; + + if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { +#ifdef CONFIG_LIBIPW_DEBUG + net_dbg_ratelimited("TKIP: replay detected: STA=%pM previous TSC %08x%04x received TSC %08x%04x\n", + hdr->addr2, tkey->rx_iv32, tkey->rx_iv16, + iv32, iv16); +#endif + tkey->dot11RSNAStatsTKIPReplays++; + return -4; + } + + if (iv32 != tkey->rx_iv32 || !tkey->rx_phase1_done) { + tkip_mixing_phase1(tkey->rx_ttak, tkey->key, hdr->addr2, iv32); + tkey->rx_phase1_done = 1; + } + tkip_mixing_phase2(rc4key, tkey->key, tkey->rx_ttak, iv16); + + plen = skb->len - hdr_len - 12; + + arc4_setkey(&tkey->rx_ctx_arc4, rc4key, 16); + arc4_crypt(&tkey->rx_ctx_arc4, pos, pos, plen + 4); + + crc = ~crc32_le(~0, pos, plen); + icv[0] = crc; + icv[1] = crc >> 8; + icv[2] = crc >> 16; + icv[3] = crc >> 24; + if (memcmp(icv, pos + plen, 4) != 0) { + if (iv32 != tkey->rx_iv32) { + /* Previously cached Phase1 result was already lost, so + * it needs to be recalculated for the next packet. 
*/ + tkey->rx_phase1_done = 0; + } +#ifdef CONFIG_LIBIPW_DEBUG + net_dbg_ratelimited("TKIP: ICV error detected: STA=%pM\n", + hdr->addr2); +#endif + tkey->dot11RSNAStatsTKIPICVErrors++; + return -5; + } + + /* Update real counters only after Michael MIC verification has + * completed */ + tkey->rx_iv32_new = iv32; + tkey->rx_iv16_new = iv16; + + /* Remove IV and ICV */ + memmove(skb->data + TKIP_HDR_LEN, skb->data, hdr_len); + skb_pull(skb, TKIP_HDR_LEN); + skb_trim(skb, skb->len - 4); + + return keyidx; +} + +static int michael_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *hdr, + u8 *data, size_t data_len, u8 *mic) +{ + SHASH_DESC_ON_STACK(desc, tfm_michael); + int err; + + if (tfm_michael == NULL) { + pr_warn("%s(): tfm_michael == NULL\n", __func__); + return -1; + } + + desc->tfm = tfm_michael; + + if (crypto_shash_setkey(tfm_michael, key, 8)) + return -1; + + err = crypto_shash_init(desc); + if (err) + goto out; + err = crypto_shash_update(desc, hdr, 16); + if (err) + goto out; + err = crypto_shash_update(desc, data, data_len); + if (err) + goto out; + err = crypto_shash_final(desc, mic); + +out: + shash_desc_zero(desc); + return err; +} + +static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) +{ + struct ieee80211_hdr *hdr11; + + hdr11 = (struct ieee80211_hdr *)skb->data; + + switch (le16_to_cpu(hdr11->frame_control) & + (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { + case IEEE80211_FCTL_TODS: + memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ + memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ + break; + case IEEE80211_FCTL_FROMDS: + memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ + memcpy(hdr + ETH_ALEN, hdr11->addr3, ETH_ALEN); /* SA */ + break; + case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS: + memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ + memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */ + break; + default: + memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ + memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ + break; + } + + if (ieee80211_is_data_qos(hdr11->frame_control)) { + hdr[12] = le16_to_cpu(*((__le16 *)ieee80211_get_qos_ctl(hdr11))) + & IEEE80211_QOS_CTL_TID_MASK; + } else + hdr[12] = 0; /* priority */ + + hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */ +} + +static int libipw_michael_mic_add(struct sk_buff *skb, int hdr_len, + void *priv) +{ + struct libipw_tkip_data *tkey = priv; + u8 *pos; + + if (skb_tailroom(skb) < 8 || skb->len < hdr_len) { + printk(KERN_DEBUG "Invalid packet for Michael MIC add " + "(tailroom=%d hdr_len=%d skb->len=%d)\n", + skb_tailroom(skb), hdr_len, skb->len); + return -1; + } + + michael_mic_hdr(skb, tkey->tx_hdr); + pos = skb_put(skb, 8); + if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr, + skb->data + hdr_len, skb->len - 8 - hdr_len, pos)) + return -1; + + return 0; +} + +static void libipw_michael_mic_failure(struct net_device *dev, + struct ieee80211_hdr *hdr, + int keyidx) +{ + union iwreq_data wrqu; + struct iw_michaelmicfailure ev; + + /* TODO: needed parameters: count, keyid, key type, TSC */ + memset(&ev, 0, sizeof(ev)); + ev.flags = keyidx & IW_MICFAILURE_KEY_ID; + if (hdr->addr1[0] & 0x01) + ev.flags |= IW_MICFAILURE_GROUP; + else + ev.flags |= IW_MICFAILURE_PAIRWISE; + ev.src_addr.sa_family = ARPHRD_ETHER; + memcpy(ev.src_addr.sa_data, hdr->addr2, ETH_ALEN); + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = sizeof(ev); + wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *)&ev); +} + +static int libipw_michael_mic_verify(struct sk_buff *skb, int keyidx, + int hdr_len, 
void *priv) +{ + struct libipw_tkip_data *tkey = priv; + u8 mic[8]; + + if (!tkey->key_set) + return -1; + + michael_mic_hdr(skb, tkey->rx_hdr); + if (michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr, + skb->data + hdr_len, skb->len - 8 - hdr_len, mic)) + return -1; + if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) { + struct ieee80211_hdr *hdr; + hdr = (struct ieee80211_hdr *)skb->data; + printk(KERN_DEBUG "%s: Michael MIC verification failed for " + "MSDU from %pM keyidx=%d\n", + skb->dev ? skb->dev->name : "N/A", hdr->addr2, + keyidx); + if (skb->dev) + libipw_michael_mic_failure(skb->dev, hdr, keyidx); + tkey->dot11RSNAStatsTKIPLocalMICFailures++; + return -1; + } + + /* Update TSC counters for RX now that the packet verification has + * completed. */ + tkey->rx_iv32 = tkey->rx_iv32_new; + tkey->rx_iv16 = tkey->rx_iv16_new; + + skb_trim(skb, skb->len - 8); + + return 0; +} + +static int libipw_tkip_set_key(void *key, int len, u8 * seq, void *priv) +{ + struct libipw_tkip_data *tkey = priv; + int keyidx; + struct crypto_shash *tfm = tkey->tx_tfm_michael; + struct arc4_ctx *tfm2 = &tkey->tx_ctx_arc4; + struct crypto_shash *tfm3 = tkey->rx_tfm_michael; + struct arc4_ctx *tfm4 = &tkey->rx_ctx_arc4; + + keyidx = tkey->key_idx; + memset(tkey, 0, sizeof(*tkey)); + tkey->key_idx = keyidx; + tkey->tx_tfm_michael = tfm; + tkey->tx_ctx_arc4 = *tfm2; + tkey->rx_tfm_michael = tfm3; + tkey->rx_ctx_arc4 = *tfm4; + if (len == TKIP_KEY_LEN) { + memcpy(tkey->key, key, TKIP_KEY_LEN); + tkey->key_set = 1; + tkey->tx_iv16 = 1; /* TSC is initialized to 1 */ + if (seq) { + tkey->rx_iv32 = (seq[5] << 24) | (seq[4] << 16) | + (seq[3] << 8) | seq[2]; + tkey->rx_iv16 = (seq[1] << 8) | seq[0]; + } + } else if (len == 0) + tkey->key_set = 0; + else + return -1; + + return 0; +} + +static int libipw_tkip_get_key(void *key, int len, u8 * seq, void *priv) +{ + struct libipw_tkip_data *tkey = priv; + + if (len < TKIP_KEY_LEN) + return -1; + + if (!tkey->key_set) + return 0; + memcpy(key, tkey->key, TKIP_KEY_LEN); + + if (seq) { + /* + * Not clear if this should return the value as is + * or - as the code previously seemed to partially + * have been written as - subtract one from it. It + * was working this way for a long time so leave it. 
+ */ + seq[0] = tkey->tx_iv16; + seq[1] = tkey->tx_iv16 >> 8; + seq[2] = tkey->tx_iv32; + seq[3] = tkey->tx_iv32 >> 8; + seq[4] = tkey->tx_iv32 >> 16; + seq[5] = tkey->tx_iv32 >> 24; + } + + return TKIP_KEY_LEN; +} + +static void libipw_tkip_print_stats(struct seq_file *m, void *priv) +{ + struct libipw_tkip_data *tkip = priv; + seq_printf(m, + "key[%d] alg=TKIP key_set=%d " + "tx_pn=%02x%02x%02x%02x%02x%02x " + "rx_pn=%02x%02x%02x%02x%02x%02x " + "replays=%d icv_errors=%d local_mic_failures=%d\n", + tkip->key_idx, tkip->key_set, + (tkip->tx_iv32 >> 24) & 0xff, + (tkip->tx_iv32 >> 16) & 0xff, + (tkip->tx_iv32 >> 8) & 0xff, + tkip->tx_iv32 & 0xff, + (tkip->tx_iv16 >> 8) & 0xff, + tkip->tx_iv16 & 0xff, + (tkip->rx_iv32 >> 24) & 0xff, + (tkip->rx_iv32 >> 16) & 0xff, + (tkip->rx_iv32 >> 8) & 0xff, + tkip->rx_iv32 & 0xff, + (tkip->rx_iv16 >> 8) & 0xff, + tkip->rx_iv16 & 0xff, + tkip->dot11RSNAStatsTKIPReplays, + tkip->dot11RSNAStatsTKIPICVErrors, + tkip->dot11RSNAStatsTKIPLocalMICFailures); +} + +static const struct libipw_crypto_ops libipw_crypt_tkip = { + .name = "TKIP", + .init = libipw_tkip_init, + .deinit = libipw_tkip_deinit, + .encrypt_mpdu = libipw_tkip_encrypt, + .decrypt_mpdu = libipw_tkip_decrypt, + .encrypt_msdu = libipw_michael_mic_add, + .decrypt_msdu = libipw_michael_mic_verify, + .set_key = libipw_tkip_set_key, + .get_key = libipw_tkip_get_key, + .print_stats = libipw_tkip_print_stats, + .extra_mpdu_prefix_len = 4 + 4, /* IV + ExtIV */ + .extra_mpdu_postfix_len = 4, /* ICV */ + .extra_msdu_postfix_len = 8, /* MIC */ + .get_flags = libipw_tkip_get_flags, + .set_flags = libipw_tkip_set_flags, + .owner = THIS_MODULE, +}; + +int __init libipw_crypto_tkip_init(void) +{ + return libipw_register_crypto_ops(&libipw_crypt_tkip); +} + +void libipw_crypto_tkip_exit(void) +{ + libipw_unregister_crypto_ops(&libipw_crypt_tkip); +} diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_crypto_wep.c b/drivers/net/wireless/intel/ipw2x00/libipw_crypto_wep.c new file mode 100644 index 000000000000..c3a4ccb9de17 --- /dev/null +++ b/drivers/net/wireless/intel/ipw2x00/libipw_crypto_wep.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * libipw crypt: host-based WEP encryption implementation for libipw + * + * Copyright (c) 2002-2004, Jouni Malinen + * Copyright (c) 2008, John W. 
Linville + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libipw.h" + +struct libipw_wep_data { + u32 iv; +#define WEP_KEY_LEN 13 + u8 key[WEP_KEY_LEN + 1]; + u8 key_len; + u8 key_idx; + struct arc4_ctx tx_ctx; + struct arc4_ctx rx_ctx; +}; + +static void *libipw_wep_init(int keyidx) +{ + struct libipw_wep_data *priv; + + if (fips_enabled) + return NULL; + + priv = kzalloc(sizeof(*priv), GFP_ATOMIC); + if (priv == NULL) + return NULL; + priv->key_idx = keyidx; + + /* start WEP IV from a random value */ + get_random_bytes(&priv->iv, 4); + + return priv; +} + +static void libipw_wep_deinit(void *priv) +{ + kfree_sensitive(priv); +} + +/* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */ +static int libipw_wep_build_iv(struct sk_buff *skb, int hdr_len, + u8 *key, int keylen, void *priv) +{ + struct libipw_wep_data *wep = priv; + u32 klen; + u8 *pos; + + if (skb_headroom(skb) < 4 || skb->len < hdr_len) + return -1; + + pos = skb_push(skb, 4); + memmove(pos, pos + 4, hdr_len); + pos += hdr_len; + + klen = 3 + wep->key_len; + + wep->iv++; + + /* Fluhrer, Mantin, and Shamir have reported weaknesses in the key + * scheduling algorithm of RC4. At least IVs (KeyByte + 3, 0xff, N) + * can be used to speedup attacks, so avoid using them. */ + if ((wep->iv & 0xff00) == 0xff00) { + u8 B = (wep->iv >> 16) & 0xff; + if (B >= 3 && B < klen) + wep->iv += 0x0100; + } + + /* Prepend 24-bit IV to RC4 key and TX frame */ + *pos++ = (wep->iv >> 16) & 0xff; + *pos++ = (wep->iv >> 8) & 0xff; + *pos++ = wep->iv & 0xff; + *pos++ = wep->key_idx << 6; + + return 0; +} + +/* Perform WEP encryption on given skb that has at least 4 bytes of headroom + * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted, + * so the payload length increases with 8 bytes. + * + * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data)) + */ +static int libipw_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct libipw_wep_data *wep = priv; + u32 crc, klen, len; + u8 *pos, *icv; + u8 key[WEP_KEY_LEN + 3]; + + /* other checks are in libipw_wep_build_iv */ + if (skb_tailroom(skb) < 4) + return -1; + + /* add the IV to the frame */ + if (libipw_wep_build_iv(skb, hdr_len, NULL, 0, priv)) + return -1; + + /* Copy the IV into the first 3 bytes of the key */ + skb_copy_from_linear_data_offset(skb, hdr_len, key, 3); + + /* Copy rest of the WEP key (the secret part) */ + memcpy(key + 3, wep->key, wep->key_len); + + len = skb->len - hdr_len - 4; + pos = skb->data + hdr_len + 4; + klen = 3 + wep->key_len; + + /* Append little-endian CRC32 over only the data and encrypt it to produce ICV */ + crc = ~crc32_le(~0, pos, len); + icv = skb_put(skb, 4); + icv[0] = crc; + icv[1] = crc >> 8; + icv[2] = crc >> 16; + icv[3] = crc >> 24; + + arc4_setkey(&wep->tx_ctx, key, klen); + arc4_crypt(&wep->tx_ctx, pos, pos, len + 4); + + return 0; +} + +/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of + * the frame: IV (4 bytes), encrypted payload (including SNAP header), + * ICV (4 bytes). len includes both IV and ICV. + * + * Returns 0 if frame was decrypted successfully and ICV was correct and -1 on + * failure. If frame is OK, IV and ICV will be removed. 
+ */ +static int libipw_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct libipw_wep_data *wep = priv; + u32 crc, klen, plen; + u8 key[WEP_KEY_LEN + 3]; + u8 keyidx, *pos, icv[4]; + + if (skb->len < hdr_len + 8) + return -1; + + pos = skb->data + hdr_len; + key[0] = *pos++; + key[1] = *pos++; + key[2] = *pos++; + keyidx = *pos++ >> 6; + if (keyidx != wep->key_idx) + return -1; + + klen = 3 + wep->key_len; + + /* Copy rest of the WEP key (the secret part) */ + memcpy(key + 3, wep->key, wep->key_len); + + /* Apply RC4 to data and compute CRC32 over decrypted data */ + plen = skb->len - hdr_len - 8; + + arc4_setkey(&wep->rx_ctx, key, klen); + arc4_crypt(&wep->rx_ctx, pos, pos, plen + 4); + + crc = ~crc32_le(~0, pos, plen); + icv[0] = crc; + icv[1] = crc >> 8; + icv[2] = crc >> 16; + icv[3] = crc >> 24; + if (memcmp(icv, pos + plen, 4) != 0) { + /* ICV mismatch - drop frame */ + return -2; + } + + /* Remove IV and ICV */ + memmove(skb->data + 4, skb->data, hdr_len); + skb_pull(skb, 4); + skb_trim(skb, skb->len - 4); + + return 0; +} + +static int libipw_wep_set_key(void *key, int len, u8 * seq, void *priv) +{ + struct libipw_wep_data *wep = priv; + + if (len < 0 || len > WEP_KEY_LEN) + return -1; + + memcpy(wep->key, key, len); + wep->key_len = len; + + return 0; +} + +static int libipw_wep_get_key(void *key, int len, u8 * seq, void *priv) +{ + struct libipw_wep_data *wep = priv; + + if (len < wep->key_len) + return -1; + + memcpy(key, wep->key, wep->key_len); + + return wep->key_len; +} + +static void libipw_wep_print_stats(struct seq_file *m, void *priv) +{ + struct libipw_wep_data *wep = priv; + seq_printf(m, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len); +} + +static const struct libipw_crypto_ops libipw_crypt_wep = { + .name = "WEP", + .init = libipw_wep_init, + .deinit = libipw_wep_deinit, + .encrypt_mpdu = libipw_wep_encrypt, + .decrypt_mpdu = libipw_wep_decrypt, + .encrypt_msdu = NULL, + .decrypt_msdu = NULL, + .set_key = libipw_wep_set_key, + .get_key = libipw_wep_get_key, + .print_stats = libipw_wep_print_stats, + .extra_mpdu_prefix_len = 4, /* IV */ + .extra_mpdu_postfix_len = 4, /* ICV */ + .owner = THIS_MODULE, +}; + +int __init libipw_crypto_wep_init(void) +{ + return libipw_register_crypto_ops(&libipw_crypt_wep); +} + +void __exit libipw_crypto_wep_exit(void) +{ + libipw_unregister_crypto_ops(&libipw_crypt_wep); +} diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_module.c b/drivers/net/wireless/intel/ipw2x00/libipw_module.c index 43bab92a4148..0a16127bfd68 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_module.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_module.c @@ -169,7 +169,7 @@ struct net_device *alloc_libipw(int sizeof_priv, int monitor) spin_lock_init(&ieee->lock); - lib80211_crypt_info_init(&ieee->crypt_info, dev->name, &ieee->lock); + libipw_crypt_info_init(&ieee->crypt_info, dev->name, &ieee->lock); ieee->wpa_enabled = 0; ieee->drop_unencrypted = 0; @@ -191,7 +191,7 @@ void free_libipw(struct net_device *dev, int monitor) { struct libipw_device *ieee = netdev_priv(dev); - lib80211_crypt_info_free(&ieee->crypt_info); + libipw_crypt_info_free(&ieee->crypt_info); libipw_networks_free(ieee); @@ -251,6 +251,7 @@ static const struct proc_ops debug_level_proc_ops = { static int __init libipw_init(void) { + int err; #ifdef CONFIG_LIBIPW_DEBUG struct proc_dir_entry *e; @@ -273,7 +274,33 @@ static int __init libipw_init(void) printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION ", " DRV_VERSION "\n"); printk(KERN_INFO DRV_NAME ": " 
DRV_COPYRIGHT "\n"); + err = libipw_crypto_init(); + if (err) + goto remove_debugfs; + err = libipw_crypto_ccmp_init(); + if (err) + goto uninit_crypto; + err = libipw_crypto_tkip_init(); + if (err) + goto uninit_crypto_ccmp; + err = libipw_crypto_wep_init(); + if (err) + goto uninit_crypto_tkip; + return 0; +uninit_crypto_tkip: + libipw_crypto_tkip_exit(); +uninit_crypto_ccmp: + libipw_crypto_ccmp_exit(); +uninit_crypto: + libipw_crypto_exit(); +remove_debugfs: +#ifdef CONFIG_LIBIPW_DEBUG + remove_proc_entry("debug_level", libipw_proc); + remove_proc_entry(DRV_PROCNAME, init_net.proc_net); + libipw_proc = NULL; +#endif + return err; } static void __exit libipw_exit(void) @@ -285,6 +312,11 @@ static void __exit libipw_exit(void) libipw_proc = NULL; } #endif /* CONFIG_LIBIPW_DEBUG */ + + libipw_crypto_ccmp_exit(); + libipw_crypto_tkip_exit(); + libipw_crypto_wep_exit(); + libipw_crypto_exit(); } #ifdef CONFIG_LIBIPW_DEBUG diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_rx.c b/drivers/net/wireless/intel/ipw2x00/libipw_rx.c index 48d6870bbf4e..1fe05e73a17c 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_rx.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_rx.c @@ -27,9 +27,6 @@ #include #include #include - -#include - #include "libipw.h" static void libipw_monitor_rx(struct libipw_device *ieee, @@ -266,7 +263,7 @@ static int libipw_is_eapol_frame(struct libipw_device *ieee, /* Called only as a tasklet (software IRQ), by libipw_rx */ static int libipw_rx_frame_decrypt(struct libipw_device *ieee, struct sk_buff *skb, - struct lib80211_crypt_data *crypt) + struct libipw_crypt_data *crypt) { struct libipw_hdr_3addr *hdr; int res, hdrlen; @@ -298,7 +295,7 @@ libipw_rx_frame_decrypt(struct libipw_device *ieee, struct sk_buff *skb, static int libipw_rx_frame_decrypt_msdu(struct libipw_device *ieee, struct sk_buff *skb, int keyidx, - struct lib80211_crypt_data *crypt) + struct libipw_crypt_data *crypt) { struct libipw_hdr_3addr *hdr; int res, hdrlen; @@ -345,7 +342,7 @@ int libipw_rx(struct libipw_device *ieee, struct sk_buff *skb, #endif u8 dst[ETH_ALEN]; u8 src[ETH_ALEN]; - struct lib80211_crypt_data *crypt = NULL; + struct libipw_crypt_data *crypt = NULL; int keyidx = 0; int can_be_decrypted = 0; diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c index e22a6732a4c3..80edaa3dea9c 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c @@ -138,7 +138,7 @@ static int libipw_copy_snap(u8 * data, __be16 h_proto) static int libipw_encrypt_fragment(struct libipw_device *ieee, struct sk_buff *frag, int hdr_len) { - struct lib80211_crypt_data *crypt = + struct libipw_crypt_data *crypt = ieee->crypt_info.crypt[ieee->crypt_info.tx_keyidx]; int res; @@ -255,7 +255,7 @@ netdev_tx_t libipw_xmit(struct sk_buff *skb, struct net_device *dev) .qos_ctl = 0 }; u8 dest[ETH_ALEN], src[ETH_ALEN]; - struct lib80211_crypt_data *crypt; + struct libipw_crypt_data *crypt; int priority = skb->priority; int snapped = 0; diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_wx.c b/drivers/net/wireless/intel/ipw2x00/libipw_wx.c index dbc7153d0a3d..db71d81b0d4f 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_wx.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_wx.c @@ -21,10 +21,7 @@ #include #include #include - -#include #include - #include "libipw.h" static const char *libipw_modes[] = { @@ -303,7 +300,7 @@ int libipw_wx_set_encode(struct libipw_device *ieee, .flags = 0 }; int i, key, 
key_provided, len; - struct lib80211_crypt_data **crypt; + struct libipw_crypt_data **crypt; int host_crypto = ieee->host_encrypt || ieee->host_decrypt; LIBIPW_DEBUG_WX("SET_ENCODE\n"); @@ -328,7 +325,7 @@ int libipw_wx_set_encode(struct libipw_device *ieee, if (key_provided && *crypt) { LIBIPW_DEBUG_WX("Disabling encryption on key %d.\n", key); - lib80211_crypt_delayed_deinit(&ieee->crypt_info, crypt); + libipw_crypt_delayed_deinit(&ieee->crypt_info, crypt); } else LIBIPW_DEBUG_WX("Disabling encryption.\n"); @@ -338,7 +335,7 @@ int libipw_wx_set_encode(struct libipw_device *ieee, if (ieee->crypt_info.crypt[i] != NULL) { if (key_provided) break; - lib80211_crypt_delayed_deinit(&ieee->crypt_info, + libipw_crypt_delayed_deinit(&ieee->crypt_info, &ieee->crypt_info.crypt[i]); } } @@ -361,21 +358,21 @@ int libipw_wx_set_encode(struct libipw_device *ieee, strcmp((*crypt)->ops->name, "WEP") != 0) { /* changing to use WEP; deinit previously used algorithm * on this key */ - lib80211_crypt_delayed_deinit(&ieee->crypt_info, crypt); + libipw_crypt_delayed_deinit(&ieee->crypt_info, crypt); } if (*crypt == NULL && host_crypto) { - struct lib80211_crypt_data *new_crypt; + struct libipw_crypt_data *new_crypt; /* take WEP into use */ - new_crypt = kzalloc(sizeof(struct lib80211_crypt_data), + new_crypt = kzalloc(sizeof(struct libipw_crypt_data), GFP_KERNEL); if (new_crypt == NULL) return -ENOMEM; - new_crypt->ops = lib80211_get_crypto_ops("WEP"); + new_crypt->ops = libipw_get_crypto_ops("WEP"); if (!new_crypt->ops) { - request_module("lib80211_crypt_wep"); - new_crypt->ops = lib80211_get_crypto_ops("WEP"); + request_module("libipw_crypt_wep"); + new_crypt->ops = libipw_get_crypto_ops("WEP"); } if (new_crypt->ops && try_module_get(new_crypt->ops->owner)) @@ -386,7 +383,7 @@ int libipw_wx_set_encode(struct libipw_device *ieee, new_crypt = NULL; printk(KERN_WARNING "%s: could not initialize WEP: " - "load module lib80211_crypt_wep\n", dev->name); + "load module libipw_crypt_wep\n", dev->name); return -EOPNOTSUPP; } *crypt = new_crypt; @@ -509,8 +506,8 @@ int libipw_wx_set_encodeext(struct libipw_device *ieee, int i, idx, ret = 0; int group_key = 0; const char *alg, *module; - const struct lib80211_crypto_ops *ops; - struct lib80211_crypt_data **crypt; + const struct libipw_crypto_ops *ops; + struct libipw_crypt_data **crypt; struct libipw_security sec = { .flags = 0, @@ -541,7 +538,7 @@ int libipw_wx_set_encodeext(struct libipw_device *ieee, if ((encoding->flags & IW_ENCODE_DISABLED) || ext->alg == IW_ENCODE_ALG_NONE) { if (*crypt) - lib80211_crypt_delayed_deinit(&ieee->crypt_info, crypt); + libipw_crypt_delayed_deinit(&ieee->crypt_info, crypt); for (i = 0; i < WEP_KEYS; i++) if (ieee->crypt_info.crypt[i] != NULL) @@ -567,15 +564,15 @@ int libipw_wx_set_encodeext(struct libipw_device *ieee, switch (ext->alg) { case IW_ENCODE_ALG_WEP: alg = "WEP"; - module = "lib80211_crypt_wep"; + module = "libipw_crypt_wep"; break; case IW_ENCODE_ALG_TKIP: alg = "TKIP"; - module = "lib80211_crypt_tkip"; + module = "libipw_crypt_tkip"; break; case IW_ENCODE_ALG_CCMP: alg = "CCMP"; - module = "lib80211_crypt_ccmp"; + module = "libipw_crypt_ccmp"; break; default: LIBIPW_DEBUG_WX("%s: unknown crypto alg %d\n", @@ -584,10 +581,10 @@ int libipw_wx_set_encodeext(struct libipw_device *ieee, goto done; } - ops = lib80211_get_crypto_ops(alg); + ops = libipw_get_crypto_ops(alg); if (ops == NULL) { request_module(module); - ops = lib80211_get_crypto_ops(alg); + ops = libipw_get_crypto_ops(alg); } if (ops == NULL) { LIBIPW_DEBUG_WX("%s: 
unknown crypto alg %d\n", @@ -597,9 +594,9 @@ int libipw_wx_set_encodeext(struct libipw_device *ieee, } if (*crypt == NULL || (*crypt)->ops != ops) { - struct lib80211_crypt_data *new_crypt; + struct libipw_crypt_data *new_crypt; - lib80211_crypt_delayed_deinit(&ieee->crypt_info, crypt); + libipw_crypt_delayed_deinit(&ieee->crypt_info, crypt); new_crypt = kzalloc(sizeof(*new_crypt), GFP_KERNEL); if (new_crypt == NULL) { diff --git a/include/net/lib80211.h b/include/net/lib80211.h deleted file mode 100644 index fd0f15d87d80..000000000000 --- a/include/net/lib80211.h +++ /dev/null @@ -1,122 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * lib80211.h -- common bits for IEEE802.11 wireless drivers - * - * Copyright (c) 2008, John W. Linville - * - * Some bits copied from old ieee80211 component, w/ original copyright - * notices below: - * - * Original code based on Host AP (software wireless LAN access point) driver - * for Intersil Prism2/2.5/3. - * - * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen - * - * Copyright (c) 2002-2003, Jouni Malinen - * - * Adaption to a generic IEEE 802.11 stack by James Ketrenos - * - * - * Copyright (c) 2004, Intel Corporation - * - */ - -#ifndef LIB80211_H -#define LIB80211_H - -#include -#include -#include -#include -#include -#include -#include -#include - -#define NUM_WEP_KEYS 4 - -enum { - IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0), -}; - -struct module; - -struct lib80211_crypto_ops { - const char *name; - struct list_head list; - - /* init new crypto context (e.g., allocate private data space, - * select IV, etc.); returns NULL on failure or pointer to allocated - * private data on success */ - void *(*init) (int keyidx); - - /* deinitialize crypto context and free allocated private data */ - void (*deinit) (void *priv); - - /* encrypt/decrypt return < 0 on error or >= 0 on success. The return - * value from decrypt_mpdu is passed as the keyidx value for - * decrypt_msdu. skb must have enough head and tail room for the - * encryption; if not, error will be returned; these functions are - * called for all MPDUs (i.e., fragments). - */ - int (*encrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); - int (*decrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); - - /* These functions are called for full MSDUs, i.e. full frames. - * These can be NULL if full MSDU operations are not needed. 
*/ - int (*encrypt_msdu) (struct sk_buff * skb, int hdr_len, void *priv); - int (*decrypt_msdu) (struct sk_buff * skb, int keyidx, int hdr_len, - void *priv); - - int (*set_key) (void *key, int len, u8 * seq, void *priv); - int (*get_key) (void *key, int len, u8 * seq, void *priv); - - /* procfs handler for printing out key information and possible - * statistics */ - void (*print_stats) (struct seq_file *m, void *priv); - - /* Crypto specific flag get/set for configuration settings */ - unsigned long (*get_flags) (void *priv); - unsigned long (*set_flags) (unsigned long flags, void *priv); - - /* maximum number of bytes added by encryption; encrypt buf is - * allocated with extra_prefix_len bytes, copy of in_buf, and - * extra_postfix_len; encrypt need not use all this space, but - * the result must start at the beginning of the buffer and correct - * length must be returned */ - int extra_mpdu_prefix_len, extra_mpdu_postfix_len; - int extra_msdu_prefix_len, extra_msdu_postfix_len; - - struct module *owner; -}; - -struct lib80211_crypt_data { - struct list_head list; /* delayed deletion list */ - const struct lib80211_crypto_ops *ops; - void *priv; - atomic_t refcnt; -}; - -struct lib80211_crypt_info { - char *name; - /* Most clients will already have a lock, - so just point to that. */ - spinlock_t *lock; - - struct lib80211_crypt_data *crypt[NUM_WEP_KEYS]; - int tx_keyidx; /* default TX key index (crypt[tx_keyidx]) */ - struct list_head crypt_deinit_list; - struct timer_list crypt_deinit_timer; - int crypt_quiesced; -}; - -int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, - spinlock_t *lock); -void lib80211_crypt_info_free(struct lib80211_crypt_info *info); -int lib80211_register_crypto_ops(const struct lib80211_crypto_ops *ops); -int lib80211_unregister_crypto_ops(const struct lib80211_crypto_ops *ops); -const struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name); -void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info, - struct lib80211_crypt_data **crypt); - -#endif /* LIB80211_H */ diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 10345388ad13..733c53ad4de5 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -212,36 +212,3 @@ config CFG80211_KUNIT_TEST If unsure, say N. endif # CFG80211 - -config LIB80211 - tristate - default n - help - This options enables a library of common routines used - by IEEE802.11 wireless LAN drivers. - - Drivers should select this themselves if needed. - -config LIB80211_CRYPT_WEP - tristate - select CRYPTO_LIB_ARC4 - -config LIB80211_CRYPT_CCMP - tristate - select CRYPTO - select CRYPTO_AES - select CRYPTO_CCM - -config LIB80211_CRYPT_TKIP - tristate - select CRYPTO_LIB_ARC4 - -config LIB80211_DEBUG - bool "lib80211 debugging messages" - depends on LIB80211 - default n - help - You can enable this if you want verbose debugging messages - from lib80211. - - If unsure, say N. 
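The lib80211_crypto_ops interface removed above survives unchanged under the libipw_ prefix; the patch only renames it and moves the providers into the ipw2x00 driver. As a rough illustration of how a caller drives that ops table, the sketch below looks up a registered algorithm, binds it to a crypt slot and pushes one MPDU through encrypt_mpdu(). It is not part of the patch: the libipw_crypt_example_* helpers and the error values are made up for illustration, the flow simply mirrors what libipw_wx_set_encodeext() and libipw_encrypt_fragment() do in the hunks above, and it assumes the renamed declarations (struct libipw_crypto_ops, struct libipw_crypt_data, libipw_get_crypto_ops()) are reachable through "libipw.h", as the new crypto files include only that header.

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include "libipw.h"

/* Hypothetical consumer sketch, not part of this patch. */
static struct libipw_crypt_data *libipw_crypt_example_setup(const char *alg,
							    int keyidx,
							    u8 *key, int keylen)
{
	struct libipw_crypt_data *crypt;

	crypt = kzalloc(sizeof(*crypt), GFP_KERNEL);
	if (!crypt)
		return NULL;

	/* Look up a registered algorithm by name, as the wx code does. */
	crypt->ops = libipw_get_crypto_ops(alg);
	if (!crypt->ops || !try_module_get(crypt->ops->owner))
		goto free;

	/* Allocate per-key private state and load the key material. */
	crypt->priv = crypt->ops->init(keyidx);
	if (!crypt->priv)
		goto put;
	if (crypt->ops->set_key(key, keylen, NULL, crypt->priv) < 0)
		goto deinit;

	return crypt;

deinit:
	crypt->ops->deinit(crypt->priv);
put:
	module_put(crypt->ops->owner);
free:
	kfree(crypt);
	return NULL;
}

static int libipw_crypt_example_encrypt(struct libipw_crypt_data *crypt,
					struct sk_buff *skb, int hdr_len)
{
	const struct libipw_crypto_ops *ops = crypt->ops;

	/* encrypt_mpdu() relies on the advertised head and tail room. */
	if (skb_headroom(skb) < ops->extra_mpdu_prefix_len ||
	    skb_tailroom(skb) < ops->extra_mpdu_postfix_len)
		return -EINVAL;

	return ops->encrypt_mpdu(skb, hdr_len, crypt->priv);
}

Teardown in the real code does not call ops->deinit() directly but goes through libipw_crypt_delayed_deinit(), so that encrypt or decrypt operations still in flight complete before the private state is freed, as the deleted lib80211.c explains in its delayed-deinit list handling.
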
diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 1d49cc8b6da1..27f211bd9954 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CFG80211) += cfg80211.o -obj-$(CONFIG_LIB80211) += lib80211.o -obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o -obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o -obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o obj-y += tests/ obj-$(CONFIG_WEXT_CORE) += wext-core.o diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c deleted file mode 100644 index 64c447040786..000000000000 --- a/net/wireless/lib80211.c +++ /dev/null @@ -1,257 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * lib80211 -- common bits for IEEE802.11 drivers - * - * Copyright(c) 2008 John W. Linville - * - * Portions copied from old ieee80211 component, w/ original copyright - * notices below: - * - * Host AP crypto routines - * - * Copyright (c) 2002-2003, Jouni Malinen - * Portions Copyright (C) 2004, Intel Corporation - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include - -#include - -#define DRV_DESCRIPTION "common routines for IEEE802.11 drivers" - -MODULE_DESCRIPTION(DRV_DESCRIPTION); -MODULE_AUTHOR("John W. Linville "); -MODULE_LICENSE("GPL"); - -struct lib80211_crypto_alg { - struct list_head list; - const struct lib80211_crypto_ops *ops; -}; - -static LIST_HEAD(lib80211_crypto_algs); -static DEFINE_SPINLOCK(lib80211_crypto_lock); - -static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, - int force); -static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info); -static void lib80211_crypt_deinit_handler(struct timer_list *t); - -int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, - spinlock_t *lock) -{ - memset(info, 0, sizeof(*info)); - - info->name = name; - info->lock = lock; - - INIT_LIST_HEAD(&info->crypt_deinit_list); - timer_setup(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler, - 0); - - return 0; -} -EXPORT_SYMBOL(lib80211_crypt_info_init); - -void lib80211_crypt_info_free(struct lib80211_crypt_info *info) -{ - int i; - - lib80211_crypt_quiescing(info); - del_timer_sync(&info->crypt_deinit_timer); - lib80211_crypt_deinit_entries(info, 1); - - for (i = 0; i < NUM_WEP_KEYS; i++) { - struct lib80211_crypt_data *crypt = info->crypt[i]; - if (crypt) { - if (crypt->ops) { - crypt->ops->deinit(crypt->priv); - module_put(crypt->ops->owner); - } - kfree(crypt); - info->crypt[i] = NULL; - } - } -} -EXPORT_SYMBOL(lib80211_crypt_info_free); - -static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, - int force) -{ - struct lib80211_crypt_data *entry, *next; - unsigned long flags; - - spin_lock_irqsave(info->lock, flags); - list_for_each_entry_safe(entry, next, &info->crypt_deinit_list, list) { - if (atomic_read(&entry->refcnt) != 0 && !force) - continue; - - list_del(&entry->list); - - if (entry->ops) { - entry->ops->deinit(entry->priv); - module_put(entry->ops->owner); - } - kfree(entry); - } - spin_unlock_irqrestore(info->lock, flags); -} - -/* After this, crypt_deinit_list won't accept new members */ -static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info) -{ - unsigned long flags; - - spin_lock_irqsave(info->lock, flags); - info->crypt_quiesced = 1; - spin_unlock_irqrestore(info->lock, flags); -} - -static void lib80211_crypt_deinit_handler(struct timer_list *t) -{ - struct 
lib80211_crypt_info *info = from_timer(info, t, - crypt_deinit_timer); - unsigned long flags; - - lib80211_crypt_deinit_entries(info, 0); - - spin_lock_irqsave(info->lock, flags); - if (!list_empty(&info->crypt_deinit_list) && !info->crypt_quiesced) { - printk(KERN_DEBUG "%s: entries remaining in delayed crypt " - "deletion list\n", info->name); - info->crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&info->crypt_deinit_timer); - } - spin_unlock_irqrestore(info->lock, flags); -} - -void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info, - struct lib80211_crypt_data **crypt) -{ - struct lib80211_crypt_data *tmp; - unsigned long flags; - - if (*crypt == NULL) - return; - - tmp = *crypt; - *crypt = NULL; - - /* must not run ops->deinit() while there may be pending encrypt or - * decrypt operations. Use a list of delayed deinits to avoid needing - * locking. */ - - spin_lock_irqsave(info->lock, flags); - if (!info->crypt_quiesced) { - list_add(&tmp->list, &info->crypt_deinit_list); - if (!timer_pending(&info->crypt_deinit_timer)) { - info->crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&info->crypt_deinit_timer); - } - } - spin_unlock_irqrestore(info->lock, flags); -} -EXPORT_SYMBOL(lib80211_crypt_delayed_deinit); - -int lib80211_register_crypto_ops(const struct lib80211_crypto_ops *ops) -{ - unsigned long flags; - struct lib80211_crypto_alg *alg; - - alg = kzalloc(sizeof(*alg), GFP_KERNEL); - if (alg == NULL) - return -ENOMEM; - - alg->ops = ops; - - spin_lock_irqsave(&lib80211_crypto_lock, flags); - list_add(&alg->list, &lib80211_crypto_algs); - spin_unlock_irqrestore(&lib80211_crypto_lock, flags); - - printk(KERN_DEBUG "lib80211_crypt: registered algorithm '%s'\n", - ops->name); - - return 0; -} -EXPORT_SYMBOL(lib80211_register_crypto_ops); - -int lib80211_unregister_crypto_ops(const struct lib80211_crypto_ops *ops) -{ - struct lib80211_crypto_alg *alg; - unsigned long flags; - - spin_lock_irqsave(&lib80211_crypto_lock, flags); - list_for_each_entry(alg, &lib80211_crypto_algs, list) { - if (alg->ops == ops) - goto found; - } - spin_unlock_irqrestore(&lib80211_crypto_lock, flags); - return -EINVAL; - - found: - printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm '%s'\n", - ops->name); - list_del(&alg->list); - spin_unlock_irqrestore(&lib80211_crypto_lock, flags); - kfree(alg); - return 0; -} -EXPORT_SYMBOL(lib80211_unregister_crypto_ops); - -const struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name) -{ - struct lib80211_crypto_alg *alg; - unsigned long flags; - - spin_lock_irqsave(&lib80211_crypto_lock, flags); - list_for_each_entry(alg, &lib80211_crypto_algs, list) { - if (strcmp(alg->ops->name, name) == 0) - goto found; - } - spin_unlock_irqrestore(&lib80211_crypto_lock, flags); - return NULL; - - found: - spin_unlock_irqrestore(&lib80211_crypto_lock, flags); - return alg->ops; -} -EXPORT_SYMBOL(lib80211_get_crypto_ops); - -static void *lib80211_crypt_null_init(int keyidx) -{ - return (void *)1; -} - -static void lib80211_crypt_null_deinit(void *priv) -{ -} - -static const struct lib80211_crypto_ops lib80211_crypt_null = { - .name = "NULL", - .init = lib80211_crypt_null_init, - .deinit = lib80211_crypt_null_deinit, - .owner = THIS_MODULE, -}; - -static int __init lib80211_init(void) -{ - pr_info(DRV_DESCRIPTION "\n"); - return lib80211_register_crypto_ops(&lib80211_crypt_null); -} - -static void __exit lib80211_exit(void) -{ - lib80211_unregister_crypto_ops(&lib80211_crypt_null); - BUG_ON(!list_empty(&lib80211_crypto_algs)); -} - 
-module_init(lib80211_init); -module_exit(lib80211_exit); diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c deleted file mode 100644 index 5aad139130e1..000000000000 --- a/net/wireless/lib80211_crypt_ccmp.c +++ /dev/null @@ -1,448 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * lib80211 crypt: host-based CCMP encryption implementation for lib80211 - * - * Copyright (c) 2003-2004, Jouni Malinen - * Copyright (c) 2008, John W. Linville - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#include - -MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("Host AP crypt: CCMP"); -MODULE_LICENSE("GPL"); - -#define AES_BLOCK_LEN 16 -#define CCMP_HDR_LEN 8 -#define CCMP_MIC_LEN 8 -#define CCMP_TK_LEN 16 -#define CCMP_PN_LEN 6 - -struct lib80211_ccmp_data { - u8 key[CCMP_TK_LEN]; - int key_set; - - u8 tx_pn[CCMP_PN_LEN]; - u8 rx_pn[CCMP_PN_LEN]; - - u32 dot11RSNAStatsCCMPFormatErrors; - u32 dot11RSNAStatsCCMPReplays; - u32 dot11RSNAStatsCCMPDecryptErrors; - - int key_idx; - - struct crypto_aead *tfm; - - /* scratch buffers for virt_to_page() (crypto API) */ - u8 tx_aad[2 * AES_BLOCK_LEN]; - u8 rx_aad[2 * AES_BLOCK_LEN]; -}; - -static void *lib80211_ccmp_init(int key_idx) -{ - struct lib80211_ccmp_data *priv; - - priv = kzalloc(sizeof(*priv), GFP_ATOMIC); - if (priv == NULL) - goto fail; - priv->key_idx = key_idx; - - priv->tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(priv->tfm)) { - priv->tfm = NULL; - goto fail; - } - - return priv; - - fail: - if (priv) { - if (priv->tfm) - crypto_free_aead(priv->tfm); - kfree(priv); - } - - return NULL; -} - -static void lib80211_ccmp_deinit(void *priv) -{ - struct lib80211_ccmp_data *_priv = priv; - if (_priv && _priv->tfm) - crypto_free_aead(_priv->tfm); - kfree(priv); -} - -static int ccmp_init_iv_and_aad(const struct ieee80211_hdr *hdr, - const u8 *pn, u8 *iv, u8 *aad) -{ - u8 *pos, qc = 0; - size_t aad_len; - int a4_included, qc_included; - - a4_included = ieee80211_has_a4(hdr->frame_control); - qc_included = ieee80211_is_data_qos(hdr->frame_control); - - aad_len = 22; - if (a4_included) - aad_len += 6; - if (qc_included) { - pos = (u8 *) & hdr->addr4; - if (a4_included) - pos += 6; - qc = *pos & 0x0f; - aad_len += 2; - } - - /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC - * mode authentication are not allowed to collide, yet both are derived - * from the same vector. We only set L := 1 here to indicate that the - * data size can be represented in (L+1) bytes. The CCM layer will take - * care of storing the data length in the top (L+1) bytes and setting - * and clearing the other bits as is required to derive the two IVs. - */ - iv[0] = 0x1; - - /* Nonce: QC | A2 | PN */ - iv[1] = qc; - memcpy(iv + 2, hdr->addr2, ETH_ALEN); - memcpy(iv + 8, pn, CCMP_PN_LEN); - - /* AAD: - * FC with bits 4..6 and 11..13 masked to zero; 14 is always one - * A1 | A2 | A3 - * SC with bits 4..15 (seq#) masked to zero - * A4 (if present) - * QC (if present) - */ - pos = (u8 *) hdr; - aad[0] = pos[0] & 0x8f; - aad[1] = pos[1] & 0xc7; - memcpy(aad + 2, &hdr->addrs, 3 * ETH_ALEN); - pos = (u8 *) & hdr->seq_ctrl; - aad[20] = pos[0] & 0x0f; - aad[21] = 0; /* all bits masked */ - memset(aad + 22, 0, 8); - if (a4_included) - memcpy(aad + 22, hdr->addr4, ETH_ALEN); - if (qc_included) { - aad[a4_included ? 
28 : 22] = qc; - /* rest of QC masked */ - } - return aad_len; -} - -static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, - u8 *aeskey, int keylen, void *priv) -{ - struct lib80211_ccmp_data *key = priv; - int i; - u8 *pos; - - if (skb_headroom(skb) < CCMP_HDR_LEN || skb->len < hdr_len) - return -1; - - if (aeskey != NULL && keylen >= CCMP_TK_LEN) - memcpy(aeskey, key->key, CCMP_TK_LEN); - - pos = skb_push(skb, CCMP_HDR_LEN); - memmove(pos, pos + CCMP_HDR_LEN, hdr_len); - pos += hdr_len; - - i = CCMP_PN_LEN - 1; - while (i >= 0) { - key->tx_pn[i]++; - if (key->tx_pn[i] != 0) - break; - i--; - } - - *pos++ = key->tx_pn[5]; - *pos++ = key->tx_pn[4]; - *pos++ = 0; - *pos++ = (key->key_idx << 6) | (1 << 5) /* Ext IV included */ ; - *pos++ = key->tx_pn[3]; - *pos++ = key->tx_pn[2]; - *pos++ = key->tx_pn[1]; - *pos++ = key->tx_pn[0]; - - return CCMP_HDR_LEN; -} - -static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct lib80211_ccmp_data *key = priv; - struct ieee80211_hdr *hdr; - struct aead_request *req; - struct scatterlist sg[2]; - u8 *aad = key->tx_aad; - u8 iv[AES_BLOCK_LEN]; - int len, data_len, aad_len; - int ret; - - if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len) - return -1; - - data_len = skb->len - hdr_len; - len = lib80211_ccmp_hdr(skb, hdr_len, NULL, 0, priv); - if (len < 0) - return -1; - - req = aead_request_alloc(key->tfm, GFP_ATOMIC); - if (!req) - return -ENOMEM; - - hdr = (struct ieee80211_hdr *)skb->data; - aad_len = ccmp_init_iv_and_aad(hdr, key->tx_pn, iv, aad); - - skb_put(skb, CCMP_MIC_LEN); - - sg_init_table(sg, 2); - sg_set_buf(&sg[0], aad, aad_len); - sg_set_buf(&sg[1], skb->data + hdr_len + CCMP_HDR_LEN, - data_len + CCMP_MIC_LEN); - - aead_request_set_callback(req, 0, NULL, NULL); - aead_request_set_ad(req, aad_len); - aead_request_set_crypt(req, sg, sg, data_len, iv); - - ret = crypto_aead_encrypt(req); - aead_request_free(req); - - return ret; -} - -/* - * deal with seq counter wrapping correctly. 
- * refer to timer_after() for jiffies wrapping handling - */ -static inline int ccmp_replay_check(u8 *pn_n, u8 *pn_o) -{ - u32 iv32_n, iv16_n; - u32 iv32_o, iv16_o; - - iv32_n = (pn_n[0] << 24) | (pn_n[1] << 16) | (pn_n[2] << 8) | pn_n[3]; - iv16_n = (pn_n[4] << 8) | pn_n[5]; - - iv32_o = (pn_o[0] << 24) | (pn_o[1] << 16) | (pn_o[2] << 8) | pn_o[3]; - iv16_o = (pn_o[4] << 8) | pn_o[5]; - - if ((s32)iv32_n - (s32)iv32_o < 0 || - (iv32_n == iv32_o && iv16_n <= iv16_o)) - return 1; - return 0; -} - -static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct lib80211_ccmp_data *key = priv; - u8 keyidx, *pos; - struct ieee80211_hdr *hdr; - struct aead_request *req; - struct scatterlist sg[2]; - u8 *aad = key->rx_aad; - u8 iv[AES_BLOCK_LEN]; - u8 pn[6]; - int aad_len, ret; - size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN; - - if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) { - key->dot11RSNAStatsCCMPFormatErrors++; - return -1; - } - - hdr = (struct ieee80211_hdr *)skb->data; - pos = skb->data + hdr_len; - keyidx = pos[3]; - if (!(keyidx & (1 << 5))) { - net_dbg_ratelimited("CCMP: received packet without ExtIV flag from %pM\n", - hdr->addr2); - key->dot11RSNAStatsCCMPFormatErrors++; - return -2; - } - keyidx >>= 6; - if (key->key_idx != keyidx) { - net_dbg_ratelimited("CCMP: RX tkey->key_idx=%d frame keyidx=%d\n", - key->key_idx, keyidx); - return -6; - } - if (!key->key_set) { - net_dbg_ratelimited("CCMP: received packet from %pM with keyid=%d that does not have a configured key\n", - hdr->addr2, keyidx); - return -3; - } - - pn[0] = pos[7]; - pn[1] = pos[6]; - pn[2] = pos[5]; - pn[3] = pos[4]; - pn[4] = pos[1]; - pn[5] = pos[0]; - pos += 8; - - if (ccmp_replay_check(pn, key->rx_pn)) { -#ifdef CONFIG_LIB80211_DEBUG - net_dbg_ratelimited("CCMP: replay detected: STA=%pM previous PN %02x%02x%02x%02x%02x%02x received PN %02x%02x%02x%02x%02x%02x\n", - hdr->addr2, - key->rx_pn[0], key->rx_pn[1], key->rx_pn[2], - key->rx_pn[3], key->rx_pn[4], key->rx_pn[5], - pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); -#endif - key->dot11RSNAStatsCCMPReplays++; - return -4; - } - - req = aead_request_alloc(key->tfm, GFP_ATOMIC); - if (!req) - return -ENOMEM; - - aad_len = ccmp_init_iv_and_aad(hdr, pn, iv, aad); - - sg_init_table(sg, 2); - sg_set_buf(&sg[0], aad, aad_len); - sg_set_buf(&sg[1], pos, data_len); - - aead_request_set_callback(req, 0, NULL, NULL); - aead_request_set_ad(req, aad_len); - aead_request_set_crypt(req, sg, sg, data_len, iv); - - ret = crypto_aead_decrypt(req); - aead_request_free(req); - - if (ret) { - net_dbg_ratelimited("CCMP: decrypt failed: STA=%pM (%d)\n", - hdr->addr2, ret); - key->dot11RSNAStatsCCMPDecryptErrors++; - return -5; - } - - memcpy(key->rx_pn, pn, CCMP_PN_LEN); - - /* Remove hdr and MIC */ - memmove(skb->data + CCMP_HDR_LEN, skb->data, hdr_len); - skb_pull(skb, CCMP_HDR_LEN); - skb_trim(skb, skb->len - CCMP_MIC_LEN); - - return keyidx; -} - -static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) -{ - struct lib80211_ccmp_data *data = priv; - int keyidx; - struct crypto_aead *tfm = data->tfm; - - keyidx = data->key_idx; - memset(data, 0, sizeof(*data)); - data->key_idx = keyidx; - data->tfm = tfm; - if (len == CCMP_TK_LEN) { - memcpy(data->key, key, CCMP_TK_LEN); - data->key_set = 1; - if (seq) { - data->rx_pn[0] = seq[5]; - data->rx_pn[1] = seq[4]; - data->rx_pn[2] = seq[3]; - data->rx_pn[3] = seq[2]; - data->rx_pn[4] = seq[1]; - data->rx_pn[5] = seq[0]; - } - if (crypto_aead_setauthsize(data->tfm, CCMP_MIC_LEN) || 
- crypto_aead_setkey(data->tfm, data->key, CCMP_TK_LEN)) - return -1; - } else if (len == 0) - data->key_set = 0; - else - return -1; - - return 0; -} - -static int lib80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) -{ - struct lib80211_ccmp_data *data = priv; - - if (len < CCMP_TK_LEN) - return -1; - - if (!data->key_set) - return 0; - memcpy(key, data->key, CCMP_TK_LEN); - - if (seq) { - seq[0] = data->tx_pn[5]; - seq[1] = data->tx_pn[4]; - seq[2] = data->tx_pn[3]; - seq[3] = data->tx_pn[2]; - seq[4] = data->tx_pn[1]; - seq[5] = data->tx_pn[0]; - } - - return CCMP_TK_LEN; -} - -static void lib80211_ccmp_print_stats(struct seq_file *m, void *priv) -{ - struct lib80211_ccmp_data *ccmp = priv; - - seq_printf(m, - "key[%d] alg=CCMP key_set=%d " - "tx_pn=%02x%02x%02x%02x%02x%02x " - "rx_pn=%02x%02x%02x%02x%02x%02x " - "format_errors=%d replays=%d decrypt_errors=%d\n", - ccmp->key_idx, ccmp->key_set, - ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2], - ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5], - ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2], - ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5], - ccmp->dot11RSNAStatsCCMPFormatErrors, - ccmp->dot11RSNAStatsCCMPReplays, - ccmp->dot11RSNAStatsCCMPDecryptErrors); -} - -static const struct lib80211_crypto_ops lib80211_crypt_ccmp = { - .name = "CCMP", - .init = lib80211_ccmp_init, - .deinit = lib80211_ccmp_deinit, - .encrypt_mpdu = lib80211_ccmp_encrypt, - .decrypt_mpdu = lib80211_ccmp_decrypt, - .encrypt_msdu = NULL, - .decrypt_msdu = NULL, - .set_key = lib80211_ccmp_set_key, - .get_key = lib80211_ccmp_get_key, - .print_stats = lib80211_ccmp_print_stats, - .extra_mpdu_prefix_len = CCMP_HDR_LEN, - .extra_mpdu_postfix_len = CCMP_MIC_LEN, - .owner = THIS_MODULE, -}; - -static int __init lib80211_crypto_ccmp_init(void) -{ - return lib80211_register_crypto_ops(&lib80211_crypt_ccmp); -} - -static void __exit lib80211_crypto_ccmp_exit(void) -{ - lib80211_unregister_crypto_ops(&lib80211_crypt_ccmp); -} - -module_init(lib80211_crypto_ccmp_init); -module_exit(lib80211_crypto_ccmp_exit); diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c deleted file mode 100644 index 63e68e5e121e..000000000000 --- a/net/wireless/lib80211_crypt_tkip.c +++ /dev/null @@ -1,738 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * lib80211 crypt: host-based TKIP encryption implementation for lib80211 - * - * Copyright (c) 2003-2004, Jouni Malinen - * Copyright (c) 2008, John W. 
Linville - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include - -#include - -MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("lib80211 crypt: TKIP"); -MODULE_LICENSE("GPL"); - -#define TKIP_HDR_LEN 8 - -struct lib80211_tkip_data { -#define TKIP_KEY_LEN 32 - u8 key[TKIP_KEY_LEN]; - int key_set; - - u32 tx_iv32; - u16 tx_iv16; - u16 tx_ttak[5]; - int tx_phase1_done; - - u32 rx_iv32; - u16 rx_iv16; - u16 rx_ttak[5]; - int rx_phase1_done; - u32 rx_iv32_new; - u16 rx_iv16_new; - - u32 dot11RSNAStatsTKIPReplays; - u32 dot11RSNAStatsTKIPICVErrors; - u32 dot11RSNAStatsTKIPLocalMICFailures; - - int key_idx; - - struct arc4_ctx rx_ctx_arc4; - struct arc4_ctx tx_ctx_arc4; - struct crypto_shash *rx_tfm_michael; - struct crypto_shash *tx_tfm_michael; - - /* scratch buffers for virt_to_page() (crypto API) */ - u8 rx_hdr[16], tx_hdr[16]; - - unsigned long flags; -}; - -static unsigned long lib80211_tkip_set_flags(unsigned long flags, void *priv) -{ - struct lib80211_tkip_data *_priv = priv; - unsigned long old_flags = _priv->flags; - _priv->flags = flags; - return old_flags; -} - -static unsigned long lib80211_tkip_get_flags(void *priv) -{ - struct lib80211_tkip_data *_priv = priv; - return _priv->flags; -} - -static void *lib80211_tkip_init(int key_idx) -{ - struct lib80211_tkip_data *priv; - - if (fips_enabled) - return NULL; - - priv = kzalloc(sizeof(*priv), GFP_ATOMIC); - if (priv == NULL) - goto fail; - - priv->key_idx = key_idx; - - priv->tx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0); - if (IS_ERR(priv->tx_tfm_michael)) { - priv->tx_tfm_michael = NULL; - goto fail; - } - - priv->rx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0); - if (IS_ERR(priv->rx_tfm_michael)) { - priv->rx_tfm_michael = NULL; - goto fail; - } - - return priv; - - fail: - if (priv) { - crypto_free_shash(priv->tx_tfm_michael); - crypto_free_shash(priv->rx_tfm_michael); - kfree(priv); - } - - return NULL; -} - -static void lib80211_tkip_deinit(void *priv) -{ - struct lib80211_tkip_data *_priv = priv; - if (_priv) { - crypto_free_shash(_priv->tx_tfm_michael); - crypto_free_shash(_priv->rx_tfm_michael); - } - kfree_sensitive(priv); -} - -static inline u16 RotR1(u16 val) -{ - return (val >> 1) | (val << 15); -} - -static inline u8 Lo8(u16 val) -{ - return val & 0xff; -} - -static inline u8 Hi8(u16 val) -{ - return val >> 8; -} - -static inline u16 Lo16(u32 val) -{ - return val & 0xffff; -} - -static inline u16 Hi16(u32 val) -{ - return val >> 16; -} - -static inline u16 Mk16(u8 hi, u8 lo) -{ - return lo | (((u16) hi) << 8); -} - -static inline u16 Mk16_le(__le16 * v) -{ - return le16_to_cpu(*v); -} - -static const u16 Sbox[256] = { - 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154, - 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A, - 0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B, - 0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B, - 0x75C2, 0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 0x7E41, 0xF502, 0x834F, - 0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F, - 0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5, - 0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F, - 0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB, - 0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397, - 0xA6F5, 0xB968, 0x0000, 
0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED, - 0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A, - 0xBB6B, 0xC52A, 0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194, - 0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3, - 0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104, - 0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D, - 0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39, - 0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695, - 0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83, - 0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76, - 0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4, - 0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B, - 0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0, - 0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018, - 0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751, - 0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85, - 0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12, - 0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9, - 0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7, - 0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A, - 0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8, - 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A, -}; - -static inline u16 _S_(u16 v) -{ - u16 t = Sbox[Hi8(v)]; - return Sbox[Lo8(v)] ^ ((t << 8) | (t >> 8)); -} - -#define PHASE1_LOOP_COUNT 8 - -static void tkip_mixing_phase1(u16 * TTAK, const u8 * TK, const u8 * TA, - u32 IV32) -{ - int i, j; - - /* Initialize the 80-bit TTAK from TSC (IV32) and TA[0..5] */ - TTAK[0] = Lo16(IV32); - TTAK[1] = Hi16(IV32); - TTAK[2] = Mk16(TA[1], TA[0]); - TTAK[3] = Mk16(TA[3], TA[2]); - TTAK[4] = Mk16(TA[5], TA[4]); - - for (i = 0; i < PHASE1_LOOP_COUNT; i++) { - j = 2 * (i & 1); - TTAK[0] += _S_(TTAK[4] ^ Mk16(TK[1 + j], TK[0 + j])); - TTAK[1] += _S_(TTAK[0] ^ Mk16(TK[5 + j], TK[4 + j])); - TTAK[2] += _S_(TTAK[1] ^ Mk16(TK[9 + j], TK[8 + j])); - TTAK[3] += _S_(TTAK[2] ^ Mk16(TK[13 + j], TK[12 + j])); - TTAK[4] += _S_(TTAK[3] ^ Mk16(TK[1 + j], TK[0 + j])) + i; - } -} - -static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK, - u16 IV16) -{ - /* Make temporary area overlap WEP seed so that the final copy can be - * avoided on little endian hosts. 
*/ - u16 *PPK = (u16 *) & WEPSeed[4]; - - /* Step 1 - make copy of TTAK and bring in TSC */ - PPK[0] = TTAK[0]; - PPK[1] = TTAK[1]; - PPK[2] = TTAK[2]; - PPK[3] = TTAK[3]; - PPK[4] = TTAK[4]; - PPK[5] = TTAK[4] + IV16; - - /* Step 2 - 96-bit bijective mixing using S-box */ - PPK[0] += _S_(PPK[5] ^ Mk16_le((__le16 *) & TK[0])); - PPK[1] += _S_(PPK[0] ^ Mk16_le((__le16 *) & TK[2])); - PPK[2] += _S_(PPK[1] ^ Mk16_le((__le16 *) & TK[4])); - PPK[3] += _S_(PPK[2] ^ Mk16_le((__le16 *) & TK[6])); - PPK[4] += _S_(PPK[3] ^ Mk16_le((__le16 *) & TK[8])); - PPK[5] += _S_(PPK[4] ^ Mk16_le((__le16 *) & TK[10])); - - PPK[0] += RotR1(PPK[5] ^ Mk16_le((__le16 *) & TK[12])); - PPK[1] += RotR1(PPK[0] ^ Mk16_le((__le16 *) & TK[14])); - PPK[2] += RotR1(PPK[1]); - PPK[3] += RotR1(PPK[2]); - PPK[4] += RotR1(PPK[3]); - PPK[5] += RotR1(PPK[4]); - - /* Step 3 - bring in last of TK bits, assign 24-bit WEP IV value - * WEPSeed[0..2] is transmitted as WEP IV */ - WEPSeed[0] = Hi8(IV16); - WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F; - WEPSeed[2] = Lo8(IV16); - WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((__le16 *) & TK[0])) >> 1); - -#ifdef __BIG_ENDIAN - { - int i; - for (i = 0; i < 6; i++) - PPK[i] = (PPK[i] << 8) | (PPK[i] >> 8); - } -#endif -} - -static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len, - u8 * rc4key, int keylen, void *priv) -{ - struct lib80211_tkip_data *tkey = priv; - u8 *pos; - struct ieee80211_hdr *hdr; - - hdr = (struct ieee80211_hdr *)skb->data; - - if (skb_headroom(skb) < TKIP_HDR_LEN || skb->len < hdr_len) - return -1; - - if (rc4key == NULL || keylen < 16) - return -1; - - if (!tkey->tx_phase1_done) { - tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2, - tkey->tx_iv32); - tkey->tx_phase1_done = 1; - } - tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16); - - pos = skb_push(skb, TKIP_HDR_LEN); - memmove(pos, pos + TKIP_HDR_LEN, hdr_len); - pos += hdr_len; - - *pos++ = *rc4key; - *pos++ = *(rc4key + 1); - *pos++ = *(rc4key + 2); - *pos++ = (tkey->key_idx << 6) | (1 << 5) /* Ext IV included */ ; - *pos++ = tkey->tx_iv32 & 0xff; - *pos++ = (tkey->tx_iv32 >> 8) & 0xff; - *pos++ = (tkey->tx_iv32 >> 16) & 0xff; - *pos++ = (tkey->tx_iv32 >> 24) & 0xff; - - tkey->tx_iv16++; - if (tkey->tx_iv16 == 0) { - tkey->tx_phase1_done = 0; - tkey->tx_iv32++; - } - - return TKIP_HDR_LEN; -} - -static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct lib80211_tkip_data *tkey = priv; - int len; - u8 rc4key[16], *pos, *icv; - u32 crc; - - if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - net_dbg_ratelimited("TKIP countermeasures: dropped TX packet to %pM\n", - hdr->addr1); - return -1; - } - - if (skb_tailroom(skb) < 4 || skb->len < hdr_len) - return -1; - - len = skb->len - hdr_len; - pos = skb->data + hdr_len; - - if ((lib80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0) - return -1; - - crc = ~crc32_le(~0, pos, len); - icv = skb_put(skb, 4); - icv[0] = crc; - icv[1] = crc >> 8; - icv[2] = crc >> 16; - icv[3] = crc >> 24; - - arc4_setkey(&tkey->tx_ctx_arc4, rc4key, 16); - arc4_crypt(&tkey->tx_ctx_arc4, pos, pos, len + 4); - - return 0; -} - -/* - * deal with seq counter wrapping correctly. 
- * refer to timer_after() for jiffies wrapping handling - */ -static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n, - u32 iv32_o, u16 iv16_o) -{ - if ((s32)iv32_n - (s32)iv32_o < 0 || - (iv32_n == iv32_o && iv16_n <= iv16_o)) - return 1; - return 0; -} - -static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct lib80211_tkip_data *tkey = priv; - u8 rc4key[16]; - u8 keyidx, *pos; - u32 iv32; - u16 iv16; - struct ieee80211_hdr *hdr; - u8 icv[4]; - u32 crc; - int plen; - - hdr = (struct ieee80211_hdr *)skb->data; - - if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { - net_dbg_ratelimited("TKIP countermeasures: dropped received packet from %pM\n", - hdr->addr2); - return -1; - } - - if (skb->len < hdr_len + TKIP_HDR_LEN + 4) - return -1; - - pos = skb->data + hdr_len; - keyidx = pos[3]; - if (!(keyidx & (1 << 5))) { - net_dbg_ratelimited("TKIP: received packet without ExtIV flag from %pM\n", - hdr->addr2); - return -2; - } - keyidx >>= 6; - if (tkey->key_idx != keyidx) { - net_dbg_ratelimited("TKIP: RX tkey->key_idx=%d frame keyidx=%d\n", - tkey->key_idx, keyidx); - return -6; - } - if (!tkey->key_set) { - net_dbg_ratelimited("TKIP: received packet from %pM with keyid=%d that does not have a configured key\n", - hdr->addr2, keyidx); - return -3; - } - iv16 = (pos[0] << 8) | pos[2]; - iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24); - pos += TKIP_HDR_LEN; - - if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { -#ifdef CONFIG_LIB80211_DEBUG - net_dbg_ratelimited("TKIP: replay detected: STA=%pM previous TSC %08x%04x received TSC %08x%04x\n", - hdr->addr2, tkey->rx_iv32, tkey->rx_iv16, - iv32, iv16); -#endif - tkey->dot11RSNAStatsTKIPReplays++; - return -4; - } - - if (iv32 != tkey->rx_iv32 || !tkey->rx_phase1_done) { - tkip_mixing_phase1(tkey->rx_ttak, tkey->key, hdr->addr2, iv32); - tkey->rx_phase1_done = 1; - } - tkip_mixing_phase2(rc4key, tkey->key, tkey->rx_ttak, iv16); - - plen = skb->len - hdr_len - 12; - - arc4_setkey(&tkey->rx_ctx_arc4, rc4key, 16); - arc4_crypt(&tkey->rx_ctx_arc4, pos, pos, plen + 4); - - crc = ~crc32_le(~0, pos, plen); - icv[0] = crc; - icv[1] = crc >> 8; - icv[2] = crc >> 16; - icv[3] = crc >> 24; - if (memcmp(icv, pos + plen, 4) != 0) { - if (iv32 != tkey->rx_iv32) { - /* Previously cached Phase1 result was already lost, so - * it needs to be recalculated for the next packet. 
*/ - tkey->rx_phase1_done = 0; - } -#ifdef CONFIG_LIB80211_DEBUG - net_dbg_ratelimited("TKIP: ICV error detected: STA=%pM\n", - hdr->addr2); -#endif - tkey->dot11RSNAStatsTKIPICVErrors++; - return -5; - } - - /* Update real counters only after Michael MIC verification has - * completed */ - tkey->rx_iv32_new = iv32; - tkey->rx_iv16_new = iv16; - - /* Remove IV and ICV */ - memmove(skb->data + TKIP_HDR_LEN, skb->data, hdr_len); - skb_pull(skb, TKIP_HDR_LEN); - skb_trim(skb, skb->len - 4); - - return keyidx; -} - -static int michael_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *hdr, - u8 *data, size_t data_len, u8 *mic) -{ - SHASH_DESC_ON_STACK(desc, tfm_michael); - int err; - - if (tfm_michael == NULL) { - pr_warn("%s(): tfm_michael == NULL\n", __func__); - return -1; - } - - desc->tfm = tfm_michael; - - if (crypto_shash_setkey(tfm_michael, key, 8)) - return -1; - - err = crypto_shash_init(desc); - if (err) - goto out; - err = crypto_shash_update(desc, hdr, 16); - if (err) - goto out; - err = crypto_shash_update(desc, data, data_len); - if (err) - goto out; - err = crypto_shash_final(desc, mic); - -out: - shash_desc_zero(desc); - return err; -} - -static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) -{ - struct ieee80211_hdr *hdr11; - - hdr11 = (struct ieee80211_hdr *)skb->data; - - switch (le16_to_cpu(hdr11->frame_control) & - (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { - case IEEE80211_FCTL_TODS: - memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ - break; - case IEEE80211_FCTL_FROMDS: - memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr3, ETH_ALEN); /* SA */ - break; - case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS: - memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */ - break; - default: - memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ - break; - } - - if (ieee80211_is_data_qos(hdr11->frame_control)) { - hdr[12] = le16_to_cpu(*((__le16 *)ieee80211_get_qos_ctl(hdr11))) - & IEEE80211_QOS_CTL_TID_MASK; - } else - hdr[12] = 0; /* priority */ - - hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */ -} - -static int lib80211_michael_mic_add(struct sk_buff *skb, int hdr_len, - void *priv) -{ - struct lib80211_tkip_data *tkey = priv; - u8 *pos; - - if (skb_tailroom(skb) < 8 || skb->len < hdr_len) { - printk(KERN_DEBUG "Invalid packet for Michael MIC add " - "(tailroom=%d hdr_len=%d skb->len=%d)\n", - skb_tailroom(skb), hdr_len, skb->len); - return -1; - } - - michael_mic_hdr(skb, tkey->tx_hdr); - pos = skb_put(skb, 8); - if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr, - skb->data + hdr_len, skb->len - 8 - hdr_len, pos)) - return -1; - - return 0; -} - -static void lib80211_michael_mic_failure(struct net_device *dev, - struct ieee80211_hdr *hdr, - int keyidx) -{ - union iwreq_data wrqu; - struct iw_michaelmicfailure ev; - - /* TODO: needed parameters: count, keyid, key type, TSC */ - memset(&ev, 0, sizeof(ev)); - ev.flags = keyidx & IW_MICFAILURE_KEY_ID; - if (hdr->addr1[0] & 0x01) - ev.flags |= IW_MICFAILURE_GROUP; - else - ev.flags |= IW_MICFAILURE_PAIRWISE; - ev.src_addr.sa_family = ARPHRD_ETHER; - memcpy(ev.src_addr.sa_data, hdr->addr2, ETH_ALEN); - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = sizeof(ev); - wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *)&ev); -} - -static int lib80211_michael_mic_verify(struct sk_buff *skb, int keyidx, - int 
hdr_len, void *priv) -{ - struct lib80211_tkip_data *tkey = priv; - u8 mic[8]; - - if (!tkey->key_set) - return -1; - - michael_mic_hdr(skb, tkey->rx_hdr); - if (michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr, - skb->data + hdr_len, skb->len - 8 - hdr_len, mic)) - return -1; - if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) { - struct ieee80211_hdr *hdr; - hdr = (struct ieee80211_hdr *)skb->data; - printk(KERN_DEBUG "%s: Michael MIC verification failed for " - "MSDU from %pM keyidx=%d\n", - skb->dev ? skb->dev->name : "N/A", hdr->addr2, - keyidx); - if (skb->dev) - lib80211_michael_mic_failure(skb->dev, hdr, keyidx); - tkey->dot11RSNAStatsTKIPLocalMICFailures++; - return -1; - } - - /* Update TSC counters for RX now that the packet verification has - * completed. */ - tkey->rx_iv32 = tkey->rx_iv32_new; - tkey->rx_iv16 = tkey->rx_iv16_new; - - skb_trim(skb, skb->len - 8); - - return 0; -} - -static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) -{ - struct lib80211_tkip_data *tkey = priv; - int keyidx; - struct crypto_shash *tfm = tkey->tx_tfm_michael; - struct arc4_ctx *tfm2 = &tkey->tx_ctx_arc4; - struct crypto_shash *tfm3 = tkey->rx_tfm_michael; - struct arc4_ctx *tfm4 = &tkey->rx_ctx_arc4; - - keyidx = tkey->key_idx; - memset(tkey, 0, sizeof(*tkey)); - tkey->key_idx = keyidx; - tkey->tx_tfm_michael = tfm; - tkey->tx_ctx_arc4 = *tfm2; - tkey->rx_tfm_michael = tfm3; - tkey->rx_ctx_arc4 = *tfm4; - if (len == TKIP_KEY_LEN) { - memcpy(tkey->key, key, TKIP_KEY_LEN); - tkey->key_set = 1; - tkey->tx_iv16 = 1; /* TSC is initialized to 1 */ - if (seq) { - tkey->rx_iv32 = (seq[5] << 24) | (seq[4] << 16) | - (seq[3] << 8) | seq[2]; - tkey->rx_iv16 = (seq[1] << 8) | seq[0]; - } - } else if (len == 0) - tkey->key_set = 0; - else - return -1; - - return 0; -} - -static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) -{ - struct lib80211_tkip_data *tkey = priv; - - if (len < TKIP_KEY_LEN) - return -1; - - if (!tkey->key_set) - return 0; - memcpy(key, tkey->key, TKIP_KEY_LEN); - - if (seq) { - /* - * Not clear if this should return the value as is - * or - as the code previously seemed to partially - * have been written as - subtract one from it. It - * was working this way for a long time so leave it. 
- */ - seq[0] = tkey->tx_iv16; - seq[1] = tkey->tx_iv16 >> 8; - seq[2] = tkey->tx_iv32; - seq[3] = tkey->tx_iv32 >> 8; - seq[4] = tkey->tx_iv32 >> 16; - seq[5] = tkey->tx_iv32 >> 24; - } - - return TKIP_KEY_LEN; -} - -static void lib80211_tkip_print_stats(struct seq_file *m, void *priv) -{ - struct lib80211_tkip_data *tkip = priv; - seq_printf(m, - "key[%d] alg=TKIP key_set=%d " - "tx_pn=%02x%02x%02x%02x%02x%02x " - "rx_pn=%02x%02x%02x%02x%02x%02x " - "replays=%d icv_errors=%d local_mic_failures=%d\n", - tkip->key_idx, tkip->key_set, - (tkip->tx_iv32 >> 24) & 0xff, - (tkip->tx_iv32 >> 16) & 0xff, - (tkip->tx_iv32 >> 8) & 0xff, - tkip->tx_iv32 & 0xff, - (tkip->tx_iv16 >> 8) & 0xff, - tkip->tx_iv16 & 0xff, - (tkip->rx_iv32 >> 24) & 0xff, - (tkip->rx_iv32 >> 16) & 0xff, - (tkip->rx_iv32 >> 8) & 0xff, - tkip->rx_iv32 & 0xff, - (tkip->rx_iv16 >> 8) & 0xff, - tkip->rx_iv16 & 0xff, - tkip->dot11RSNAStatsTKIPReplays, - tkip->dot11RSNAStatsTKIPICVErrors, - tkip->dot11RSNAStatsTKIPLocalMICFailures); -} - -static const struct lib80211_crypto_ops lib80211_crypt_tkip = { - .name = "TKIP", - .init = lib80211_tkip_init, - .deinit = lib80211_tkip_deinit, - .encrypt_mpdu = lib80211_tkip_encrypt, - .decrypt_mpdu = lib80211_tkip_decrypt, - .encrypt_msdu = lib80211_michael_mic_add, - .decrypt_msdu = lib80211_michael_mic_verify, - .set_key = lib80211_tkip_set_key, - .get_key = lib80211_tkip_get_key, - .print_stats = lib80211_tkip_print_stats, - .extra_mpdu_prefix_len = 4 + 4, /* IV + ExtIV */ - .extra_mpdu_postfix_len = 4, /* ICV */ - .extra_msdu_postfix_len = 8, /* MIC */ - .get_flags = lib80211_tkip_get_flags, - .set_flags = lib80211_tkip_set_flags, - .owner = THIS_MODULE, -}; - -static int __init lib80211_crypto_tkip_init(void) -{ - return lib80211_register_crypto_ops(&lib80211_crypt_tkip); -} - -static void __exit lib80211_crypto_tkip_exit(void) -{ - lib80211_unregister_crypto_ops(&lib80211_crypt_tkip); -} - -module_init(lib80211_crypto_tkip_init); -module_exit(lib80211_crypto_tkip_exit); diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c deleted file mode 100644 index 3b148c7bef85..000000000000 --- a/net/wireless/lib80211_crypt_wep.c +++ /dev/null @@ -1,256 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * lib80211 crypt: host-based WEP encryption implementation for lib80211 - * - * Copyright (c) 2002-2004, Jouni Malinen - * Copyright (c) 2008, John W. 
Linville - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("lib80211 crypt: WEP"); -MODULE_LICENSE("GPL"); - -struct lib80211_wep_data { - u32 iv; -#define WEP_KEY_LEN 13 - u8 key[WEP_KEY_LEN + 1]; - u8 key_len; - u8 key_idx; - struct arc4_ctx tx_ctx; - struct arc4_ctx rx_ctx; -}; - -static void *lib80211_wep_init(int keyidx) -{ - struct lib80211_wep_data *priv; - - if (fips_enabled) - return NULL; - - priv = kzalloc(sizeof(*priv), GFP_ATOMIC); - if (priv == NULL) - return NULL; - priv->key_idx = keyidx; - - /* start WEP IV from a random value */ - get_random_bytes(&priv->iv, 4); - - return priv; -} - -static void lib80211_wep_deinit(void *priv) -{ - kfree_sensitive(priv); -} - -/* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */ -static int lib80211_wep_build_iv(struct sk_buff *skb, int hdr_len, - u8 *key, int keylen, void *priv) -{ - struct lib80211_wep_data *wep = priv; - u32 klen; - u8 *pos; - - if (skb_headroom(skb) < 4 || skb->len < hdr_len) - return -1; - - pos = skb_push(skb, 4); - memmove(pos, pos + 4, hdr_len); - pos += hdr_len; - - klen = 3 + wep->key_len; - - wep->iv++; - - /* Fluhrer, Mantin, and Shamir have reported weaknesses in the key - * scheduling algorithm of RC4. At least IVs (KeyByte + 3, 0xff, N) - * can be used to speedup attacks, so avoid using them. */ - if ((wep->iv & 0xff00) == 0xff00) { - u8 B = (wep->iv >> 16) & 0xff; - if (B >= 3 && B < klen) - wep->iv += 0x0100; - } - - /* Prepend 24-bit IV to RC4 key and TX frame */ - *pos++ = (wep->iv >> 16) & 0xff; - *pos++ = (wep->iv >> 8) & 0xff; - *pos++ = wep->iv & 0xff; - *pos++ = wep->key_idx << 6; - - return 0; -} - -/* Perform WEP encryption on given skb that has at least 4 bytes of headroom - * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted, - * so the payload length increases with 8 bytes. - * - * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data)) - */ -static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct lib80211_wep_data *wep = priv; - u32 crc, klen, len; - u8 *pos, *icv; - u8 key[WEP_KEY_LEN + 3]; - - /* other checks are in lib80211_wep_build_iv */ - if (skb_tailroom(skb) < 4) - return -1; - - /* add the IV to the frame */ - if (lib80211_wep_build_iv(skb, hdr_len, NULL, 0, priv)) - return -1; - - /* Copy the IV into the first 3 bytes of the key */ - skb_copy_from_linear_data_offset(skb, hdr_len, key, 3); - - /* Copy rest of the WEP key (the secret part) */ - memcpy(key + 3, wep->key, wep->key_len); - - len = skb->len - hdr_len - 4; - pos = skb->data + hdr_len + 4; - klen = 3 + wep->key_len; - - /* Append little-endian CRC32 over only the data and encrypt it to produce ICV */ - crc = ~crc32_le(~0, pos, len); - icv = skb_put(skb, 4); - icv[0] = crc; - icv[1] = crc >> 8; - icv[2] = crc >> 16; - icv[3] = crc >> 24; - - arc4_setkey(&wep->tx_ctx, key, klen); - arc4_crypt(&wep->tx_ctx, pos, pos, len + 4); - - return 0; -} - -/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of - * the frame: IV (4 bytes), encrypted payload (including SNAP header), - * ICV (4 bytes). len includes both IV and ICV. - * - * Returns 0 if frame was decrypted successfully and ICV was correct and -1 on - * failure. If frame is OK, IV and ICV will be removed. 
- */ -static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct lib80211_wep_data *wep = priv; - u32 crc, klen, plen; - u8 key[WEP_KEY_LEN + 3]; - u8 keyidx, *pos, icv[4]; - - if (skb->len < hdr_len + 8) - return -1; - - pos = skb->data + hdr_len; - key[0] = *pos++; - key[1] = *pos++; - key[2] = *pos++; - keyidx = *pos++ >> 6; - if (keyidx != wep->key_idx) - return -1; - - klen = 3 + wep->key_len; - - /* Copy rest of the WEP key (the secret part) */ - memcpy(key + 3, wep->key, wep->key_len); - - /* Apply RC4 to data and compute CRC32 over decrypted data */ - plen = skb->len - hdr_len - 8; - - arc4_setkey(&wep->rx_ctx, key, klen); - arc4_crypt(&wep->rx_ctx, pos, pos, plen + 4); - - crc = ~crc32_le(~0, pos, plen); - icv[0] = crc; - icv[1] = crc >> 8; - icv[2] = crc >> 16; - icv[3] = crc >> 24; - if (memcmp(icv, pos + plen, 4) != 0) { - /* ICV mismatch - drop frame */ - return -2; - } - - /* Remove IV and ICV */ - memmove(skb->data + 4, skb->data, hdr_len); - skb_pull(skb, 4); - skb_trim(skb, skb->len - 4); - - return 0; -} - -static int lib80211_wep_set_key(void *key, int len, u8 * seq, void *priv) -{ - struct lib80211_wep_data *wep = priv; - - if (len < 0 || len > WEP_KEY_LEN) - return -1; - - memcpy(wep->key, key, len); - wep->key_len = len; - - return 0; -} - -static int lib80211_wep_get_key(void *key, int len, u8 * seq, void *priv) -{ - struct lib80211_wep_data *wep = priv; - - if (len < wep->key_len) - return -1; - - memcpy(key, wep->key, wep->key_len); - - return wep->key_len; -} - -static void lib80211_wep_print_stats(struct seq_file *m, void *priv) -{ - struct lib80211_wep_data *wep = priv; - seq_printf(m, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len); -} - -static const struct lib80211_crypto_ops lib80211_crypt_wep = { - .name = "WEP", - .init = lib80211_wep_init, - .deinit = lib80211_wep_deinit, - .encrypt_mpdu = lib80211_wep_encrypt, - .decrypt_mpdu = lib80211_wep_decrypt, - .encrypt_msdu = NULL, - .decrypt_msdu = NULL, - .set_key = lib80211_wep_set_key, - .get_key = lib80211_wep_get_key, - .print_stats = lib80211_wep_print_stats, - .extra_mpdu_prefix_len = 4, /* IV */ - .extra_mpdu_postfix_len = 4, /* ICV */ - .owner = THIS_MODULE, -}; - -static int __init lib80211_crypto_wep_init(void) -{ - return lib80211_register_crypto_ops(&lib80211_crypt_wep); -} - -static void __exit lib80211_crypto_wep_exit(void) -{ - lib80211_unregister_crypto_ops(&lib80211_crypt_wep); -} - -module_init(lib80211_crypto_wep_init); -module_exit(lib80211_crypto_wep_exit); -- cgit v1.2.3 From 3a1d429ebd43bcfdf3590096ca72cbf593d1598b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 21:02:53 +0200 Subject: wifi: wext/libipw: move spy implementation to libipw There's no driver left using this other than ipw2200, so move the data bookkeeping and code into libipw. 
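As a rough, illustrative sketch (not part of the patch itself), a libipw-based driver opts in to the relocated spy code by pointing its WEXT handler table at the new ipw_wx_*_spy entry points and by setting the new spy_enabled flag; the handler and flag names below come from the hunks that follow, while the my_* identifiers are placeholders:

    /* Hypothetical driver glue; assumes <net/iw_handler.h> and "libipw.h". */
    static iw_handler my_wx_handlers[] = {
    	IW_HANDLER(SIOCSIWSPY,    ipw_wx_set_spy),
    	IW_HANDLER(SIOCGIWSPY,    ipw_wx_get_spy),
    	IW_HANDLER(SIOCSIWTHRSPY, ipw_wx_set_thrspy),
    	IW_HANDLER(SIOCGIWTHRSPY, ipw_wx_get_thrspy),
    };

    static void my_enable_spy(struct libipw_device *ieee)
    {
    	/* Turns on the bookkeeping that now lives in libipw_spy.c; with
    	 * this set, libipw_rx() feeds RX quality into libipw_spy_update(). */
    	ieee->spy_enabled = true;
    }
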
Link: https://patch.msgid.link/20241007210254.037d864cda7d.Ib2197cb056ff05746d3521a5fba637062acb7314@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/ipw2x00/Kconfig | 3 - drivers/net/wireless/intel/ipw2x00/Makefile | 1 + drivers/net/wireless/intel/ipw2x00/ipw2200.c | 10 +- drivers/net/wireless/intel/ipw2x00/libipw.h | 13 ++ drivers/net/wireless/intel/ipw2x00/libipw_rx.c | 2 +- drivers/net/wireless/intel/ipw2x00/libipw_spy.c | 232 ++++++++++++++++++++++++ include/net/iw_handler.h | 18 -- net/wireless/Kconfig | 3 - net/wireless/Makefile | 1 - net/wireless/wext-spy.c | 232 ------------------------ 10 files changed, 252 insertions(+), 263 deletions(-) create mode 100644 drivers/net/wireless/intel/ipw2x00/libipw_spy.c delete mode 100644 net/wireless/wext-spy.c (limited to 'include') diff --git a/drivers/net/wireless/intel/ipw2x00/Kconfig b/drivers/net/wireless/intel/ipw2x00/Kconfig index d9c042772399..ce34118f1e90 100644 --- a/drivers/net/wireless/intel/ipw2x00/Kconfig +++ b/drivers/net/wireless/intel/ipw2x00/Kconfig @@ -7,7 +7,6 @@ config IPW2100 tristate "Intel PRO/Wireless 2100 Network Connection" depends on PCI && CFG80211 select WIRELESS_EXT - select WEXT_SPY select WEXT_PRIV select FW_LOADER select LIBIPW @@ -68,7 +67,6 @@ config IPW2200 depends on PCI && CFG80211 select CFG80211_WEXT_EXPORT select WIRELESS_EXT - select WEXT_SPY select WEXT_PRIV select FW_LOADER select LIBIPW @@ -156,7 +154,6 @@ config LIBIPW tristate depends on PCI && CFG80211 select WIRELESS_EXT - select WEXT_SPY select CRYPTO select CRYPTO_MICHAEL_MIC select CRC32 diff --git a/drivers/net/wireless/intel/ipw2x00/Makefile b/drivers/net/wireless/intel/ipw2x00/Makefile index 60c5faccbe15..91e6091c4ebf 100644 --- a/drivers/net/wireless/intel/ipw2x00/Makefile +++ b/drivers/net/wireless/intel/ipw2x00/Makefile @@ -13,6 +13,7 @@ libipw-objs := \ libipw_rx.o \ libipw_wx.o \ libipw_geo.o \ + libipw_spy.o \ libipw_crypto.o \ libipw_crypto_ccmp.o \ libipw_crypto_tkip.o \ diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c index f4fd1fc784b7..0008b4615731 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c @@ -9856,10 +9856,10 @@ static iw_handler ipw_wx_handlers[] = { IW_HANDLER(SIOCGIWENCODE, ipw_wx_get_encode), IW_HANDLER(SIOCSIWPOWER, ipw_wx_set_power), IW_HANDLER(SIOCGIWPOWER, ipw_wx_get_power), - IW_HANDLER(SIOCSIWSPY, iw_handler_set_spy), - IW_HANDLER(SIOCGIWSPY, iw_handler_get_spy), - IW_HANDLER(SIOCSIWTHRSPY, iw_handler_set_thrspy), - IW_HANDLER(SIOCGIWTHRSPY, iw_handler_get_thrspy), + IW_HANDLER(SIOCSIWSPY, ipw_wx_set_spy), + IW_HANDLER(SIOCGIWSPY, ipw_wx_get_spy), + IW_HANDLER(SIOCSIWTHRSPY, ipw_wx_set_thrspy), + IW_HANDLER(SIOCGIWTHRSPY, ipw_wx_get_thrspy), IW_HANDLER(SIOCSIWGENIE, ipw_wx_set_genie), IW_HANDLER(SIOCGIWGENIE, ipw_wx_get_genie), IW_HANDLER(SIOCSIWMLME, ipw_wx_set_mlme), @@ -11636,7 +11636,7 @@ static int ipw_pci_probe(struct pci_dev *pdev, priv->ieee->worst_rssi = -85; net_dev->netdev_ops = &ipw_netdev_ops; - priv->wireless_data.spy_data = &priv->ieee->spy_data; + priv->ieee->spy_enabled = true; net_dev->wireless_data = &priv->wireless_data; net_dev->wireless_handlers = &ipw_wx_handler_def; net_dev->ethtool_ops = &ipw_ethtool_ops; diff --git a/drivers/net/wireless/intel/ipw2x00/libipw.h b/drivers/net/wireless/intel/ipw2x00/libipw.h index bc727c99ff3c..3c20353e5a41 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw.h +++ b/drivers/net/wireless/intel/ipw2x00/libipw.h @@ 
-788,6 +788,7 @@ struct libipw_device { int iw_mode; /* operating mode (IW_MODE_*) */ struct iw_spy_data spy_data; /* iwspy support */ + bool spy_enabled; spinlock_t lock; @@ -1083,4 +1084,16 @@ void libipw_crypto_tkip_exit(void); void libipw_crypto_ccmp_exit(void); void libipw_crypto_exit(void); + +int ipw_wx_set_spy(struct net_device *dev, struct iw_request_info *info, + union iwreq_data *wrqu, char *extra); +int ipw_wx_get_spy(struct net_device *dev, struct iw_request_info *info, + union iwreq_data *wrqu, char *extra); +int ipw_wx_set_thrspy(struct net_device *dev, struct iw_request_info *info, + union iwreq_data *wrqu, char *extra); +int ipw_wx_get_thrspy(struct net_device *dev, struct iw_request_info *info, + union iwreq_data *wrqu, char *extra); +void libipw_spy_update(struct net_device *dev, unsigned char *address, + struct iw_quality *wstats); + #endif /* LIBIPW_H */ diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_rx.c b/drivers/net/wireless/intel/ipw2x00/libipw_rx.c index 1fe05e73a17c..7e41cb7bbfe0 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_rx.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_rx.c @@ -393,7 +393,7 @@ int libipw_rx(struct libipw_device *ieee, struct sk_buff *skb, wstats.updated |= IW_QUAL_QUAL_INVALID; /* Update spy records */ - wireless_spy_update(ieee->dev, hdr->addr2, &wstats); + libipw_spy_update(ieee->dev, hdr->addr2, &wstats); } #endif /* IW_WIRELESS_SPY */ #endif /* CONFIG_WIRELESS_EXT */ diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_spy.c b/drivers/net/wireless/intel/ipw2x00/libipw_spy.c new file mode 100644 index 000000000000..979aeb10aeeb --- /dev/null +++ b/drivers/net/wireless/intel/ipw2x00/libipw_spy.c @@ -0,0 +1,232 @@ +/* + * This file implement the Wireless Extensions spy API. + * + * Authors : Jean Tourrilhes - HPL - + * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. + * + * (As all part of the Linux kernel, this file is GPL) + */ + +#include +#include +#include +#include +#include +#include +#include +#include "libipw.h" + +static struct iw_spy_data *get_spydata(struct net_device *dev) +{ + if (dev->wireless_data && dev->wireless_data->libipw && + dev->wireless_data->libipw->spy_enabled) + return &dev->wireless_data->libipw->spy_data; + return NULL; +} + +int ipw_wx_set_spy(struct net_device * dev, + struct iw_request_info * info, + union iwreq_data * wrqu, + char * extra) +{ + struct iw_spy_data * spydata = get_spydata(dev); + struct sockaddr * address = (struct sockaddr *) extra; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return -EOPNOTSUPP; + + /* Disable spy collection while we copy the addresses. + * While we copy addresses, any call to libipw_spy_update() + * will NOP. This is OK, as anyway the addresses are changing. */ + spydata->spy_number = 0; + + /* We want to operate without locking, because libipw_spy_update() + * most likely will happen in the interrupt handler, and therefore + * have its own locking constraints and needs performance. + * The rtnl_lock() make sure we don't race with the other iw_handlers. + * This make sure libipw_spy_update() "see" that the spy list + * is temporarily disabled. */ + smp_wmb(); + + /* Are there are addresses to copy? 
*/ + if (wrqu->data.length > 0) { + int i; + + /* Copy addresses */ + for (i = 0; i < wrqu->data.length; i++) + memcpy(spydata->spy_address[i], address[i].sa_data, + ETH_ALEN); + /* Reset stats */ + memset(spydata->spy_stat, 0, + sizeof(struct iw_quality) * IW_MAX_SPY); + } + + /* Make sure above is updated before re-enabling */ + smp_wmb(); + + /* Enable addresses */ + spydata->spy_number = wrqu->data.length; + + return 0; +} +EXPORT_SYMBOL(ipw_wx_set_spy); + +int ipw_wx_get_spy(struct net_device * dev, + struct iw_request_info * info, + union iwreq_data * wrqu, + char * extra) +{ + struct iw_spy_data * spydata = get_spydata(dev); + struct sockaddr * address = (struct sockaddr *) extra; + int i; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return -EOPNOTSUPP; + + wrqu->data.length = spydata->spy_number; + + /* Copy addresses. */ + for (i = 0; i < spydata->spy_number; i++) { + memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN); + address[i].sa_family = AF_UNIX; + } + /* Copy stats to the user buffer (just after). */ + if (spydata->spy_number > 0) + memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number), + spydata->spy_stat, + sizeof(struct iw_quality) * spydata->spy_number); + /* Reset updated flags. */ + for (i = 0; i < spydata->spy_number; i++) + spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED; + return 0; +} +EXPORT_SYMBOL(ipw_wx_get_spy); + +/*------------------------------------------------------------------*/ +/* + * Standard Wireless Handler : set spy threshold + */ +int ipw_wx_set_thrspy(struct net_device * dev, + struct iw_request_info * info, + union iwreq_data * wrqu, + char * extra) +{ + struct iw_spy_data * spydata = get_spydata(dev); + struct iw_thrspy * threshold = (struct iw_thrspy *) extra; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return -EOPNOTSUPP; + + /* Just do it */ + spydata->spy_thr_low = threshold->low; + spydata->spy_thr_high = threshold->high; + + /* Clear flag */ + memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under)); + + return 0; +} +EXPORT_SYMBOL(ipw_wx_set_thrspy); + +/*------------------------------------------------------------------*/ +/* + * Standard Wireless Handler : get spy threshold + */ +int ipw_wx_get_thrspy(struct net_device * dev, + struct iw_request_info * info, + union iwreq_data * wrqu, + char * extra) +{ + struct iw_spy_data * spydata = get_spydata(dev); + struct iw_thrspy * threshold = (struct iw_thrspy *) extra; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return -EOPNOTSUPP; + + /* Just do it */ + threshold->low = spydata->spy_thr_low; + threshold->high = spydata->spy_thr_high; + + return 0; +} +EXPORT_SYMBOL(ipw_wx_get_thrspy); + +/*------------------------------------------------------------------*/ +/* + * Prepare and send a Spy Threshold event + */ +static void iw_send_thrspy_event(struct net_device * dev, + struct iw_spy_data * spydata, + unsigned char * address, + struct iw_quality * wstats) +{ + union iwreq_data wrqu; + struct iw_thrspy threshold; + + /* Init */ + wrqu.data.length = 1; + wrqu.data.flags = 0; + /* Copy address */ + memcpy(threshold.addr.sa_data, address, ETH_ALEN); + threshold.addr.sa_family = ARPHRD_ETHER; + /* Copy stats */ + threshold.qual = *wstats; + /* Copy also thresholds */ + threshold.low = spydata->spy_thr_low; + threshold.high = spydata->spy_thr_high; + + /* Send event to user space */ + wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold); +} + +/* 
---------------------------------------------------------------- */ +/* + * Call for the driver to update the spy data. + * For now, the spy data is a simple array. As the size of the array is + * small, this is good enough. If we wanted to support larger number of + * spy addresses, we should use something more efficient... + */ +void libipw_spy_update(struct net_device * dev, + unsigned char * address, + struct iw_quality * wstats) +{ + struct iw_spy_data * spydata = get_spydata(dev); + int i; + int match = -1; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return; + + /* Update all records that match */ + for (i = 0; i < spydata->spy_number; i++) + if (ether_addr_equal(address, spydata->spy_address[i])) { + memcpy(&(spydata->spy_stat[i]), wstats, + sizeof(struct iw_quality)); + match = i; + } + + /* Generate an event if we cross the spy threshold. + * To avoid event storms, we have a simple hysteresis : we generate + * event only when we go under the low threshold or above the + * high threshold. */ + if (match >= 0) { + if (spydata->spy_thr_under[match]) { + if (wstats->level > spydata->spy_thr_high.level) { + spydata->spy_thr_under[match] = 0; + iw_send_thrspy_event(dev, spydata, + address, wstats); + } + } else { + if (wstats->level < spydata->spy_thr_low.level) { + spydata->spy_thr_under[match] = 1; + iw_send_thrspy_event(dev, spydata, + address, wstats); + } + } + } +} diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index 7af1082ea9a0..a7b502958d27 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -418,8 +418,6 @@ struct iw_spy_data { struct libipw_device; /* The struct */ struct iw_public_data { - /* Driver enhanced spy support */ - struct iw_spy_data * spy_data; /* Legacy structure managed by the ipw2x00-specific IEEE 802.11 layer */ struct libipw_device * libipw; }; @@ -443,22 +441,6 @@ static inline void wireless_nlevent_flush(void) {} /* We may need a function to send a stream of events to user space. * More on that later... 
*/ -/* Standard handler for SIOCSIWSPY */ -int iw_handler_set_spy(struct net_device *dev, struct iw_request_info *info, - union iwreq_data *wrqu, char *extra); -/* Standard handler for SIOCGIWSPY */ -int iw_handler_get_spy(struct net_device *dev, struct iw_request_info *info, - union iwreq_data *wrqu, char *extra); -/* Standard handler for SIOCSIWTHRSPY */ -int iw_handler_set_thrspy(struct net_device *dev, struct iw_request_info *info, - union iwreq_data *wrqu, char *extra); -/* Standard handler for SIOCGIWTHRSPY */ -int iw_handler_get_thrspy(struct net_device *dev, struct iw_request_info *info, - union iwreq_data *wrqu, char *extra); -/* Driver call to update spy records */ -void wireless_spy_update(struct net_device *dev, unsigned char *address, - struct iw_quality *wstats); - /************************* INLINE FUNCTIONS *************************/ /* * Function that are so simple that it's more efficient inlining them diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 733c53ad4de5..8c8bd8b75708 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -11,9 +11,6 @@ config WEXT_PROC depends on PROC_FS depends on WEXT_CORE -config WEXT_SPY - bool - config WEXT_PRIV bool diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 27f211bd9954..62a83faf0e07 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -4,7 +4,6 @@ obj-y += tests/ obj-$(CONFIG_WEXT_CORE) += wext-core.o obj-$(CONFIG_WEXT_PROC) += wext-proc.o -obj-$(CONFIG_WEXT_SPY) += wext-spy.o obj-$(CONFIG_WEXT_PRIV) += wext-priv.o cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o diff --git a/net/wireless/wext-spy.c b/net/wireless/wext-spy.c deleted file mode 100644 index b379a0371653..000000000000 --- a/net/wireless/wext-spy.c +++ /dev/null @@ -1,232 +0,0 @@ -/* - * This file implement the Wireless Extensions spy API. - * - * Authors : Jean Tourrilhes - HPL - - * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. - * - * (As all part of the Linux kernel, this file is GPL) - */ - -#include -#include -#include -#include -#include -#include -#include - -static inline struct iw_spy_data *get_spydata(struct net_device *dev) -{ - /* This is the new way */ - if (dev->wireless_data) - return dev->wireless_data->spy_data; - return NULL; -} - -int iw_handler_set_spy(struct net_device * dev, - struct iw_request_info * info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct sockaddr * address = (struct sockaddr *) extra; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - /* Disable spy collection while we copy the addresses. - * While we copy addresses, any call to wireless_spy_update() - * will NOP. This is OK, as anyway the addresses are changing. */ - spydata->spy_number = 0; - - /* We want to operate without locking, because wireless_spy_update() - * most likely will happen in the interrupt handler, and therefore - * have its own locking constraints and needs performance. - * The rtnl_lock() make sure we don't race with the other iw_handlers. - * This make sure wireless_spy_update() "see" that the spy list - * is temporarily disabled. */ - smp_wmb(); - - /* Are there are addresses to copy? 
*/ - if (wrqu->data.length > 0) { - int i; - - /* Copy addresses */ - for (i = 0; i < wrqu->data.length; i++) - memcpy(spydata->spy_address[i], address[i].sa_data, - ETH_ALEN); - /* Reset stats */ - memset(spydata->spy_stat, 0, - sizeof(struct iw_quality) * IW_MAX_SPY); - } - - /* Make sure above is updated before re-enabling */ - smp_wmb(); - - /* Enable addresses */ - spydata->spy_number = wrqu->data.length; - - return 0; -} -EXPORT_SYMBOL(iw_handler_set_spy); - -int iw_handler_get_spy(struct net_device * dev, - struct iw_request_info * info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct sockaddr * address = (struct sockaddr *) extra; - int i; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - wrqu->data.length = spydata->spy_number; - - /* Copy addresses. */ - for (i = 0; i < spydata->spy_number; i++) { - memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN); - address[i].sa_family = AF_UNIX; - } - /* Copy stats to the user buffer (just after). */ - if (spydata->spy_number > 0) - memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number), - spydata->spy_stat, - sizeof(struct iw_quality) * spydata->spy_number); - /* Reset updated flags. */ - for (i = 0; i < spydata->spy_number; i++) - spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED; - return 0; -} -EXPORT_SYMBOL(iw_handler_get_spy); - -/*------------------------------------------------------------------*/ -/* - * Standard Wireless Handler : set spy threshold - */ -int iw_handler_set_thrspy(struct net_device * dev, - struct iw_request_info *info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct iw_thrspy * threshold = (struct iw_thrspy *) extra; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - /* Just do it */ - spydata->spy_thr_low = threshold->low; - spydata->spy_thr_high = threshold->high; - - /* Clear flag */ - memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under)); - - return 0; -} -EXPORT_SYMBOL(iw_handler_set_thrspy); - -/*------------------------------------------------------------------*/ -/* - * Standard Wireless Handler : get spy threshold - */ -int iw_handler_get_thrspy(struct net_device * dev, - struct iw_request_info *info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct iw_thrspy * threshold = (struct iw_thrspy *) extra; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - /* Just do it */ - threshold->low = spydata->spy_thr_low; - threshold->high = spydata->spy_thr_high; - - return 0; -} -EXPORT_SYMBOL(iw_handler_get_thrspy); - -/*------------------------------------------------------------------*/ -/* - * Prepare and send a Spy Threshold event - */ -static void iw_send_thrspy_event(struct net_device * dev, - struct iw_spy_data * spydata, - unsigned char * address, - struct iw_quality * wstats) -{ - union iwreq_data wrqu; - struct iw_thrspy threshold; - - /* Init */ - wrqu.data.length = 1; - wrqu.data.flags = 0; - /* Copy address */ - memcpy(threshold.addr.sa_data, address, ETH_ALEN); - threshold.addr.sa_family = ARPHRD_ETHER; - /* Copy stats */ - threshold.qual = *wstats; - /* Copy also thresholds */ - threshold.low = spydata->spy_thr_low; - threshold.high = spydata->spy_thr_high; - - /* Send event to user space */ - wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, 
(char *) &threshold); -} - -/* ---------------------------------------------------------------- */ -/* - * Call for the driver to update the spy data. - * For now, the spy data is a simple array. As the size of the array is - * small, this is good enough. If we wanted to support larger number of - * spy addresses, we should use something more efficient... - */ -void wireless_spy_update(struct net_device * dev, - unsigned char * address, - struct iw_quality * wstats) -{ - struct iw_spy_data * spydata = get_spydata(dev); - int i; - int match = -1; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return; - - /* Update all records that match */ - for (i = 0; i < spydata->spy_number; i++) - if (ether_addr_equal(address, spydata->spy_address[i])) { - memcpy(&(spydata->spy_stat[i]), wstats, - sizeof(struct iw_quality)); - match = i; - } - - /* Generate an event if we cross the spy threshold. - * To avoid event storms, we have a simple hysteresis : we generate - * event only when we go under the low threshold or above the - * high threshold. */ - if (match >= 0) { - if (spydata->spy_thr_under[match]) { - if (wstats->level > spydata->spy_thr_high.level) { - spydata->spy_thr_under[match] = 0; - iw_send_thrspy_event(dev, spydata, - address, wstats); - } - } else { - if (wstats->level < spydata->spy_thr_low.level) { - spydata->spy_thr_under[match] = 1; - iw_send_thrspy_event(dev, spydata, - address, wstats); - } - } - } -} -EXPORT_SYMBOL(wireless_spy_update); -- cgit v1.2.3 From 836265d31631e28000fc8917ce697fc687a58724 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 21:35:25 +0200 Subject: wifi: remove iw_public_data from struct net_device Given the previous patches, we no longer need the struct iw_public_data etc., it's only used by the old Intel drivers (and ps3_gelic creates it but then doesn't use it). Remove all of that, including the pointer in struct net_device. 
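A minimal sketch of the access path left after this removal (mirroring the libipw_spy.c hunk below; the example_ name is illustrative): the spy state is reached purely through the device private area, with no net_device->wireless_data indirection:

    /* Sketch: spy lookup once iw_public_data is gone. Assumes the
     * net_device was allocated with struct libipw_device as its private
     * data, as the ipw2x00 drivers do. */
    static struct iw_spy_data *example_get_spydata(struct net_device *dev)
    {
    	struct libipw_device *ieee = netdev_priv(dev);

    	/* old path: dev->wireless_data->libipw->spy_data (now removed) */
    	return ieee->spy_enabled ? &ieee->spy_data : NULL;
    }
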
Link: https://patch.msgid.link/20241007213525.8b2d52b60531.I6a27aaf30bded9a0977f07f47fba2bd31a3b3330@changeid Signed-off-by: Johannes Berg --- Documentation/networking/net_cachelines/net_device.rst | 1 - drivers/net/ethernet/toshiba/ps3_gelic_wireless.c | 1 - drivers/net/ethernet/toshiba/ps3_gelic_wireless.h | 1 - drivers/net/wireless/intel/ipw2x00/ipw2100.c | 2 -- drivers/net/wireless/intel/ipw2x00/ipw2100.h | 2 -- drivers/net/wireless/intel/ipw2x00/ipw2200.c | 1 - drivers/net/wireless/intel/ipw2x00/ipw2200.h | 2 -- drivers/net/wireless/intel/ipw2x00/libipw_spy.c | 7 ++++--- include/linux/netdevice.h | 2 -- include/net/iw_handler.h | 18 ------------------ 10 files changed, 4 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 22b07c814f4a..b7d1d4c7a1ab 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -51,7 +51,6 @@ struct_net_device_core_stats* core_stats atomic_t carrier_up_count atomic_t carrier_down_count struct_iw_handler_def* wireless_handlers -struct_iw_public_data* wireless_data struct_ethtool_ops* ethtool_ops struct_l3mdev_ops* l3mdev_ops struct_ndisc_ops* ndisc_ops diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c index 44488c153ea2..4fbe4b7cd12a 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c @@ -2566,7 +2566,6 @@ static void gelic_wl_setup_netdev_ops(struct net_device *netdev) netdev->ethtool_ops = &gelic_wl_ethtool_ops; netdev->netdev_ops = &gelic_wl_netdevice_ops; - netdev->wireless_data = &wl->wireless_data; netdev->wireless_handlers = &gelic_wl_wext_handler_def; } diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.h b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.h index 1f203d1ae8db..dbabf538e10a 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.h @@ -276,7 +276,6 @@ struct gelic_wl_info { u8 active_bssid[ETH_ALEN]; /* associated bssid */ unsigned int essid_len; - struct iw_public_data wireless_data; struct iw_statistics iwstat; }; diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index a89e06c1b8ee..15bf35f2de2a 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -6022,8 +6022,6 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev, dev->netdev_ops = &ipw2100_netdev_ops; dev->ethtool_ops = &ipw2100_ethtool_ops; dev->wireless_handlers = &ipw2100_wx_handler_def; - priv->wireless_data.libipw = priv->ieee; - dev->wireless_data = &priv->wireless_data; dev->watchdog_timeo = 3 * HZ; dev->irq = 0; dev->min_mtu = 68; diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.h b/drivers/net/wireless/intel/ipw2x00/ipw2100.h index b34085ade3aa..6f81f509b9cb 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.h +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.h @@ -554,8 +554,6 @@ struct ipw2100_priv { struct net_device *net_dev; struct iw_statistics wstats; - struct iw_public_data wireless_data; - struct tasklet_struct irq_tasklet; struct delayed_work reset_work; diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c index c0e9d2109e34..be1d971b3d32 100644 --- 
a/drivers/net/wireless/intel/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c @@ -11645,7 +11645,6 @@ static int ipw_pci_probe(struct pci_dev *pdev, net_dev->netdev_ops = &ipw_netdev_ops; priv->ieee->spy_enabled = true; - net_dev->wireless_data = &priv->wireless_data; net_dev->wireless_handlers = &ipw_wx_handler_def; net_dev->ethtool_ops = &ipw_ethtool_ops; diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.h b/drivers/net/wireless/intel/ipw2x00/ipw2200.h index 46f119123b49..d3db54e6d37c 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.h +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.h @@ -1274,8 +1274,6 @@ struct ipw_priv { struct iw_statistics wstats; - struct iw_public_data wireless_data; - int user_requested_scan; u8 direct_scan_ssid[IW_ESSID_MAX_SIZE]; u8 direct_scan_ssid_len; diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_spy.c b/drivers/net/wireless/intel/ipw2x00/libipw_spy.c index 979aeb10aeeb..ba876e92f7f6 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_spy.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_spy.c @@ -18,9 +18,10 @@ static struct iw_spy_data *get_spydata(struct net_device *dev) { - if (dev->wireless_data && dev->wireless_data->libipw && - dev->wireless_data->libipw->spy_enabled) - return &dev->wireless_data->libipw->spy_data; + struct libipw_device *ieee = netdev_priv(dev); + + if (ieee->spy_enabled) + return &ieee->spy_data; return NULL; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e87b5e488325..f0abed1e6e45 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1773,7 +1773,6 @@ enum netdev_reg_state { * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, * see for details. - * @wireless_data: Instance data managed by the core of wireless extensions * * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions @@ -2150,7 +2149,6 @@ struct net_device { #ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def *wireless_handlers; - struct iw_public_data *wireless_data; #endif const struct ethtool_ops *ethtool_ops; #ifdef CONFIG_NET_L3_MASTER_DEV diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index a7b502958d27..fc44fcca1d5c 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -404,24 +404,6 @@ struct iw_spy_data { u_char spy_thr_under[IW_MAX_SPY]; }; -/* --------------------- DEVICE WIRELESS DATA --------------------- */ -/* - * This is all the wireless data specific to a device instance that - * is managed by the core of Wireless Extensions or the 802.11 layer. - * We only keep pointer to those structures, so that a driver is free - * to share them between instances. - * This structure should be initialised before registering the device. - * Access to this data follow the same rules as any other struct net_device - * data (i.e. valid as long as struct net_device exist, same locking rules). - */ -/* Forward declaration */ -struct libipw_device; -/* The struct */ -struct iw_public_data { - /* Legacy structure managed by the ipw2x00-specific IEEE 802.11 layer */ - struct libipw_device * libipw; -}; - /**************************** PROTOTYPES ****************************/ /* * Functions part of the Wireless Extensions (defined in net/wireless/wext-core.c). 
-- cgit v1.2.3 From aee809aaa2d13bf560fe38d28c4969605e6d9d0e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 21:47:16 +0200 Subject: wifi: cfg80211: unexport wireless_nlevent_flush() This no longer needs to be exported, so don't export it. Link: https://patch.msgid.link/20241007214715.3dd736dc3ac0.I1388536e99c37f28a007dd753c473ad21513d9a9@changeid Signed-off-by: Johannes Berg --- include/net/iw_handler.h | 6 ------ net/wireless/wext-compat.h | 6 ++++++ net/wireless/wext-core.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index fc44fcca1d5c..804587b7592b 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -413,12 +413,6 @@ struct iw_spy_data { /* Send a single event to user space */ void wireless_send_event(struct net_device *dev, unsigned int cmd, union iwreq_data *wrqu, const char *extra); -#ifdef CONFIG_WEXT_CORE -/* flush all previous wext events - if work is done from netdev notifiers */ -void wireless_nlevent_flush(void); -#else -static inline void wireless_nlevent_flush(void) {} -#endif /* We may need a function to send a stream of events to user space. * More on that later... */ diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h index 8251ca5df8ae..f680dd134582 100644 --- a/net/wireless/wext-compat.h +++ b/net/wireless/wext-compat.h @@ -5,6 +5,12 @@ #include #include +#ifdef CONFIG_WEXT_CORE +void wireless_nlevent_flush(void); +#else +static inline void wireless_nlevent_flush(void) {} +#endif + int cfg80211_ibss_wext_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *wextfreq, char *extra); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 3bb04b05c5ce..00c640b3e86e 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -20,6 +20,7 @@ #include #include #include +#include "wext-compat.h" typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *, unsigned int, struct iw_request_info *, @@ -356,7 +357,6 @@ void wireless_nlevent_flush(void) } up_read(&net_rwsem); } -EXPORT_SYMBOL_GPL(wireless_nlevent_flush); static int wext_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ptr) -- cgit v1.2.3 From 9e1a98aac11b757e2634304bbe6bed5f5a58e0ee Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 21:50:25 +0200 Subject: wifi: wext: merge adjacent CONFIG_COMPAT ifdef blocks Simplify this, and also add a comment at the #endif. 
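The shape of that cleanup, as a generic sketch (the struct names here are placeholders, not the actual wireless.h contents):

    /* Before: two adjacent blocks guarded by the same symbol. */
    #ifdef CONFIG_COMPAT
    struct a { int x; };
    #endif
    #ifdef CONFIG_COMPAT
    struct b { int y; };
    #endif

    /* After: a single block, with the guard named at its closing #endif. */
    #ifdef CONFIG_COMPAT
    struct a { int x; };

    struct b { int y; };
    #endif /* CONFIG_COMPAT */
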
Link: https://patch.msgid.link/20241007215025.5ecdad1e02ed.I54efa895efc496e06ba41e1c39c9df9e23b0171f@changeid Signed-off-by: Johannes Berg --- include/linux/wireless.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/wireless.h b/include/linux/wireless.h index e6e34d74dda0..03e5d3fe226d 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -21,8 +21,7 @@ struct compat_iw_point { __u16 length; __u16 flags; }; -#endif -#ifdef CONFIG_COMPAT + struct __compat_iw_event { __u16 len; /* Real length of this stuff */ __u16 cmd; /* Wireless IOCTL */ @@ -49,5 +48,5 @@ struct __compat_iw_event { #define IW_EV_COMPAT_POINT_LEN \ (IW_EV_COMPAT_LCP_LEN + sizeof(struct compat_iw_point) - \ IW_EV_COMPAT_POINT_OFF) -#endif +#endif /* CONFIG_COMPAT */ #endif /* _LINUX_WIRELESS_H */ -- cgit v1.2.3 From ff919efb5fe8256fba132b5f2c58df3c38bdd0b3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 22:00:03 +0200 Subject: wireless: wext: shorten struct iw_ioctl_description There's no need for "future" extensions in an internal struct, and we don't need a u32 for flags, use just a u8. Also remove the unused IW_DESCR_FLAG_WAIT flag. Link: https://patch.msgid.link/20241007220003.309bd52fa763.I9a1229fa7f2be53d4f50e63671ed441d0968bb41@changeid Signed-off-by: Johannes Berg --- include/net/iw_handler.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index 804587b7592b..c9b46b996197 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -279,8 +279,6 @@ #define IW_DESCR_FLAG_RESTRICT 0x0004 /* GET : request is ROOT only */ /* SET : Omit payload from generated iwevent */ #define IW_DESCR_FLAG_NOMAX 0x0008 /* GET : no limit on request size */ -/* Driver level flags */ -#define IW_DESCR_FLAG_WAIT 0x0100 /* Wait for driver event */ /****************************** TYPES ******************************/ @@ -373,11 +371,10 @@ struct iw_handler_def { */ struct iw_ioctl_description { __u8 header_type; /* NULL, iw_point or other */ - __u8 token_type; /* Future */ + __u8 flags; /* Special handling of the request */ __u16 token_size; /* Granularity of payload */ __u16 min_tokens; /* Min acceptable token number */ __u16 max_tokens; /* Max acceptable token number */ - __u32 flags; /* Special handling of the request */ }; /* Need to think of short header translation table. Later. */ -- cgit v1.2.3 From da5e06dee58ad153a4933fd40fc53d571bfef373 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Sun, 6 Oct 2024 07:26:09 +0900 Subject: net-timestamp: namespacify the sysctl_tstamp_allow_data Let it be tuned in per netns by admins. 
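The consumer-side pattern this enables, sketched below (the helper name is illustrative; the netns field, its default, and the proc path mirror the hunks that follow):

    /* Sketch: reading the now per-namespace knob from a socket context.
     * Each netns starts with the value set in preinit_net_sysctl() (1,
     * i.e. allowed) and can override it through
     * /proc/sys/net/core/tstamp_allow_data inside that namespace. */
    static inline bool tstamp_data_allowed(const struct sock *sk)
    {
    	return READ_ONCE(sock_net(sk)->core.sysctl_tstamp_allow_data);
    }
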
Signed-off-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241005222609.94980-1-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- include/net/netns/core.h | 1 + include/net/sock.h | 2 -- net/core/net_namespace.c | 1 + net/core/skbuff.c | 2 +- net/core/sock.c | 2 -- net/core/sysctl_net_core.c | 18 +++++++++--------- 6 files changed, 12 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 78214f1b43a2..9b36f0ff0c20 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -15,6 +15,7 @@ struct netns_core { int sysctl_somaxconn; int sysctl_optmem_max; u8 sysctl_txrehash; + u8 sysctl_tstamp_allow_data; #ifdef CONFIG_PROC_FS struct prot_inuse __percpu *prot_inuse; diff --git a/include/net/sock.h b/include/net/sock.h index e282127092ab..b32f1424ecc5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2824,8 +2824,6 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo); extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; -extern int sysctl_tstamp_allow_data; - extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 105e3cd26763..a5bc1fd8b034 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -317,6 +317,7 @@ static __net_init void preinit_net_sysctl(struct net *net) */ net->core.sysctl_optmem_max = 128 * 1024; net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED; + net->core.sysctl_tstamp_allow_data = 1; } /* init code that must occur even if setup_net() is not called. */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 74149dc4ee31..00afeb90c23a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5506,7 +5506,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) { bool ret; - if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly)) + if (likely(tsonly || READ_ONCE(sock_net(sk)->core.sysctl_tstamp_allow_data))) return true; read_lock_bh(&sk->sk_callback_lock); diff --git a/net/core/sock.c b/net/core/sock.c index 846f494a17cf..083d438d8b6f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -286,8 +286,6 @@ EXPORT_SYMBOL(sysctl_rmem_max); __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; -int sysctl_tstamp_allow_data __read_mostly = 1; - DEFINE_STATIC_KEY_FALSE(memalloc_socks_key); EXPORT_SYMBOL_GPL(memalloc_socks_key); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 86a2476678c4..b60fac380cec 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -491,15 +491,6 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "tstamp_allow_data", - .data = &sysctl_tstamp_allow_data, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE - }, #ifdef CONFIG_RPS { .procname = "rps_sock_flow_entries", @@ -665,6 +656,15 @@ static struct ctl_table netns_core_table[] = { .extra2 = SYSCTL_ONE, .proc_handler = proc_dou8vec_minmax, }, + { + .procname = "tstamp_allow_data", + .data = &init_net.core.sysctl_tstamp_allow_data, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, /* sysctl_core_net_init() will set the values after this * to readonly in network namespaces */ 
-- cgit v1.2.3 From 3fe3dbaf26723c473d42a58b636a2500586b821e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 7 Oct 2024 01:44:56 +0100 Subject: caif: Remove unused cfsrvl_getphyid cfsrvl_getphyid() has been unused since 2011's commit f36214408470 ("caif: Use RCU and lists in cfcnfg.c for managing caif link layers") Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241007004456.149899-1-linux@treblig.org Signed-off-by: Jakub Kicinski --- include/net/caif/cfsrvl.h | 1 - net/caif/cfsrvl.c | 6 ------ 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h index 5ee7b322e18b..a000dc45f966 100644 --- a/include/net/caif/cfsrvl.h +++ b/include/net/caif/cfsrvl.h @@ -40,7 +40,6 @@ void cfsrvl_init(struct cfsrvl *service, struct dev_info *dev_info, bool supports_flowctrl); bool cfsrvl_ready(struct cfsrvl *service, int *err); -u8 cfsrvl_getphyid(struct cflayer *layer); static inline void cfsrvl_get(struct cflayer *layr) { diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index 9cef9496a707..171fa32ada85 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -183,12 +183,6 @@ bool cfsrvl_ready(struct cfsrvl *service, int *err) return true; } -u8 cfsrvl_getphyid(struct cflayer *layer) -{ - struct cfsrvl *servl = container_obj(layer); - return servl->dev_info.id; -} - bool cfsrvl_phyid_match(struct cflayer *layer, int phyid) { struct cfsrvl *servl = container_obj(layer); -- cgit v1.2.3 From 4a8840af5f53f2902eba91130fae650879f18e7a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Oct 2024 12:17:19 -0700 Subject: tracepoints: Use new static branch API The old static key API is deprecated. Switch to the new one. 
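For context, the new API pairs DEFINE_STATIC_KEY_FALSE()/static_branch_unlikely() with static_branch_enable()/static_branch_disable(). A minimal sketch outside the tracepoint code (all names here are illustrative, not taken from the patch):

#include <linux/jump_label.h>
#include <linux/printk.h>

static DEFINE_STATIC_KEY_FALSE(foo_key);

/* Hot path: the branch is patched at runtime and defaults to false. */
static void foo_hot_path(void)
{
        if (static_branch_unlikely(&foo_key))
                pr_debug("foo fast path enabled\n");
}

/* Slow path: flip the branch when the feature is toggled. */
static void foo_set_enabled(bool on)
{
        if (on)
                static_branch_enable(&foo_key);
        else
                static_branch_disable(&foo_key);
}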
Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Alice Ryhl Link: https://lore.kernel.org/7a08dae3c5eddb14b13864923c1b58ac1f4af83c.1728414936.git.jpoimboe@kernel.org Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint-defs.h | 4 ++-- include/linux/tracepoint.h | 8 ++++---- kernel/trace/trace_events_hist.c | 2 +- kernel/trace/trace_events_user.c | 4 ++-- kernel/tracepoint.c | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 4dc4955f0fbf..60a6e8314d4c 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -31,7 +31,7 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ - struct static_key key; + struct static_key_false key; struct static_call_key *static_call_key; void *static_call_tramp; void *iterator; @@ -83,7 +83,7 @@ struct bpf_raw_event_map { #ifdef CONFIG_TRACEPOINTS # define tracepoint_enabled(tp) \ - static_key_false(&(__tracepoint_##tp).key) + static_branch_unlikely(&(__tracepoint_##tp).key) #else # define tracepoint_enabled(tracepoint) false #endif diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 93a9f3070b48..2a29334bbc02 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -248,7 +248,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DECLARE_TRACE_RCU(name, proto, args, cond) \ static inline void trace_##name##_rcuidle(proto) \ { \ - if (static_key_false(&__tracepoint_##name.key)) \ + if (static_branch_unlikely(&__tracepoint_##name.key)) \ __DO_TRACE(name, \ TP_ARGS(args), \ TP_CONDITION(cond), 1); \ @@ -274,7 +274,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - if (static_key_false(&__tracepoint_##name.key)) \ + if (static_branch_unlikely(&__tracepoint_##name.key)) \ __DO_TRACE(name, \ TP_ARGS(args), \ TP_CONDITION(cond), 0); \ @@ -311,7 +311,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) static inline bool \ trace_##name##_enabled(void) \ { \ - return static_key_false(&__tracepoint_##name.key); \ + return static_branch_unlikely(&__tracepoint_##name.key);\ } /* @@ -328,7 +328,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) struct tracepoint __tracepoint_##_name __used \ __section("__tracepoints") = { \ .name = __tpstrtab_##_name, \ - .key = STATIC_KEY_INIT_FALSE, \ + .key = STATIC_KEY_FALSE_INIT, \ .static_call_key = &STATIC_CALL_KEY(tp_func_##_name), \ .static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \ .iterator = &__traceiter_##_name, \ diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 5f9119eb7c67..cc2924ad32a3 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -822,7 +822,7 @@ static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals, { struct tracepoint *tp = event->tp; - if (unlikely(atomic_read(&tp->key.enabled) > 0)) { + if (unlikely(static_key_enabled(&tp->key))) { struct tracepoint_func *probe_func_ptr; synth_probe_func_t probe_func; void *__data; diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 42b0d998d103..17bcad8f79de 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1676,7 +1676,7 @@ 
static void update_enable_bit_for(struct user_event *user) struct tracepoint *tp = &user->tracepoint; char status = 0; - if (atomic_read(&tp->key.enabled) > 0) { + if (static_key_enabled(&tp->key)) { struct tracepoint_func *probe_func_ptr; user_event_func_t probe_func; @@ -2280,7 +2280,7 @@ static ssize_t user_events_write_core(struct file *file, struct iov_iter *i) * It's possible key.enabled disables after this check, however * we don't mind if a few events are included in this condition. */ - if (likely(atomic_read(&tp->key.enabled) > 0)) { + if (likely(static_key_enabled(&tp->key))) { struct tracepoint_func *probe_func_ptr; user_event_func_t probe_func; struct iov_iter copy; diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 8879da16ef4d..1e3de77ea6b3 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -358,7 +358,7 @@ static int tracepoint_add_func(struct tracepoint *tp, tracepoint_update_call(tp, tp_funcs); /* Both iterator and static call handle NULL tp->funcs */ rcu_assign_pointer(tp->funcs, tp_funcs); - static_key_enable(&tp->key); + static_branch_enable(&tp->key); break; case TP_FUNC_2: /* 1->2 */ /* Set iterator static call */ @@ -414,7 +414,7 @@ static int tracepoint_remove_func(struct tracepoint *tp, if (tp->unregfunc && static_key_enabled(&tp->key)) tp->unregfunc(); - static_key_disable(&tp->key); + static_branch_disable(&tp->key); /* Set iterator static call */ tracepoint_update_call(tp, tp_funcs); /* Both iterator and static call handle NULL tp->funcs */ -- cgit v1.2.3 From 48bcda6848232667f13b4e97588de488c83c37d4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 3 Oct 2024 18:16:29 -0400 Subject: tracing: Remove definition of trace_*_rcuidle() The trace_*_rcuidle() variant of a tracepoint was to handle places where a tracepoint was located but RCU was not "watching". All those locations have been removed, and RCU should be watching where all tracepoints are located. We can now remove the trace_*_rcuidle() variant. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Mark Rutland Cc: Joel Fernandes Link: https://lore.kernel.org/20241003181629.36209057@gandalf.local.home Acked-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 50 ++------------------------------------- include/trace/events/preemptirq.h | 8 ------- kernel/trace/trace_preemptirq.c | 26 +++++--------------- scripts/tags.sh | 2 -- 4 files changed, 8 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 2a29334bbc02..7e4af7b3633c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -196,67 +196,25 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DO_TRACE_CALL(name, args) __traceiter_##name(NULL, args) #endif /* CONFIG_HAVE_STATIC_CALL */ -/* - * ARCH_WANTS_NO_INSTR archs are expected to have sanitized entry and idle - * code that disallow any/all tracing/instrumentation when RCU isn't watching. - */ -#ifdef CONFIG_ARCH_WANTS_NO_INSTR -#define RCUIDLE_COND(rcuidle) (rcuidle) -#else -/* srcu can't be used from NMI */ -#define RCUIDLE_COND(rcuidle) (rcuidle && in_nmi()) -#endif - /* * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. 
*/ -#define __DO_TRACE(name, args, cond, rcuidle) \ +#define __DO_TRACE(name, args, cond) \ do { \ int __maybe_unused __idx = 0; \ \ if (!(cond)) \ return; \ \ - if (WARN_ONCE(RCUIDLE_COND(rcuidle), \ - "Bad RCU usage for tracepoint")) \ - return; \ - \ /* keep srcu and sched-rcu usage consistent */ \ preempt_disable_notrace(); \ \ - /* \ - * For rcuidle callers, use srcu since sched-rcu \ - * doesn't work from the idle path. \ - */ \ - if (rcuidle) { \ - __idx = srcu_read_lock_notrace(&tracepoint_srcu);\ - ct_irq_enter_irqson(); \ - } \ - \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ \ - if (rcuidle) { \ - ct_irq_exit_irqson(); \ - srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\ - } \ - \ preempt_enable_notrace(); \ } while (0) -#ifndef MODULE -#define __DECLARE_TRACE_RCU(name, proto, args, cond) \ - static inline void trace_##name##_rcuidle(proto) \ - { \ - if (static_branch_unlikely(&__tracepoint_##name.key)) \ - __DO_TRACE(name, \ - TP_ARGS(args), \ - TP_CONDITION(cond), 1); \ - } -#else -#define __DECLARE_TRACE_RCU(name, proto, args, cond) -#endif - /* * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the @@ -277,14 +235,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) if (static_branch_unlikely(&__tracepoint_##name.key)) \ __DO_TRACE(name, \ TP_ARGS(args), \ - TP_CONDITION(cond), 0); \ + TP_CONDITION(cond)); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ } \ } \ - __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ - PARAMS(cond)) \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ { \ @@ -375,8 +331,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ static inline void trace_##name(proto) \ { } \ - static inline void trace_##name##_rcuidle(proto) \ - { } \ static inline int \ register_trace_##name(void (*probe)(data_proto), \ void *data) \ diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h index 3f249e150c0c..f99562d2b496 100644 --- a/include/trace/events/preemptirq.h +++ b/include/trace/events/preemptirq.h @@ -43,8 +43,6 @@ DEFINE_EVENT(preemptirq_template, irq_enable, #else #define trace_irq_enable(...) #define trace_irq_disable(...) -#define trace_irq_enable_rcuidle(...) -#define trace_irq_disable_rcuidle(...) #endif #ifdef CONFIG_TRACE_PREEMPT_TOGGLE @@ -58,8 +56,6 @@ DEFINE_EVENT(preemptirq_template, preempt_enable, #else #define trace_preempt_enable(...) #define trace_preempt_disable(...) -#define trace_preempt_enable_rcuidle(...) -#define trace_preempt_disable_rcuidle(...) #endif #endif /* _TRACE_PREEMPTIRQ_H */ @@ -69,10 +65,6 @@ DEFINE_EVENT(preemptirq_template, preempt_enable, #else /* !CONFIG_PREEMPTIRQ_TRACEPOINTS */ #define trace_irq_enable(...) #define trace_irq_disable(...) -#define trace_irq_enable_rcuidle(...) -#define trace_irq_disable_rcuidle(...) #define trace_preempt_enable(...) #define trace_preempt_disable(...) -#define trace_preempt_enable_rcuidle(...) -#define trace_preempt_disable_rcuidle(...) 
#endif diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index e37446f7916e..5c03633316a6 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -15,20 +15,6 @@ #define CREATE_TRACE_POINTS #include -/* - * Use regular trace points on architectures that implement noinstr - * tooling: these calls will only happen with RCU enabled, which can - * use a regular tracepoint. - * - * On older architectures, use the rcuidle tracing methods (which - * aren't NMI-safe - so exclude NMI contexts): - */ -#ifdef CONFIG_ARCH_WANTS_NO_INSTR -#define trace(point) trace_##point -#else -#define trace(point) if (!in_nmi()) trace_##point##_rcuidle -#endif - #ifdef CONFIG_TRACE_IRQFLAGS /* Per-cpu variable to prevent redundant calls when IRQs already off */ static DEFINE_PER_CPU(int, tracing_irq_cpu); @@ -42,7 +28,7 @@ static DEFINE_PER_CPU(int, tracing_irq_cpu); void trace_hardirqs_on_prepare(void) { if (this_cpu_read(tracing_irq_cpu)) { - trace(irq_enable)(CALLER_ADDR0, CALLER_ADDR1); + trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1); tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1); this_cpu_write(tracing_irq_cpu, 0); } @@ -53,7 +39,7 @@ NOKPROBE_SYMBOL(trace_hardirqs_on_prepare); void trace_hardirqs_on(void) { if (this_cpu_read(tracing_irq_cpu)) { - trace(irq_enable)(CALLER_ADDR0, CALLER_ADDR1); + trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1); tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1); this_cpu_write(tracing_irq_cpu, 0); } @@ -75,7 +61,7 @@ void trace_hardirqs_off_finish(void) if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); - trace(irq_disable)(CALLER_ADDR0, CALLER_ADDR1); + trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1); } } @@ -89,7 +75,7 @@ void trace_hardirqs_off(void) if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); - trace(irq_disable)(CALLER_ADDR0, CALLER_ADDR1); + trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1); } } EXPORT_SYMBOL(trace_hardirqs_off); @@ -100,13 +86,13 @@ NOKPROBE_SYMBOL(trace_hardirqs_off); void trace_preempt_on(unsigned long a0, unsigned long a1) { - trace(preempt_enable)(a0, a1); + trace_preempt_enable(a0, a1); tracer_preempt_on(a0, a1); } void trace_preempt_off(unsigned long a0, unsigned long a1) { - trace(preempt_disable)(a0, a1); + trace_preempt_disable(a0, a1); tracer_preempt_off(a0, a1); } #endif diff --git a/scripts/tags.sh b/scripts/tags.sh index 191e0461d6d5..0d01c1cafb70 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -152,9 +152,7 @@ regex_c=( '/^BPF_CALL_[0-9]([[:space:]]*\([[:alnum:]_]*\).*/\1/' '/^COMPAT_SYSCALL_DEFINE[0-9]([[:space:]]*\([[:alnum:]_]*\).*/compat_sys_\1/' '/^TRACE_EVENT([[:space:]]*\([[:alnum:]_]*\).*/trace_\1/' - '/^TRACE_EVENT([[:space:]]*\([[:alnum:]_]*\).*/trace_\1_rcuidle/' '/^DEFINE_EVENT([^,)]*,[[:space:]]*\([[:alnum:]_]*\).*/trace_\1/' - '/^DEFINE_EVENT([^,)]*,[[:space:]]*\([[:alnum:]_]*\).*/trace_\1_rcuidle/' '/^DEFINE_INSN_CACHE_OPS([[:space:]]*\([[:alnum:]_]*\).*/get_\1_slot/' '/^DEFINE_INSN_CACHE_OPS([[:space:]]*\([[:alnum:]_]*\).*/free_\1_slot/' '/^PAGEFLAG([[:space:]]*\([[:alnum:]_]*\).*/Page\1/' -- cgit v1.2.3 From e53244e2c8931f9e80c1841293aea86ef8ad32a3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 3 Oct 2024 18:42:20 -0400 Subject: tracepoint: Remove SRCU protection With the removal of the trace_*_rcuidle() tracepoints, there is no reason to protect tracepoints with SRCU. 
The reason the SRCU protection was added, was because it can protect tracepoints when RCU is not "watching". Now that tracepoints are only used when RCU is watching, remove the SRCU protection. It just made things more complex and confusing anyway. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Joel Fernandes Link: https://lore.kernel.org/20241003184220.0dc21d35@gandalf.local.home Acked-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 4 ---- kernel/tracepoint.c | 51 +--------------------------------------------- 2 files changed, 1 insertion(+), 54 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 7e4af7b3633c..3d33b9872cec 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -32,8 +32,6 @@ struct trace_eval_map { #define TRACEPOINT_DEFAULT_PRIO 10 -extern struct srcu_struct tracepoint_srcu; - extern int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); extern int @@ -109,7 +107,6 @@ void for_each_tracepoint_in_module(struct module *mod, #ifdef CONFIG_TRACEPOINTS static inline void tracepoint_synchronize_unregister(void) { - synchronize_srcu(&tracepoint_srcu); synchronize_rcu(); } #else @@ -207,7 +204,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) if (!(cond)) \ return; \ \ - /* keep srcu and sched-rcu usage consistent */ \ preempt_disable_notrace(); \ \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 1e3de77ea6b3..6474e2cf22c9 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -25,9 +25,6 @@ enum tp_func_state { extern tracepoint_ptr_t __start___tracepoints_ptrs[]; extern tracepoint_ptr_t __stop___tracepoints_ptrs[]; -DEFINE_SRCU(tracepoint_srcu); -EXPORT_SYMBOL_GPL(tracepoint_srcu); - enum tp_transition_sync { TP_TRANSITION_SYNC_1_0_1, TP_TRANSITION_SYNC_N_2_1, @@ -37,7 +34,6 @@ enum tp_transition_sync { struct tp_transition_snapshot { unsigned long rcu; - unsigned long srcu; bool ongoing; }; @@ -50,7 +46,6 @@ static void tp_rcu_get_state(enum tp_transition_sync sync) /* Keep the latest get_state snapshot. */ snapshot->rcu = get_state_synchronize_rcu(); - snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu); snapshot->ongoing = true; } @@ -61,8 +56,6 @@ static void tp_rcu_cond_sync(enum tp_transition_sync sync) if (!snapshot->ongoing) return; cond_synchronize_rcu(snapshot->rcu); - if (!poll_state_synchronize_srcu(&tracepoint_srcu, snapshot->srcu)) - synchronize_srcu(&tracepoint_srcu); snapshot->ongoing = false; } @@ -85,9 +78,6 @@ static LIST_HEAD(tracepoint_module_list); */ static DEFINE_MUTEX(tracepoints_mutex); -static struct rcu_head *early_probes; -static bool ok_to_free_tracepoints; - /* * Note about RCU : * It is used to delay the free of multiple probes array until a quiescent @@ -111,56 +101,17 @@ static inline void *allocate_probes(int count) return p == NULL ? 
NULL : p->probes; } -static void srcu_free_old_probes(struct rcu_head *head) -{ - kfree(container_of(head, struct tp_probes, rcu)); -} - static void rcu_free_old_probes(struct rcu_head *head) { - call_srcu(&tracepoint_srcu, head, srcu_free_old_probes); -} - -static __init int release_early_probes(void) -{ - struct rcu_head *tmp; - - ok_to_free_tracepoints = true; - - while (early_probes) { - tmp = early_probes; - early_probes = tmp->next; - call_rcu(tmp, rcu_free_old_probes); - } - - return 0; + kfree(container_of(head, struct tp_probes, rcu)); } -/* SRCU is initialized at core_initcall */ -postcore_initcall(release_early_probes); - static inline void release_probes(struct tracepoint_func *old) { if (old) { struct tp_probes *tp_probes = container_of(old, struct tp_probes, probes[0]); - /* - * We can't free probes if SRCU is not initialized yet. - * Postpone the freeing till after SRCU is initialized. - */ - if (unlikely(!ok_to_free_tracepoints)) { - tp_probes->rcu.next = early_probes; - early_probes = &tp_probes->rcu; - return; - } - - /* - * Tracepoint probes are protected by both sched RCU and SRCU, - * by calling the SRCU callback in the sched RCU callback we - * cover both cases. So let us chain the SRCU and sched RCU - * callbacks to wait for both grace periods. - */ call_rcu(&tp_probes->rcu, rcu_free_old_probes); } } -- cgit v1.2.3 From db03488897a70367aeafe82d07a78943d2a6068e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 9 Oct 2024 08:33:05 +0200 Subject: Revert "wifi: cfg80211: unexport wireless_nlevent_flush()" Revert this, I neglected to take into account the fact that cfg80211 itself can be a module, but wext is always builtin. Fixes: aee809aaa2d1 ("wifi: cfg80211: unexport wireless_nlevent_flush()") Signed-off-by: Johannes Berg --- include/net/iw_handler.h | 6 ++++++ net/wireless/wext-compat.h | 6 ------ net/wireless/wext-core.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index c9b46b996197..b80e474cb0aa 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -410,6 +410,12 @@ struct iw_spy_data { /* Send a single event to user space */ void wireless_send_event(struct net_device *dev, unsigned int cmd, union iwreq_data *wrqu, const char *extra); +#ifdef CONFIG_WEXT_CORE +/* flush all previous wext events - if work is done from netdev notifiers */ +void wireless_nlevent_flush(void); +#else +static inline void wireless_nlevent_flush(void) {} +#endif /* We may need a function to send a stream of events to user space. * More on that later... 
*/ diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h index f680dd134582..8251ca5df8ae 100644 --- a/net/wireless/wext-compat.h +++ b/net/wireless/wext-compat.h @@ -5,12 +5,6 @@ #include #include -#ifdef CONFIG_WEXT_CORE -void wireless_nlevent_flush(void); -#else -static inline void wireless_nlevent_flush(void) {} -#endif - int cfg80211_ibss_wext_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *wextfreq, char *extra); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 00c640b3e86e..3bb04b05c5ce 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -20,7 +20,6 @@ #include #include #include -#include "wext-compat.h" typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *, unsigned int, struct iw_request_info *, @@ -357,6 +356,7 @@ void wireless_nlevent_flush(void) } up_read(&net_rwsem); } +EXPORT_SYMBOL_GPL(wireless_nlevent_flush); static int wext_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ptr) -- cgit v1.2.3 From f7a870d0be12e3ae38cbe899d858994c5b51f22b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 9 Oct 2024 17:15:35 +0900 Subject: ata: libata: Remove unused macro definitions ATA_TMOUT_BOOT and ATA_TMOUT_BOOT_QUICK are not used anywhere. Delete these definitions. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20241009081535.376994-1-dlemoal@kernel.org Signed-off-by: Niklas Cassel --- include/linux/libata.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 9b4a6ff03235..c1a85d46eba6 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -270,9 +270,7 @@ enum { /* bits 24:31 of host->flags are reserved for LLD specific flags */ - /* various lengths of time */ - ATA_TMOUT_BOOT = 30000, /* heuristic */ - ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */ + /* Various lengths of time */ ATA_TMOUT_INTERNAL_QUICK = 5000, ATA_TMOUT_MAX_PARK = 30000, -- cgit v1.2.3 From d12586e1072d92070783c854819a0ca8c82c5439 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sat, 5 Oct 2024 23:38:25 +0200 Subject: platform/x86: wmi: Implement proper shutdown handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When performing a system shutdown under Windows, all WMI clients are terminated. This means that the ACPI BIOS might expect all WMI devices to be disabled when shutting down. Emulate this behaviour by disabling all active WMI devices during shutdown. Also introduce a new WMI driver callback to allow WMI drivers to perform any device-specific actions before disabling the WMI device. Tested on a Dell Inspiron 3505. 
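As a rough illustration of the driver-side use (not part of the patch; the foo_* names follow the convention in the documentation hunk below and everything device-specific is assumed), a WMI driver opting into the new callback might look like:

/* Hypothetical driver: quiesce the device so the ACPI BIOS sees it
 * disabled and in a well-known state across reboot or kexec. */
static void foo_shutdown(struct wmi_device *wdev)
{
        dev_dbg(&wdev->dev, "quiescing foo WMI interface for shutdown\n");
        /* device-specific teardown would go here */
}

static struct wmi_driver foo_driver = {
        .driver = {
                .name = "foo-wmi",
        },
        .id_table = foo_id_table,       /* assumed defined elsewhere */
        .probe = foo_probe,             /* assumed defined elsewhere */
        .shutdown = foo_shutdown,
        .no_singleton = true,
};

module_wmi_driver(foo_driver);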
Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20241005213825.701887-2-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- Documentation/wmi/driver-development-guide.rst | 7 ++++++- drivers/platform/x86/wmi.c | 27 ++++++++++++++++++++++++++ include/linux/wmi.h | 2 ++ 3 files changed, 35 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/wmi/driver-development-guide.rst b/Documentation/wmi/driver-development-guide.rst index 429137b2f632..676873c98680 100644 --- a/Documentation/wmi/driver-development-guide.rst +++ b/Documentation/wmi/driver-development-guide.rst @@ -64,6 +64,7 @@ to matching WMI devices using a struct wmi_device_id table: .id_table = foo_id_table, .probe = foo_probe, .remove = foo_remove, /* optional, devres is preferred */ + .shutdown = foo_shutdown, /* optional, called during shutdown */ .notify = foo_notify, /* optional, for event handling */ .no_notify_data = true, /* optional, enables events containing no additional data */ .no_singleton = true, /* required for new WMI drivers */ @@ -79,6 +80,10 @@ to unregister interfaces to other kernel subsystems and release resources, devre This simplifies error handling during probe and often allows to omit this callback entirely, see Documentation/driver-api/driver-model/devres.rst for details. +The shutdown() callback is called during shutdown, reboot or kexec. Its sole purpose is to disable +the WMI device and put it in a well-known state for the WMI driver to pick up later after reboot +or kexec. Most WMI drivers need no special shutdown handling and can thus omit this callback. + Please note that new WMI drivers are required to be able to be instantiated multiple times, and are forbidden from using any deprecated GUID-based WMI functions. This means that the WMI driver should be prepared for the scenario that multiple matching WMI devices are present @@ -123,7 +128,7 @@ ACPI object is being done by the WMI subsystem, not the driver. The WMI driver core will take care that the notify() callback will only be called after the probe() callback has been called, and that no events are being received by the driver -right before and after calling its remove() callback. +right before and after calling its remove() or shutdown() callback. However WMI driver developers should be aware that multiple WMI events can be received concurrently, so any locking (if necessary) needs to be provided by the WMI driver itself. diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 3cbe180c3fc0..a01223c23d10 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -884,6 +884,32 @@ static void wmi_dev_remove(struct device *dev) dev_warn(dev, "failed to disable device\n"); } +static void wmi_dev_shutdown(struct device *dev) +{ + struct wmi_driver *wdriver; + struct wmi_block *wblock; + + if (dev->driver) { + wdriver = drv_to_wdrv(dev->driver); + wblock = dev_to_wblock(dev); + + /* + * Some machines return bogus WMI event data when disabling + * the WMI event. Because of this we must prevent the associated + * WMI driver from receiving new WMI events before disabling it. 
+ */ + down_write(&wblock->notify_lock); + wblock->driver_ready = false; + up_write(&wblock->notify_lock); + + if (wdriver->shutdown) + wdriver->shutdown(to_wmi_device(dev)); + + if (ACPI_FAILURE(wmi_method_enable(wblock, false))) + dev_warn(dev, "Failed to disable device\n"); + } +} + static struct class wmi_bus_class = { .name = "wmi_bus", }; @@ -895,6 +921,7 @@ static const struct bus_type wmi_bus_type = { .uevent = wmi_dev_uevent, .probe = wmi_dev_probe, .remove = wmi_dev_remove, + .shutdown = wmi_dev_shutdown, }; static const struct device_type wmi_type_event = { diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 3275470b5531..120019677fc6 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -56,6 +56,7 @@ u8 wmidev_instance_count(struct wmi_device *wdev); * @no_singleton: Driver can be instantiated multiple times * @probe: Callback for device binding * @remove: Callback for device unbinding + * @shutdown: Callback for device shutdown * @notify: Callback for receiving WMI events * * This represents WMI drivers which handle WMI devices. @@ -68,6 +69,7 @@ struct wmi_driver { int (*probe)(struct wmi_device *wdev, const void *context); void (*remove)(struct wmi_device *wdev); + void (*shutdown)(struct wmi_device *wdev); void (*notify)(struct wmi_device *device, union acpi_object *data); }; -- cgit v1.2.3 From f042365a26b07006064c2cfa2293e16cad243b0d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 4 Oct 2024 11:20:56 +0100 Subject: net: pcs: xpcs: provide a helper to get the phylink pcs given xpcs Provide a helper to provide the pointer to the phylink_pcs struct given a valid xpcs pointer. This will be necessary when we make struct dw_xpcs private to pcs-xpcs.c Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 2 +- drivers/net/pcs/pcs-xpcs.c | 6 ++++++ include/linux/pcs/pcs-xpcs.h | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 83ad7c7935e3..48acba5eb178 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -451,7 +451,7 @@ static struct phylink_pcs *intel_mgbe_select_pcs(struct stmmac_priv *priv, * should always be an XPCS. The original code would always * return this if present. 
*/ - return &priv->hw->xpcs->pcs; + return xpcs_to_phylink_pcs(priv->hw->xpcs); } static int intel_mgbe_common_data(struct pci_dev *pdev, diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 8bde87ab971f..a7f6d56183a7 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -132,6 +132,12 @@ xpcs_find_compat(struct dw_xpcs *xpcs, phy_interface_t interface) return NULL; } +struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs) +{ + return &xpcs->pcs; +} +EXPORT_SYMBOL_GPL(xpcs_to_phylink_pcs); + int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface) { const struct dw_xpcs_compat *compat; diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index abda475111d1..868515f3cc88 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -64,6 +64,7 @@ struct dw_xpcs { bool need_reset; }; +struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs); int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, -- cgit v1.2.3 From accd5f5cd2e1b0ed6805a7c24765648d213561e5 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 4 Oct 2024 11:21:01 +0100 Subject: net: pcs: xpcs: move definition of struct dw_xpcs to private header There should be no reason for anything outside the XPCS code to know the contents of struct dw_xpcs - this is a private structure to XPCS. Move the definition to the private pcs-xpcs.h header, leaving a declaration in the global pcs/pcs-xpcs.h Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/pcs/pcs-xpcs.h | 18 ++++++++++++++++++ include/linux/pcs/pcs-xpcs.h | 18 +----------------- 2 files changed, 19 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h index fa05adfae220..1b546eae8280 100644 --- a/drivers/net/pcs/pcs-xpcs.h +++ b/drivers/net/pcs/pcs-xpcs.h @@ -123,6 +123,24 @@ #define DW_XPCS_INFO_DECLARE(_name, _pcs, _pma) \ static const struct dw_xpcs_info _name = { .pcs = _pcs, .pma = _pma } +struct dw_xpcs_desc; + +enum dw_xpcs_clock { + DW_XPCS_CORE_CLK, + DW_XPCS_PAD_CLK, + DW_XPCS_NUM_CLKS, +}; + +struct dw_xpcs { + struct dw_xpcs_info info; + const struct dw_xpcs_desc *desc; + struct mdio_device *mdiodev; + struct clk_bulk_data clks[DW_XPCS_NUM_CLKS]; + struct phylink_pcs pcs; + phy_interface_t interface; + bool need_reset; +}; + int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg); int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val); int xpcs_read_vpcs(struct dw_xpcs *xpcs, int reg); diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 868515f3cc88..b5b5d17998b8 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -21,8 +21,6 @@ #define DW_AN_C37_1000BASEX 4 #define DW_10GBASER 5 -struct dw_xpcs_desc; - enum dw_xpcs_pcs_id { DW_XPCS_ID_NATIVE = 0, NXP_SJA1105_XPCS_ID = 0x00000010, @@ -48,21 +46,7 @@ struct dw_xpcs_info { u32 pma; }; -enum dw_xpcs_clock { - DW_XPCS_CORE_CLK, - DW_XPCS_PAD_CLK, - DW_XPCS_NUM_CLKS, -}; - -struct dw_xpcs { - struct dw_xpcs_info info; - const struct dw_xpcs_desc *desc; - struct mdio_device *mdiodev; - struct clk_bulk_data clks[DW_XPCS_NUM_CLKS]; - struct phylink_pcs pcs; - phy_interface_t interface; - bool need_reset; -}; +struct dw_xpcs; struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs); int 
xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); -- cgit v1.2.3 From 20503272422693d793b84f88bf23fe4e955d3a33 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 6 Oct 2024 08:17:58 +0100 Subject: ptp: Add support for the AMZNC10C 'vmclock' device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vmclock device addresses the problem of live migration with precision clocks. The tolerances of a hardware counter (e.g. TSC) are typically around ±50PPM. A guest will use NTP/PTP/PPS to discipline that counter against an external source of 'real' time, and track the precise frequency of the counter as it changes with environmental conditions. When a guest is live migrated, anything it knows about the frequency of the underlying counter becomes invalid. It may move from a host where the counter was running at -50PPM of its nominal frequency, to a host where it runs at +50PPM. There will also be a step change in the value of the counter, as the correctness of its absolute value at migration is limited by the accuracy of the source and destination host's time synchronization. In its simplest form, the device merely advertises a 'disruption_marker' which indicates that the guest should throw away any NTP synchronization it thinks it has, and start again. Because the shared memory region can be exposed all the way to userspace through the /dev/vmclock0 node, applications can still use time from a fast vDSO 'system call', and check the disruption marker to be sure that their timestamp is indeed truthful. The structure also allows for the precise time, as known by the host, to be exposed directly to guests so that they don't have to wait for NTP to resync from scratch. The PTP driver consumes this information if present. Like the KVM PTP clock, this PTP driver can convert TSC-based cross timestamps into KVM clock values. Unlike the KVM PTP clock, it does so only when such is actually helpful. The values and fields are based on the nascent virtio-rtc specification, and the intent is that a version (hopefully precisely this version) of this structure will be included as an optional part of that spec. In the meantime, this driver supports the simple ACPI form of the device which is being shipped in certain commercial hypervisors (and submitted for inclusion in QEMU). Signed-off-by: David Woodhouse Acked-by: Richard Cochran Signed-off-by: David S. Miller --- MAINTAINERS | 7 + drivers/ptp/Kconfig | 13 + drivers/ptp/Makefile | 1 + drivers/ptp/ptp_vmclock.c | 615 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/vmclock-abi.h | 182 ++++++++++++ 5 files changed, 818 insertions(+) create mode 100644 drivers/ptp/ptp_vmclock.c create mode 100644 include/uapi/linux/vmclock-abi.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index af635dc60cfe..1389704c7d8d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18683,6 +18683,13 @@ S: Maintained F: drivers/ptp/ptp_vclock.c F: net/ethtool/phc_vclocks.c +PTP VMCLOCK SUPPORT +M: David Woodhouse +L: netdev@vger.kernel.org +S: Maintained +F: drivers/ptp/ptp_vmclock.c +F: include/uapi/linux/vmclock-abi.h + PTRACE SUPPORT M: Oleg Nesterov S: Maintained diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 604541dcb320..e98c9767e0ef 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -131,6 +131,19 @@ config PTP_1588_CLOCK_KVM To compile this driver as a module, choose M here: the module will be called ptp_kvm.
+config PTP_1588_CLOCK_VMCLOCK + tristate "Virtual machine PTP clock" + depends on X86_TSC || ARM_ARCH_TIMER + depends on PTP_1588_CLOCK && ACPI && ARCH_SUPPORTS_INT128 + default y + help + This driver adds support for using a virtual precision clock + advertised by the hypervisor. This clock is only useful in virtual + machines where such a device is present. + + To compile this driver as a module, choose M here: the module + will be called ptp_vmclock. + config PTP_1588_CLOCK_IDT82P33 tristate "IDT 82P33xxx PTP clock" depends on PTP_1588_CLOCK && I2C diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile index 68bf02078053..01b5cd91eb61 100644 --- a/drivers/ptp/Makefile +++ b/drivers/ptp/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_PTP_1588_CLOCK_DTE) += ptp_dte.o obj-$(CONFIG_PTP_1588_CLOCK_INES) += ptp_ines.o obj-$(CONFIG_PTP_1588_CLOCK_PCH) += ptp_pch.o obj-$(CONFIG_PTP_1588_CLOCK_KVM) += ptp_kvm.o +obj-$(CONFIG_PTP_1588_CLOCK_VMCLOCK) += ptp_vmclock.o obj-$(CONFIG_PTP_1588_CLOCK_QORIQ) += ptp-qoriq.o ptp-qoriq-y += ptp_qoriq.o ptp-qoriq-$(CONFIG_DEBUG_FS) += ptp_qoriq_debugfs.o diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c new file mode 100644 index 000000000000..cdca8a3ad1aa --- /dev/null +++ b/drivers/ptp/ptp_vmclock.c @@ -0,0 +1,615 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Virtual PTP 1588 clock for use with LM-safe VMclock device. + * + * Copyright © 2024 Amazon.com, Inc. or its affiliates. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#ifdef CONFIG_X86 +#include +#include +#endif + +#ifdef CONFIG_KVM_GUEST +#define SUPPORT_KVMCLOCK +#endif + +static DEFINE_IDA(vmclock_ida); + +ACPI_MODULE_NAME("vmclock"); + +struct vmclock_state { + struct resource res; + struct vmclock_abi *clk; + struct miscdevice miscdev; + struct ptp_clock_info ptp_clock_info; + struct ptp_clock *ptp_clock; + enum clocksource_ids cs_id, sys_cs_id; + int index; + char *name; +}; + +#define VMCLOCK_MAX_WAIT ms_to_ktime(100) + +/* Require at least the flags field to be present. All else can be optional. */ +#define VMCLOCK_MIN_SIZE offsetof(struct vmclock_abi, pad) + +#define VMCLOCK_FIELD_PRESENT(_c, _f) \ + (le32_to_cpu((_c)->size) >= (offsetof(struct vmclock_abi, _f) + \ + sizeof((_c)->_f))) + +/* + * Multiply a 64-bit count by a 64-bit tick 'period' in units of seconds >> 64 + * and add the fractional second part of the reference time. + * + * The result is a 128-bit value, the top 64 bits of which are seconds, and + * the low 64 bits are (seconds >> 64). 
+ */ +static uint64_t mul_u64_u64_shr_add_u64(uint64_t *res_hi, uint64_t delta, + uint64_t period, uint8_t shift, + uint64_t frac_sec) +{ + unsigned __int128 res = (unsigned __int128)delta * period; + + res >>= shift; + res += frac_sec; + *res_hi = res >> 64; + return (uint64_t)res; +} + +static bool tai_adjust(struct vmclock_abi *clk, uint64_t *sec) +{ + if (likely(clk->time_type == VMCLOCK_TIME_UTC)) + return true; + + if (clk->time_type == VMCLOCK_TIME_TAI && + (le64_to_cpu(clk->flags) & VMCLOCK_FLAG_TAI_OFFSET_VALID)) { + if (sec) + *sec += (int16_t)le16_to_cpu(clk->tai_offset_sec); + return true; + } + return false; +} + +static int vmclock_get_crosststamp(struct vmclock_state *st, + struct ptp_system_timestamp *sts, + struct system_counterval_t *system_counter, + struct timespec64 *tspec) +{ + ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT); + struct system_time_snapshot systime_snapshot; + uint64_t cycle, delta, seq, frac_sec; + +#ifdef CONFIG_X86 + /* + * We'd expect the hypervisor to know this and to report the clock + * status as VMCLOCK_STATUS_UNRELIABLE. But be paranoid. + */ + if (check_tsc_unstable()) + return -EINVAL; +#endif + + while (1) { + seq = le32_to_cpu(st->clk->seq_count) & ~1ULL; + + /* + * This pairs with a write barrier in the hypervisor + * which populates this structure. + */ + virt_rmb(); + + if (st->clk->clock_status == VMCLOCK_STATUS_UNRELIABLE) + return -EINVAL; + + /* + * When invoked for gettimex64(), fill in the pre/post system + * times. The simple case is when system time is based on the + * same counter as st->cs_id, in which case all three times + * will be derived from the *same* counter value. + * + * If the system isn't using the same counter, then the value + * from ktime_get_snapshot() will still be used as pre_ts, and + * ptp_read_system_postts() is called to populate postts after + * calling get_cycles(). + * + * The conversion to timespec64 happens further down, outside + * the seq_count loop. + */ + if (sts) { + ktime_get_snapshot(&systime_snapshot); + if (systime_snapshot.cs_id == st->cs_id) { + cycle = systime_snapshot.cycles; + } else { + cycle = get_cycles(); + ptp_read_system_postts(sts); + } + } else { + cycle = get_cycles(); + } + + delta = cycle - le64_to_cpu(st->clk->counter_value); + + frac_sec = mul_u64_u64_shr_add_u64(&tspec->tv_sec, delta, + le64_to_cpu(st->clk->counter_period_frac_sec), + st->clk->counter_period_shift, + le64_to_cpu(st->clk->time_frac_sec)); + tspec->tv_nsec = mul_u64_u64_shr(frac_sec, NSEC_PER_SEC, 64); + tspec->tv_sec += le64_to_cpu(st->clk->time_sec); + + if (!tai_adjust(st->clk, &tspec->tv_sec)) + return -EINVAL; + + /* + * This pairs with a write barrier in the hypervisor + * which populates this structure. + */ + virt_rmb(); + if (seq == le32_to_cpu(st->clk->seq_count)) + break; + + if (ktime_after(ktime_get(), deadline)) + return -ETIMEDOUT; + } + + if (system_counter) { + system_counter->cycles = cycle; + system_counter->cs_id = st->cs_id; + } + + if (sts) { + sts->pre_ts = ktime_to_timespec64(systime_snapshot.real); + if (systime_snapshot.cs_id == st->cs_id) + sts->post_ts = sts->pre_ts; + } + + return 0; +} + +#ifdef SUPPORT_KVMCLOCK +/* + * In the case where the system is using the KVM clock for timekeeping, convert + * the TSC value into a KVM clock time in order to return a paired reading that + * get_device_system_crosststamp() can cope with. 
+ */ +static int vmclock_get_crosststamp_kvmclock(struct vmclock_state *st, + struct ptp_system_timestamp *sts, + struct system_counterval_t *system_counter, + struct timespec64 *tspec) +{ + struct pvclock_vcpu_time_info *pvti = this_cpu_pvti(); + unsigned int pvti_ver; + int ret; + + preempt_disable_notrace(); + + do { + pvti_ver = pvclock_read_begin(pvti); + + ret = vmclock_get_crosststamp(st, sts, system_counter, tspec); + if (ret) + break; + + system_counter->cycles = __pvclock_read_cycles(pvti, + system_counter->cycles); + system_counter->cs_id = CSID_X86_KVM_CLK; + + /* + * This retry should never really happen; if the TSC is + * stable and reliable enough across vCPUS that it is sane + * for the hypervisor to expose a VMCLOCK device which uses + * it as the reference counter, then the KVM clock should be + * in 'master clock mode' and basically never changed. But + * the KVM clock is a fickle and often broken thing, so do + * it "properly" just in case. + */ + } while (pvclock_read_retry(pvti, pvti_ver)); + + preempt_enable_notrace(); + + return ret; +} +#endif + +static int ptp_vmclock_get_time_fn(ktime_t *device_time, + struct system_counterval_t *system_counter, + void *ctx) +{ + struct vmclock_state *st = ctx; + struct timespec64 tspec; + int ret; + +#ifdef SUPPORT_KVMCLOCK + if (READ_ONCE(st->sys_cs_id) == CSID_X86_KVM_CLK) + ret = vmclock_get_crosststamp_kvmclock(st, NULL, system_counter, + &tspec); + else +#endif + ret = vmclock_get_crosststamp(st, NULL, system_counter, &tspec); + + if (!ret) + *device_time = timespec64_to_ktime(tspec); + + return ret; +} + +static int ptp_vmclock_getcrosststamp(struct ptp_clock_info *ptp, + struct system_device_crosststamp *xtstamp) +{ + struct vmclock_state *st = container_of(ptp, struct vmclock_state, + ptp_clock_info); + int ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn, st, + NULL, xtstamp); +#ifdef SUPPORT_KVMCLOCK + /* + * On x86, the KVM clock may be used for the system time. We can + * actually convert a TSC reading to that, and return a paired + * timestamp that get_device_system_crosststamp() *can* handle.
+ */ + if (ret == -ENODEV) { + struct system_time_snapshot systime_snapshot; + + ktime_get_snapshot(&systime_snapshot); + + if (systime_snapshot.cs_id == CSID_X86_TSC || + systime_snapshot.cs_id == CSID_X86_KVM_CLK) { + WRITE_ONCE(st->sys_cs_id, systime_snapshot.cs_id); + ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn, + st, NULL, xtstamp); + } + } +#endif + return ret; +} + +/* + * PTP clock operations + */ + +static int ptp_vmclock_adjfine(struct ptp_clock_info *ptp, long delta) +{ + return -EOPNOTSUPP; +} + +static int ptp_vmclock_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + return -EOPNOTSUPP; +} + +static int ptp_vmclock_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + return -EOPNOTSUPP; +} + +static int ptp_vmclock_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct vmclock_state *st = container_of(ptp, struct vmclock_state, + ptp_clock_info); + + return vmclock_get_crosststamp(st, sts, NULL, ts); +} + +static int ptp_vmclock_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + return -EOPNOTSUPP; +} + +static const struct ptp_clock_info ptp_vmclock_info = { + .owner = THIS_MODULE, + .max_adj = 0, + .n_ext_ts = 0, + .n_pins = 0, + .pps = 0, + .adjfine = ptp_vmclock_adjfine, + .adjtime = ptp_vmclock_adjtime, + .gettimex64 = ptp_vmclock_gettimex, + .settime64 = ptp_vmclock_settime, + .enable = ptp_vmclock_enable, + .getcrosststamp = ptp_vmclock_getcrosststamp, +}; + +static struct ptp_clock *vmclock_ptp_register(struct device *dev, + struct vmclock_state *st) +{ + enum clocksource_ids cs_id; + + if (IS_ENABLED(CONFIG_ARM64) && + st->clk->counter_id == VMCLOCK_COUNTER_ARM_VCNT) { + /* Can we check it's the virtual counter? 
*/ + cs_id = CSID_ARM_ARCH_COUNTER; + } else if (IS_ENABLED(CONFIG_X86) && + st->clk->counter_id == VMCLOCK_COUNTER_X86_TSC) { + cs_id = CSID_X86_TSC; + } else { + return NULL; + } + + /* Only UTC, or TAI with offset */ + if (!tai_adjust(st->clk, NULL)) { + dev_info(dev, "vmclock does not provide unambiguous UTC\n"); + return NULL; + } + + st->sys_cs_id = cs_id; + st->cs_id = cs_id; + st->ptp_clock_info = ptp_vmclock_info; + strscpy(st->ptp_clock_info.name, st->name); + + return ptp_clock_register(&st->ptp_clock_info, dev); +} + +static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma) +{ + struct vmclock_state *st = container_of(fp->private_data, + struct vmclock_state, miscdev); + + if ((vma->vm_flags & (VM_READ|VM_WRITE)) != VM_READ) + return -EROFS; + + if (vma->vm_end - vma->vm_start != PAGE_SIZE || vma->vm_pgoff) + return -EINVAL; + + if (io_remap_pfn_range(vma, vma->vm_start, + st->res.start >> PAGE_SHIFT, PAGE_SIZE, + vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct vmclock_state *st = container_of(fp->private_data, + struct vmclock_state, miscdev); + ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT); + size_t max_count; + uint32_t seq; + + if (*ppos >= PAGE_SIZE) + return 0; + + max_count = PAGE_SIZE - *ppos; + if (count > max_count) + count = max_count; + + while (1) { + seq = le32_to_cpu(st->clk->seq_count) & ~1U; + /* Pairs with hypervisor wmb */ + virt_rmb(); + + if (copy_to_user(buf, ((char *)st->clk) + *ppos, count)) + return -EFAULT; + + /* Pairs with hypervisor wmb */ + virt_rmb(); + if (seq == le32_to_cpu(st->clk->seq_count)) + break; + + if (ktime_after(ktime_get(), deadline)) + return -ETIMEDOUT; + } + + *ppos += count; + return count; +} + +static const struct file_operations vmclock_miscdev_fops = { + .mmap = vmclock_miscdev_mmap, + .read = vmclock_miscdev_read, +}; + +/* module operations */ + +static void vmclock_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct vmclock_state *st = dev_get_drvdata(dev); + + if (st->ptp_clock) + ptp_clock_unregister(st->ptp_clock); + + if (st->miscdev.minor != MISC_DYNAMIC_MINOR) + misc_deregister(&st->miscdev); +} + +static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data) +{ + struct vmclock_state *st = data; + struct resource_win win; + struct resource *res = &win.res; + + if (ares->type == ACPI_RESOURCE_TYPE_END_TAG) + return AE_OK; + + /* There can be only one */ + if (resource_type(&st->res) == IORESOURCE_MEM) + return AE_ERROR; + + if (acpi_dev_resource_memory(ares, res) || + acpi_dev_resource_address_space(ares, &win)) { + + if (resource_type(res) != IORESOURCE_MEM || + resource_size(res) < sizeof(st->clk)) + return AE_ERROR; + + st->res = *res; + return AE_OK; + } + + return AE_ERROR; +} + +static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) +{ + struct acpi_device *adev = ACPI_COMPANION(dev); + acpi_status status; + + /* + * This should never happen as this function is only called when + * has_acpi_companion(dev) is true, but the logic is sufficiently + * complex that Coverity can't see the tautology. 
+ */ + if (!adev) + return -ENODEV; + + status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS, + vmclock_acpi_resources, st); + if (ACPI_FAILURE(status) || resource_type(&st->res) != IORESOURCE_MEM) { + dev_err(dev, "failed to get resources\n"); + return -ENODEV; + } + + return 0; +} + +static void vmclock_put_idx(void *data) +{ + struct vmclock_state *st = data; + + ida_free(&vmclock_ida, st->index); +} + +static int vmclock_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct vmclock_state *st; + int ret; + + st = devm_kzalloc(dev, sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + if (has_acpi_companion(dev)) + ret = vmclock_probe_acpi(dev, st); + else + ret = -EINVAL; /* Only ACPI for now */ + + if (ret) { + dev_info(dev, "Failed to obtain physical address: %d\n", ret); + goto out; + } + + if (resource_size(&st->res) < VMCLOCK_MIN_SIZE) { + dev_info(dev, "Region too small (0x%llx)\n", + resource_size(&st->res)); + ret = -EINVAL; + goto out; + } + st->clk = devm_memremap(dev, st->res.start, resource_size(&st->res), + MEMREMAP_WB | MEMREMAP_DEC); + if (IS_ERR(st->clk)) { + ret = PTR_ERR(st->clk); + dev_info(dev, "failed to map shared memory\n"); + st->clk = NULL; + goto out; + } + + if (le32_to_cpu(st->clk->magic) != VMCLOCK_MAGIC || + le32_to_cpu(st->clk->size) > resource_size(&st->res) || + le16_to_cpu(st->clk->version) != 1) { + dev_info(dev, "vmclock magic fields invalid\n"); + ret = -EINVAL; + goto out; + } + + ret = ida_alloc(&vmclock_ida, GFP_KERNEL); + if (ret < 0) + goto out; + + st->index = ret; + ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st); + if (ret) + goto out; + + st->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "vmclock%d", st->index); + if (!st->name) { + ret = -ENOMEM; + goto out; + } + + /* + * If the structure is big enough, it can be mapped to userspace. + * Theoretically a guest OS even using larger pages could still + * use 4KiB PTEs to map smaller MMIO regions like this, but let's + * cross that bridge if/when we come to it. + */ + if (le32_to_cpu(st->clk->size) >= PAGE_SIZE) { + st->miscdev.minor = MISC_DYNAMIC_MINOR; + st->miscdev.fops = &vmclock_miscdev_fops; + st->miscdev.name = st->name; + + ret = misc_register(&st->miscdev); + if (ret) + goto out; + } + + /* If there is valid clock information, register a PTP clock */ + if (VMCLOCK_FIELD_PRESENT(st->clk, time_frac_sec)) { + /* Can return a silent NULL, or an error. */ + st->ptp_clock = vmclock_ptp_register(dev, st); + if (IS_ERR(st->ptp_clock)) { + ret = PTR_ERR(st->ptp_clock); + st->ptp_clock = NULL; + vmclock_remove(pdev); + goto out; + } + } + + if (!st->miscdev.minor && !st->ptp_clock) { + /* Neither miscdev nor PTP registered */ + dev_info(dev, "vmclock: Neither miscdev nor PTP available; not registering\n"); + ret = -ENODEV; + goto out; + } + + dev_info(dev, "%s: registered %s%s%s\n", st->name, + st->miscdev.minor ? "miscdev" : "", + (st->miscdev.minor && st->ptp_clock) ? ", " : "", + st->ptp_clock ? 
"PTP" : ""); + + dev_set_drvdata(dev, st); + + out: + return ret; +} + +static const struct acpi_device_id vmclock_acpi_ids[] = { + { "AMZNC10C", 0 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids); + +static struct platform_driver vmclock_platform_driver = { + .probe = vmclock_probe, + .remove_new = vmclock_remove, + .driver = { + .name = "vmclock", + .acpi_match_table = vmclock_acpi_ids, + }, +}; + +module_platform_driver(vmclock_platform_driver) + +MODULE_AUTHOR("David Woodhouse "); +MODULE_DESCRIPTION("PTP clock using VMCLOCK"); +MODULE_LICENSE("GPL"); diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h new file mode 100644 index 000000000000..2d99b29ac44a --- /dev/null +++ b/include/uapi/linux/vmclock-abi.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ + +/* + * This structure provides a vDSO-style clock to VM guests, exposing the + * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch + * counter, etc.) and real time. It is designed to address the problem of + * live migration, which other clock enlightenments do not. + * + * When a guest is live migrated, this affects the clock in two ways. + * + * First, even between identical hosts the actual frequency of the underlying + * counter will change within the tolerances of its specification (typically + * ±50PPM, or 4 seconds a day). This frequency also varies over time on the + * same host, but can be tracked by NTP as it generally varies slowly. With + * live migration there is a step change in the frequency, with no warning. + * + * Second, there may be a step change in the value of the counter itself, as + * its accuracy is limited by the precision of the NTP synchronization on the + * source and destination hosts. + * + * So any calibration (NTP, PTP, etc.) which the guest has done on the source + * host before migration is invalid, and needs to be redone on the new host. + * + * In its most basic mode, this structure provides only an indication to the + * guest that live migration has occurred. This allows the guest to know that + * its clock is invalid and take remedial action. For applications that need + * reliable accurate timestamps (e.g. distributed databases), the structure + * can be mapped all the way to userspace. This allows the application to see + * directly for itself that the clock is disrupted and take appropriate + * action, even when using a vDSO-style method to get the time instead of a + * system call. + * + * In its more advanced mode. this structure can also be used to expose the + * precise relationship of the CPU counter to real time, as calibrated by the + * host. This means that userspace applications can have accurate time + * immediately after live migration, rather than having to pause operations + * and wait for NTP to recover. This mode does, of course, rely on the + * counter being reliable and consistent across CPUs. + * + * Note that this must be true UTC, never with smeared leap seconds. If a + * guest wishes to construct a smeared clock, it can do so. Presenting a + * smeared clock through this interface would be problematic because it + * actually messes with the apparent counter *period*. A linear smearing + * of 1 ms per second would effectively tweak the counter period by 1000PPM + * at the start/end of the smearing period, while a sinusoidal smear would + * basically be impossible to represent. 
+ * + * This structure is offered with the intent that it be adopted into the + * nascent virtio-rtc standard, as a virtio-rtc that does not address the live + * migration problem seems a little less than fit for purpose. For that + * reason, certain fields use precisely the same numeric definitions as in + * the virtio-rtc proposal. The structure can also be exposed through an ACPI + * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for + * the fact that it uses a real _CRS to convey the address of the structure + * (which should be a full page, to allow for mapping directly to userspace). + */ + +#ifndef __VMCLOCK_ABI_H__ +#define __VMCLOCK_ABI_H__ + +#include + +struct vmclock_abi { + /* CONSTANT FIELDS */ + __le32 magic; +#define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */ + __le32 size; /* Size of region containing this structure */ + __le16 version; /* 1 */ + __u8 counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */ +#define VMCLOCK_COUNTER_ARM_VCNT 0 +#define VMCLOCK_COUNTER_X86_TSC 1 +#define VMCLOCK_COUNTER_INVALID 0xff + __u8 time_type; /* Matches VIRTIO_RTC_TYPE_xxx */ +#define VMCLOCK_TIME_UTC 0 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_TAI 1 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_MONOTONIC 2 /* Since undefined epoch */ +#define VMCLOCK_TIME_INVALID_SMEARED 3 /* Not supported */ +#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */ + + /* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */ + __le32 seq_count; /* Low bit means an update is in progress */ + /* + * This field changes to another non-repeating value when the CPU + * counter is disrupted, for example on live migration. This lets + * the guest know that it should discard any calibration it has + * performed of the counter against external sources (NTP/PTP/etc.). + */ + __le64 disruption_marker; + __le64 flags; + /* Indicates that the tai_offset_sec field is valid */ +#define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0) + /* + * Optionally used to notify guests of pending maintenance events. + * A guest which provides latency-sensitive services may wish to + * remove itself from service if an event is coming up. Two flags + * indicate the approximate imminence of the event. + */ +#define VMCLOCK_FLAG_DISRUPTION_SOON (1 << 1) /* About a day */ +#define VMCLOCK_FLAG_DISRUPTION_IMMINENT (1 << 2) /* About an hour */ +#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3) +#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4) +#define VMCLOCK_FLAG_TIME_ESTERROR_VALID (1 << 5) +#define VMCLOCK_FLAG_TIME_MAXERROR_VALID (1 << 6) + /* + * If the MONOTONIC flag is set then (other than leap seconds) it is + * guaranteed that the time calculated according this structure at + * any given moment shall never appear to be later than the time + * calculated via the structure at any *later* moment. + * + * In particular, a timestamp based on a counter reading taken + * immediately after setting the low bit of seq_count (and the + * associated memory barrier), using the previously-valid time and + * period fields, shall never be later than a timestamp based on + * a counter reading taken immediately before *clearing* the low + * bit again after the update, using the about-to-be-valid fields. 
+ */ +#define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) + + __u8 pad[2]; + __u8 clock_status; +#define VMCLOCK_STATUS_UNKNOWN 0 +#define VMCLOCK_STATUS_INITIALIZING 1 +#define VMCLOCK_STATUS_SYNCHRONIZED 2 +#define VMCLOCK_STATUS_FREERUNNING 3 +#define VMCLOCK_STATUS_UNRELIABLE 4 + + /* + * The time exposed through this device is never smeared. This field + * corresponds to the 'subtype' field in virtio-rtc, which indicates + * the smearing method. However in this case it provides a *hint* to + * the guest operating system, such that *if* the guest OS wants to + * provide its users with an alternative clock which does not follow + * UTC, it may do so in a fashion consistent with the other systems + * in the nearby environment. + */ + __u8 leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */ +#define VMCLOCK_SMEARING_STRICT 0 +#define VMCLOCK_SMEARING_NOON_LINEAR 1 +#define VMCLOCK_SMEARING_UTC_SLS 2 + __le16 tai_offset_sec; /* Actually two's complement signed */ + __u8 leap_indicator; + /* + * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined + * in the current draft of virtio-rtc, but since smearing cannot be + * used with the shared memory device, some values are not used. + * + * The _POST_POS and _POST_NEG values allow the guest to perform + * its own smearing during the day or so after a leap second when + * such smearing may need to continue being applied for a leap + * second which is now theoretically "historical". + */ +#define VMCLOCK_LEAP_NONE 0x00 /* No known nearby leap second */ +#define VMCLOCK_LEAP_PRE_POS 0x01 /* Positive leap second at EOM */ +#define VMCLOCK_LEAP_PRE_NEG 0x02 /* Negative leap second at EOM */ +#define VMCLOCK_LEAP_POS 0x03 /* Set during 23:59:60 second */ +#define VMCLOCK_LEAP_POST_POS 0x04 +#define VMCLOCK_LEAP_POST_NEG 0x05 + + /* Bit shift for counter_period_frac_sec and its error rate */ + __u8 counter_period_shift; + /* + * Paired values of counter and UTC at a given point in time. + */ + __le64 counter_value; + /* + * Counter period, and error margin of same. The unit of these + * fields is 1/2^(64 + counter_period_shift) of a second. + */ + __le64 counter_period_frac_sec; + __le64 counter_period_esterror_rate_frac_sec; + __le64 counter_period_maxerror_rate_frac_sec; + + /* + * Time according to time_type field above. + */ + __le64 time_sec; /* Seconds since time_type epoch */ + __le64 time_frac_sec; /* Units of 1/2^64 of a second */ + __le64 time_esterror_nanosec; + __le64 time_maxerror_nanosec; +}; + +#endif /* __VMCLOCK_ABI_H__ */ -- cgit v1.2.3 From 57e3707eb5e3d9a45eef9151f0378313b1d39a17 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 5 Aug 2024 11:39:36 +0200 Subject: bpf: Constify ctl_table argument of filter function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysctl core is moving to allow "struct ctl_table" in read-only memory. As a preparation for that all functions handling "struct ctl_table" need to be able to work with "const struct ctl_table". As __cgroup_bpf_run_filter_sysctl() does not modify its table, it can be adapted trivially. 
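As an illustrative sketch of why the adaptation is trivial (the struct below is a hypothetical two-field stand-in, not the kernel's struct ctl_table, and the function is not the real __cgroup_bpf_run_filter_sysctl()): a function that only reads table fields can take a pointer-to-const with no other change.

struct ctl_table_stub {		/* hypothetical stand-in for illustration only */
	const char *procname;
	int maxlen;
};

static int filter_stub(const struct ctl_table_stub *table, int write)
{
	/* Read-only access: nothing in *table is modified. */
	if (!table->procname || table->maxlen <= 0)
		return 0;

	return write ? 1 : 0;
}
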
Signed-off-by: Thomas Weißschuh Signed-off-by: Joel Granados --- include/linux/bpf-cgroup.h | 2 +- kernel/bpf/cgroup.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index ce91d9b2acb9..4dd17128b204 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -138,7 +138,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum cgroup_bpf_attach_type atype); int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, - struct ctl_table *table, int write, + const struct ctl_table *table, int write, char **buf, size_t *pcount, loff_t *ppos, enum cgroup_bpf_attach_type atype); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index e7113d700b87..9c951b4212a2 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1691,7 +1691,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { * returned value != 1 during execution. In all other cases 0 is returned. */ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, - struct ctl_table *table, int write, + const struct ctl_table *table, int write, char **buf, size_t *pcount, loff_t *ppos, enum cgroup_bpf_attach_type atype) { -- cgit v1.2.3 From 29e1095bb1ad149b5c417719338d9c81d58bf12b Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 5 Aug 2024 11:39:37 +0200 Subject: sysctl: move internal interfaces to const struct ctl_table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a preparation to make all the core sysctl code work with const struct ctl_table switch over the internal function to use the const variant. Some pointers to "struct ctl_table" need to stay non-const as they are newly allocated and modified before registration. 
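To illustrate the constness split described above, here is a small userspace-style sketch (simplified stand-in types, not the proc_sysctl internals): lookup-style helpers only inspect entries and can take const pointers, while a table that is freshly allocated and still being filled in has to stay mutable.

#include <stdlib.h>
#include <string.h>

struct table_stub {		/* hypothetical stand-in for struct ctl_table */
	char *procname;
	int maxlen;
};

/* Reader: only inspects entries, so the parameter can be const. */
static const struct table_stub *find_stub(const struct table_stub *tbl,
					  size_t n, const char *name)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (tbl[i].procname && strcmp(tbl[i].procname, name) == 0)
			return &tbl[i];

	return NULL;
}

/* Builder: the new entry is modified after allocation, so it stays non-const. */
static struct table_stub *new_stub(const char *name, int maxlen)
{
	struct table_stub *t = calloc(1, sizeof(*t));

	if (t) {
		t->procname = strdup(name);
		t->maxlen = maxlen;
	}

	return t;
}
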
Signed-off-by: Thomas Weißschuh Signed-off-by: Joel Granados --- fs/proc/internal.h | 2 +- fs/proc/proc_sysctl.c | 81 +++++++++++++++++++++++++------------------------- include/linux/sysctl.h | 2 +- 3 files changed, 43 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 87e4d6282025..1695509370b8 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -102,7 +102,7 @@ struct proc_inode { union proc_op op; struct proc_dir_entry *pde; struct ctl_table_header *sysctl; - struct ctl_table *sysctl_entry; + const struct ctl_table *sysctl_entry; struct hlist_node sibling_inodes; const struct proc_ns_operations *ns_ops; struct inode vfs_inode; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d11ebc055ce0..713abccbfcf9 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -88,7 +88,7 @@ static DEFINE_SPINLOCK(sysctl_lock); static void drop_sysctl_table(struct ctl_table_header *header); static int sysctl_follow_link(struct ctl_table_header **phead, - struct ctl_table **pentry); + const struct ctl_table **pentry); static int insert_links(struct ctl_table_header *head); static void put_links(struct ctl_table_header *header); @@ -110,11 +110,11 @@ static int namecmp(const char *name1, int len1, const char *name2, int len2) } /* Called under sysctl_lock */ -static struct ctl_table *find_entry(struct ctl_table_header **phead, +static const struct ctl_table *find_entry(struct ctl_table_header **phead, struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; - struct ctl_table *entry; + const struct ctl_table *entry; struct rb_node *node = dir->root.rb_node; while (node) @@ -141,7 +141,7 @@ static struct ctl_table *find_entry(struct ctl_table_header **phead, return NULL; } -static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) +static int insert_entry(struct ctl_table_header *head, const struct ctl_table *entry) { struct rb_node *node = &head->node[entry - head->ctl_table].node; struct rb_node **p = &head->parent->root.rb_node; @@ -151,7 +151,7 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) while (*p) { struct ctl_table_header *parent_head; - struct ctl_table *parent_entry; + const struct ctl_table *parent_entry; struct ctl_node *parent_node; const char *parent_name; int cmp; @@ -180,7 +180,7 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) return 0; } -static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry) +static void erase_entry(struct ctl_table_header *head, const struct ctl_table *entry) { struct rb_node *node = &head->node[entry - head->ctl_table].node; @@ -189,7 +189,7 @@ static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry) static void init_header(struct ctl_table_header *head, struct ctl_table_root *root, struct ctl_table_set *set, - struct ctl_node *node, struct ctl_table *table, size_t table_size) + struct ctl_node *node, const struct ctl_table *table, size_t table_size) { head->ctl_table = table; head->ctl_table_size = table_size; @@ -204,7 +204,7 @@ static void init_header(struct ctl_table_header *head, head->node = node; INIT_HLIST_HEAD(&head->inodes); if (node) { - struct ctl_table *entry; + const struct ctl_table *entry; list_for_each_table_entry(entry, head) { node->header = head; @@ -217,7 +217,7 @@ static void init_header(struct ctl_table_header *head, static void erase_header(struct ctl_table_header *head) { - struct 
ctl_table *entry; + const struct ctl_table *entry; list_for_each_table_entry(entry, head) erase_entry(head, entry); @@ -225,7 +225,7 @@ static void erase_header(struct ctl_table_header *head) static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { - struct ctl_table *entry; + const struct ctl_table *entry; struct ctl_table_header *dir_h = &dir->header; int err; @@ -344,12 +344,12 @@ lookup_header_set(struct ctl_table_root *root) return set; } -static struct ctl_table *lookup_entry(struct ctl_table_header **phead, - struct ctl_dir *dir, - const char *name, int namelen) +static const struct ctl_table *lookup_entry(struct ctl_table_header **phead, + struct ctl_dir *dir, + const char *name, int namelen) { struct ctl_table_header *head; - struct ctl_table *entry; + const struct ctl_table *entry; spin_lock(&sysctl_lock); entry = find_entry(&head, dir, name, namelen); @@ -374,10 +374,10 @@ static struct ctl_node *first_usable_entry(struct rb_node *node) } static void first_entry(struct ctl_dir *dir, - struct ctl_table_header **phead, struct ctl_table **pentry) + struct ctl_table_header **phead, const struct ctl_table **pentry) { struct ctl_table_header *head = NULL; - struct ctl_table *entry = NULL; + const struct ctl_table *entry = NULL; struct ctl_node *ctl_node; spin_lock(&sysctl_lock); @@ -391,10 +391,10 @@ static void first_entry(struct ctl_dir *dir, *pentry = entry; } -static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry) +static void next_entry(struct ctl_table_header **phead, const struct ctl_table **pentry) { struct ctl_table_header *head = *phead; - struct ctl_table *entry = *pentry; + const struct ctl_table *entry = *pentry; struct ctl_node *ctl_node = &head->node[entry - head->ctl_table]; spin_lock(&sysctl_lock); @@ -427,7 +427,7 @@ static int test_perm(int mode, int op) return -EACCES; } -static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, int op) +static int sysctl_perm(struct ctl_table_header *head, const struct ctl_table *table, int op) { struct ctl_table_root *root = head->root; int mode; @@ -441,7 +441,7 @@ static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, i } static struct inode *proc_sys_make_inode(struct super_block *sb, - struct ctl_table_header *head, struct ctl_table *table) + struct ctl_table_header *head, const struct ctl_table *table) { struct ctl_table_root *root = head->root; struct inode *inode; @@ -512,7 +512,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, struct ctl_table_header *head = grab_header(dir); struct ctl_table_header *h = NULL; const struct qstr *name = &dentry->d_name; - struct ctl_table *p; + const struct ctl_table *p; struct inode *inode; struct dentry *err = ERR_PTR(-ENOENT); struct ctl_dir *ctl_dir; @@ -550,7 +550,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter, { struct inode *inode = file_inode(iocb->ki_filp); struct ctl_table_header *head = grab_header(inode); - struct ctl_table *table = PROC_I(inode)->sysctl_entry; + const struct ctl_table *table = PROC_I(inode)->sysctl_entry; size_t count = iov_iter_count(iter); char *kbuf; ssize_t error; @@ -624,7 +624,7 @@ static ssize_t proc_sys_write(struct kiocb *iocb, struct iov_iter *iter) static int proc_sys_open(struct inode *inode, struct file *filp) { struct ctl_table_header *head = grab_header(inode); - struct ctl_table *table = PROC_I(inode)->sysctl_entry; + const struct ctl_table *table = PROC_I(inode)->sysctl_entry; 
/* sysctl was unregistered */ if (IS_ERR(head)) @@ -642,7 +642,7 @@ static __poll_t proc_sys_poll(struct file *filp, poll_table *wait) { struct inode *inode = file_inode(filp); struct ctl_table_header *head = grab_header(inode); - struct ctl_table *table = PROC_I(inode)->sysctl_entry; + const struct ctl_table *table = PROC_I(inode)->sysctl_entry; __poll_t ret = DEFAULT_POLLMASK; unsigned long event; @@ -673,7 +673,7 @@ out: static bool proc_sys_fill_cache(struct file *file, struct dir_context *ctx, struct ctl_table_header *head, - struct ctl_table *table) + const struct ctl_table *table) { struct dentry *child, *dir = file->f_path.dentry; struct inode *inode; @@ -717,7 +717,7 @@ static bool proc_sys_fill_cache(struct file *file, static bool proc_sys_link_fill_cache(struct file *file, struct dir_context *ctx, struct ctl_table_header *head, - struct ctl_table *table) + const struct ctl_table *table) { bool ret = true; @@ -735,7 +735,7 @@ out: return ret; } -static int scan(struct ctl_table_header *head, struct ctl_table *table, +static int scan(struct ctl_table_header *head, const struct ctl_table *table, unsigned long *pos, struct file *file, struct dir_context *ctx) { @@ -759,7 +759,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx) { struct ctl_table_header *head = grab_header(file_inode(file)); struct ctl_table_header *h = NULL; - struct ctl_table *entry; + const struct ctl_table *entry; struct ctl_dir *ctl_dir; unsigned long pos; @@ -792,7 +792,7 @@ static int proc_sys_permission(struct mnt_idmap *idmap, * are _NOT_ writeable, capabilities or not. */ struct ctl_table_header *head; - struct ctl_table *table; + const struct ctl_table *table; int error; /* Executable files are not allowed under /proc/sys/ */ @@ -836,7 +836,7 @@ static int proc_sys_getattr(struct mnt_idmap *idmap, { struct inode *inode = d_inode(path->dentry); struct ctl_table_header *head = grab_header(inode); - struct ctl_table *table = PROC_I(inode)->sysctl_entry; + const struct ctl_table *table = PROC_I(inode)->sysctl_entry; if (IS_ERR(head)) return PTR_ERR(head); @@ -935,7 +935,7 @@ static struct ctl_dir *find_subdir(struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; - struct ctl_table *entry; + const struct ctl_table *entry; entry = find_entry(&head, dir, name, namelen); if (!entry) @@ -1046,12 +1046,12 @@ static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) } static int sysctl_follow_link(struct ctl_table_header **phead, - struct ctl_table **pentry) + const struct ctl_table **pentry) { struct ctl_table_header *head; + const struct ctl_table *entry; struct ctl_table_root *root; struct ctl_table_set *set; - struct ctl_table *entry; struct ctl_dir *dir; int ret; @@ -1078,7 +1078,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead, return ret; } -static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) +static int sysctl_err(const char *path, const struct ctl_table *table, char *fmt, ...) { struct va_format vaf; va_list args; @@ -1094,7 +1094,7 @@ static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) 
return -EINVAL; } -static int sysctl_check_table_array(const char *path, struct ctl_table *table) +static int sysctl_check_table_array(const char *path, const struct ctl_table *table) { unsigned int extra; int err = 0; @@ -1133,7 +1133,7 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table) static int sysctl_check_table(const char *path, struct ctl_table_header *header) { - struct ctl_table *entry; + const struct ctl_table *entry; int err = 0; list_for_each_table_entry(entry, header) { if (!entry->procname) @@ -1169,8 +1169,9 @@ static int sysctl_check_table(const char *path, struct ctl_table_header *header) static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table_header *head) { - struct ctl_table *link_table, *entry, *link; + struct ctl_table *link_table, *link; struct ctl_table_header *links; + const struct ctl_table *entry; struct ctl_node *node; char *link_name; int name_bytes; @@ -1215,7 +1216,7 @@ static bool get_links(struct ctl_dir *dir, struct ctl_table_root *link_root) { struct ctl_table_header *tmp_head; - struct ctl_table *entry, *link; + const struct ctl_table *entry, *link; if (header->ctl_table_size == 0 || sysctl_is_perm_empty_ctl_header(header)) @@ -1466,7 +1467,7 @@ static void put_links(struct ctl_table_header *header) struct ctl_table_root *root = header->root; struct ctl_dir *parent = header->parent; struct ctl_dir *core_parent; - struct ctl_table *entry; + const struct ctl_table *entry; if (header->set == root_set) return; @@ -1477,7 +1478,7 @@ static void put_links(struct ctl_table_header *header) list_for_each_table_entry(entry, header) { struct ctl_table_header *link_head; - struct ctl_table *link; + const struct ctl_table *link; const char *name = entry->procname; link = find_entry(&link_head, core_parent, name, strlen(name)); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index aa4c6d44aaa0..a473deaf5a91 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -162,7 +162,7 @@ struct ctl_node { struct ctl_table_header { union { struct { - struct ctl_table *ctl_table; + const struct ctl_table *ctl_table; int ctl_table_size; int used; int count; -- cgit v1.2.3 From 7abc9b53bd515d7953d1f4e069b062ec4b5ba9e7 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 5 Aug 2024 11:39:38 +0200 Subject: sysctl: allow registration of const struct ctl_table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Putting structure, especially those containing function pointers, into read-only memory makes the safer and easier to reason about. Change the sysctl registration APIs to allow registration of "const struct ctl_table". 
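A hedged example of what this enables (the sysctl path, knob name and variable below are made up for illustration; only register_sysctl_sz(), ARRAY_SIZE() and proc_dointvec come from the existing API shown in the diff): with the registration prototypes taking const, a handler table can be declared "static const" and thus placed in read-only memory.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sysctl.h>

static int example_value;		/* hypothetical knob, never unregistered in this sketch */

static const struct ctl_table example_table[] = {
	{
		.procname	= "example_value",
		.data		= &example_value,
		.maxlen		= sizeof(example_value),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
};

static int __init example_sysctl_init(void)
{
	/* The sized registration variant whose prototype is constified above. */
	if (!register_sysctl_sz("kernel/example", example_table,
				ARRAY_SIZE(example_table)))
		return -ENOMEM;

	return 0;
}
late_initcall(example_sysctl_init);
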
Signed-off-by: Thomas Weißschuh Acked-by: Kees Cook Reviewed-by: Kees Cook # security/* Signed-off-by: Joel Granados --- fs/proc/proc_sysctl.c | 6 +++--- include/linux/sysctl.h | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 713abccbfcf9..968f8dcffd8f 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1359,7 +1359,7 @@ static struct ctl_dir *sysctl_mkdir_p(struct ctl_dir *dir, const char *path) */ struct ctl_table_header *__register_sysctl_table( struct ctl_table_set *set, - const char *path, struct ctl_table *table, size_t table_size) + const char *path, const struct ctl_table *table, size_t table_size) { struct ctl_table_root *root = set->dir.header.root; struct ctl_table_header *header; @@ -1420,7 +1420,7 @@ fail: * * See __register_sysctl_table for more details. */ -struct ctl_table_header *register_sysctl_sz(const char *path, struct ctl_table *table, +struct ctl_table_header *register_sysctl_sz(const char *path, const struct ctl_table *table, size_t table_size) { return __register_sysctl_table(&sysctl_table_root.default_set, @@ -1449,7 +1449,7 @@ EXPORT_SYMBOL(register_sysctl_sz); * * Context: if your base directory does not exist it will be created for you. */ -void __init __register_sysctl_init(const char *path, struct ctl_table *table, +void __init __register_sysctl_init(const char *path, const struct ctl_table *table, const char *table_name, size_t table_size) { struct ctl_table_header *hdr = register_sysctl_sz(path, table, table_size); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index a473deaf5a91..202855befa8b 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -223,13 +223,13 @@ extern void retire_sysctl_set(struct ctl_table_set *set); struct ctl_table_header *__register_sysctl_table( struct ctl_table_set *set, - const char *path, struct ctl_table *table, size_t table_size); -struct ctl_table_header *register_sysctl_sz(const char *path, struct ctl_table *table, + const char *path, const struct ctl_table *table, size_t table_size); +struct ctl_table_header *register_sysctl_sz(const char *path, const struct ctl_table *table, size_t table_size); void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init_bases(void); -extern void __register_sysctl_init(const char *path, struct ctl_table *table, +extern void __register_sysctl_init(const char *path, const struct ctl_table *table, const char *table_name, size_t table_size); #define register_sysctl_init(path, table) \ __register_sysctl_init(path, table, #table, ARRAY_SIZE(table)) @@ -251,7 +251,7 @@ extern int no_unaligned_warning; #else /* CONFIG_SYSCTL */ -static inline void register_sysctl_init(const char *path, struct ctl_table *table) +static inline void register_sysctl_init(const char *path, const struct ctl_table *table) { } @@ -261,7 +261,7 @@ static inline struct ctl_table_header *register_sysctl_mount_point(const char *p } static inline struct ctl_table_header *register_sysctl_sz(const char *path, - struct ctl_table *table, + const struct ctl_table *table, size_t table_size) { return NULL; -- cgit v1.2.3 From 6607c17c6c5e029da03a90085db22daf518232bf Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Tue, 8 Oct 2024 00:06:15 -0700 Subject: net: mana: Enable debugfs files for MANA device Implement debugfs in MANA driver to be able to view RX,TX,EQ queue specific attributes and dump their gdma queues. 
These dumps can be used by other userspace utilities to improve debuggability and troubleshooting Following files are added in debugfs: /sys/kernel/debug/mana/ |-------------- 1 |--------------- EQs | |------- eq0 | | |---head | | |---tail | | |---eq_dump | |------- eq1 | . | . | |--------------- adapter-MTU |--------------- vport0 |------- RX-0 | |---cq_budget | |---cq_dump | |---cq_head | |---cq_tail | |---rq_head | |---rq_nbuf | |---rq_tail | |---rxq_dump |------- RX-1 . . |------- TX-0 | |---cq_budget | |---cq_dump | |---cq_head | |---cq_tail | |---sq_head | |---sq_pend_skb_qlen | |---sq_tail | |---txq_dump |------- TX-1 . . Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/microsoft/mana/gdma_main.c | 43 +++++++++- drivers/net/ethernet/microsoft/mana/mana_en.c | 105 +++++++++++++++++++++++- include/net/mana/gdma.h | 6 +- include/net/mana/mana.h | 8 ++ 4 files changed, 159 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index ca4ed58f1206..e97af7ac2bb2 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Copyright (c) 2021, Microsoft Corporation. */ +#include #include #include #include @@ -8,6 +9,8 @@ #include +struct dentry *mana_debugfs_root; + static u32 mana_gd_r32(struct gdma_context *g, u64 offset) { return readl(g->bar0_va + offset); @@ -1516,6 +1519,12 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) gc->bar0_va = bar0_va; gc->dev = &pdev->dev; + if (gc->is_pf) + gc->mana_pci_debugfs = debugfs_create_dir("0", mana_debugfs_root); + else + gc->mana_pci_debugfs = debugfs_create_dir(pci_slot_name(pdev->slot), + mana_debugfs_root); + err = mana_gd_setup(pdev); if (err) goto unmap_bar; @@ -1529,6 +1538,13 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) cleanup_gd: mana_gd_cleanup(pdev); unmap_bar: + /* + * at this point we know that the other debugfs child dir/files + * are either not yet created or are already cleaned up. + * The pci debugfs folder clean-up now, will only be cleaning up + * adapter-MTU file and apc->mana_pci_debugfs folder. 
+ */ + debugfs_remove_recursive(gc->mana_pci_debugfs); pci_iounmap(pdev, bar0_va); free_gc: pci_set_drvdata(pdev, NULL); @@ -1549,6 +1565,8 @@ static void mana_gd_remove(struct pci_dev *pdev) mana_gd_cleanup(pdev); + debugfs_remove_recursive(gc->mana_pci_debugfs); + pci_iounmap(pdev, gc->bar0_va); vfree(gc); @@ -1600,6 +1618,8 @@ static void mana_gd_shutdown(struct pci_dev *pdev) mana_gd_cleanup(pdev); + debugfs_remove_recursive(gc->mana_pci_debugfs); + pci_disable_device(pdev); } @@ -1619,7 +1639,28 @@ static struct pci_driver mana_driver = { .shutdown = mana_gd_shutdown, }; -module_pci_driver(mana_driver); +static int __init mana_driver_init(void) +{ + int err; + + mana_debugfs_root = debugfs_create_dir("mana", NULL); + + err = pci_register_driver(&mana_driver); + if (err) + debugfs_remove(mana_debugfs_root); + + return err; +} + +static void __exit mana_driver_exit(void) +{ + debugfs_remove(mana_debugfs_root); + + pci_unregister_driver(&mana_driver); +} + +module_init(mana_driver_init); +module_exit(mana_driver_exit); MODULE_DEVICE_TABLE(pci, mana_id_table); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index c47266d1c7c2..57ac732e7707 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -3,6 +3,7 @@ #include +#include #include #include #include @@ -30,6 +31,21 @@ static void mana_adev_idx_free(int idx) ida_free(&mana_adev_ida, idx); } +static ssize_t mana_dbg_q_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct gdma_queue *gdma_q = filp->private_data; + + return simple_read_from_buffer(buf, count, pos, gdma_q->queue_mem_ptr, + gdma_q->queue_size); +} + +static const struct file_operations mana_dbg_q_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = mana_dbg_q_read, +}; + /* Microsoft Azure Network Adapter (MANA) functions */ static int mana_open(struct net_device *ndev) @@ -721,6 +737,13 @@ static const struct net_device_ops mana_devops = { static void mana_cleanup_port_context(struct mana_port_context *apc) { + /* + * at this point all dir/files under the vport directory + * are already cleaned up. 
+ * We are sure the apc->mana_port_debugfs remove will not + * cause any freed memory access issues + */ + debugfs_remove(apc->mana_port_debugfs); kfree(apc->rxqs); apc->rxqs = NULL; } @@ -943,6 +966,8 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, else gc->adapter_mtu = ETH_FRAME_LEN; + debugfs_create_u16("adapter-MTU", 0400, gc->mana_pci_debugfs, &gc->adapter_mtu); + return 0; } @@ -1228,6 +1253,8 @@ static void mana_destroy_eq(struct mana_context *ac) if (!ac->eqs) return; + debugfs_remove_recursive(ac->mana_eqs_debugfs); + for (i = 0; i < gc->max_num_queues; i++) { eq = ac->eqs[i].eq; if (!eq) @@ -1240,6 +1267,18 @@ static void mana_destroy_eq(struct mana_context *ac) ac->eqs = NULL; } +static void mana_create_eq_debugfs(struct mana_context *ac, int i) +{ + struct mana_eq eq = ac->eqs[i]; + char eqnum[32]; + + sprintf(eqnum, "eq%d", i); + eq.mana_eq_debugfs = debugfs_create_dir(eqnum, ac->mana_eqs_debugfs); + debugfs_create_u32("head", 0400, eq.mana_eq_debugfs, &eq.eq->head); + debugfs_create_u32("tail", 0400, eq.mana_eq_debugfs, &eq.eq->tail); + debugfs_create_file("eq_dump", 0400, eq.mana_eq_debugfs, eq.eq, &mana_dbg_q_fops); +} + static int mana_create_eq(struct mana_context *ac) { struct gdma_dev *gd = ac->gdma_dev; @@ -1260,11 +1299,14 @@ static int mana_create_eq(struct mana_context *ac) spec.eq.context = ac->eqs; spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; + ac->mana_eqs_debugfs = debugfs_create_dir("EQs", gc->mana_pci_debugfs); + for (i = 0; i < gc->max_num_queues; i++) { spec.eq.msix_index = (i + 1) % gc->num_msix_usable; err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq); if (err) goto out; + mana_create_eq_debugfs(ac, i); } return 0; @@ -1871,6 +1913,8 @@ static void mana_destroy_txq(struct mana_port_context *apc) return; for (i = 0; i < apc->num_queues; i++) { + debugfs_remove_recursive(apc->tx_qp[i].mana_tx_debugfs); + napi = &apc->tx_qp[i].tx_cq.napi; if (apc->tx_qp[i].txq.napi_initialized) { napi_synchronize(napi); @@ -1889,6 +1933,31 @@ static void mana_destroy_txq(struct mana_port_context *apc) apc->tx_qp = NULL; } +static void mana_create_txq_debugfs(struct mana_port_context *apc, int idx) +{ + struct mana_tx_qp *tx_qp = &apc->tx_qp[idx]; + char qnum[32]; + + sprintf(qnum, "TX-%d", idx); + tx_qp->mana_tx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs); + debugfs_create_u32("sq_head", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->txq.gdma_sq->head); + debugfs_create_u32("sq_tail", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->txq.gdma_sq->tail); + debugfs_create_u32("sq_pend_skb_qlen", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->txq.pending_skbs.qlen); + debugfs_create_u32("cq_head", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->tx_cq.gdma_cq->head); + debugfs_create_u32("cq_tail", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->tx_cq.gdma_cq->tail); + debugfs_create_u32("cq_budget", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->tx_cq.budget); + debugfs_create_file("txq_dump", 0400, tx_qp->mana_tx_debugfs, + tx_qp->txq.gdma_sq, &mana_dbg_q_fops); + debugfs_create_file("cq_dump", 0400, tx_qp->mana_tx_debugfs, + tx_qp->tx_cq.gdma_cq, &mana_dbg_q_fops); +} + static int mana_create_txq(struct mana_port_context *apc, struct net_device *net) { @@ -2000,6 +2069,8 @@ static int mana_create_txq(struct mana_port_context *apc, gc->cq_table[cq->gdma_id] = cq->gdma_cq; + mana_create_txq_debugfs(apc, i); + netif_napi_add_tx(net, &cq->napi, mana_poll); napi_enable(&cq->napi); txq->napi_initialized = true; @@ -2027,6 +2098,8 @@ static void mana_destroy_rxq(struct 
mana_port_context *apc, if (!rxq) return; + debugfs_remove_recursive(rxq->mana_rx_debugfs); + napi = &rxq->rx_cq.napi; if (napi_initialized) { @@ -2308,6 +2381,28 @@ out: return NULL; } +static void mana_create_rxq_debugfs(struct mana_port_context *apc, int idx) +{ + struct mana_rxq *rxq; + char qnum[32]; + + rxq = apc->rxqs[idx]; + + sprintf(qnum, "RX-%d", idx); + rxq->mana_rx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs); + debugfs_create_u32("rq_head", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->head); + debugfs_create_u32("rq_tail", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->tail); + debugfs_create_u32("rq_nbuf", 0400, rxq->mana_rx_debugfs, &rxq->num_rx_buf); + debugfs_create_u32("cq_head", 0400, rxq->mana_rx_debugfs, + &rxq->rx_cq.gdma_cq->head); + debugfs_create_u32("cq_tail", 0400, rxq->mana_rx_debugfs, + &rxq->rx_cq.gdma_cq->tail); + debugfs_create_u32("cq_budget", 0400, rxq->mana_rx_debugfs, &rxq->rx_cq.budget); + debugfs_create_file("rxq_dump", 0400, rxq->mana_rx_debugfs, rxq->gdma_rq, &mana_dbg_q_fops); + debugfs_create_file("cq_dump", 0400, rxq->mana_rx_debugfs, rxq->rx_cq.gdma_cq, + &mana_dbg_q_fops); +} + static int mana_add_rx_queues(struct mana_port_context *apc, struct net_device *ndev) { @@ -2326,6 +2421,8 @@ static int mana_add_rx_queues(struct mana_port_context *apc, u64_stats_init(&rxq->stats.syncp); apc->rxqs[i] = rxq; + + mana_create_rxq_debugfs(apc, i); } apc->default_rxobj = apc->rxqs[0]->rxobj; @@ -2518,14 +2615,19 @@ void mana_query_gf_stats(struct mana_port_context *apc) static int mana_init_port(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); + struct gdma_dev *gd = apc->ac->gdma_dev; u32 max_txq, max_rxq, max_queues; int port_idx = apc->port_idx; + struct gdma_context *gc; + char vport[32]; int err; err = mana_init_port_context(apc); if (err) return err; + gc = gd->gdma_context; + err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, &apc->indir_table_sz); if (err) { @@ -2542,7 +2644,8 @@ static int mana_init_port(struct net_device *ndev) apc->num_queues = apc->max_queues; eth_hw_addr_set(ndev, apc->mac_addr); - + sprintf(vport, "vport%d", port_idx); + apc->mana_port_debugfs = debugfs_create_dir(vport, gc->mana_pci_debugfs); return 0; reset_apc: diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index de47fa533b15..90f56656b572 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -267,7 +267,8 @@ struct gdma_event { struct gdma_queue; struct mana_eq { - struct gdma_queue *eq; + struct gdma_queue *eq; + struct dentry *mana_eq_debugfs; }; typedef void gdma_eq_callback(void *context, struct gdma_queue *q, @@ -365,6 +366,7 @@ struct gdma_irq_context { struct gdma_context { struct device *dev; + struct dentry *mana_pci_debugfs; /* Per-vPort max number of queues */ unsigned int max_num_queues; @@ -878,5 +880,7 @@ int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, u32 resp_len, void *resp); int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle); +void mana_register_debugfs(void); +void mana_unregister_debugfs(void); #endif /* _GDMA_H */ diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 9b0faa24b758..0d00b24eacaf 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -350,6 +350,7 @@ struct mana_rxq { int xdp_rc; /* XDP redirect return code */ struct page_pool *page_pool; + struct dentry *mana_rx_debugfs; /* MUST BE THE LAST MEMBER: * Each receive buffer has an associated mana_recv_buf_oob. 
@@ -363,6 +364,8 @@ struct mana_tx_qp { struct mana_cq tx_cq; mana_handle_t tx_object; + + struct dentry *mana_tx_debugfs; }; struct mana_ethtool_stats { @@ -407,6 +410,7 @@ struct mana_context { u16 num_ports; struct mana_eq *eqs; + struct dentry *mana_eqs_debugfs; struct net_device *ports[MAX_PORTS_IN_MANA_DEV]; }; @@ -468,6 +472,9 @@ struct mana_port_context { bool port_st_save; /* Saved port state */ struct mana_ethtool_stats eth_stats; + + /* Debugfs */ + struct dentry *mana_port_debugfs; }; netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev); @@ -494,6 +501,7 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); extern const struct ethtool_ops mana_ethtool_ops; +extern struct dentry *mana_debugfs_root; /* A CQ can be created not associated with any EQ */ #define GDMA_CQ_NO_EQ 0xffff -- cgit v1.2.3 From 823a566221a5639f6c69424897218e5d6431a970 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 9 Oct 2024 11:20:31 +0200 Subject: locking/ww_mutex: Adjust to lockdep nest_lock requirements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using mutex_acquire_nest() with a nest_lock, lockdep refcounts the number of acquired lockdep_maps of mutexes of the same class, and also keeps a pointer to the first acquired lockdep_map of a class. That pointer is then used for various comparison-, printing- and checking purposes, but there is no mechanism to actively ensure that lockdep_map stays in memory. Instead, a warning is printed if the lockdep_map is freed and there are still held locks of the same lock class, even if the lockdep_map itself has been released. In the context of WW/WD transactions that means that if a user unlocks and frees a ww_mutex from within an ongoing ww transaction, and that mutex happens to be the first ww_mutex grabbed in the transaction, such a warning is printed and there might be a risk of a UAF. Note that this is only problem when lockdep is enabled and affects only dereferences of struct lockdep_map. Adjust to this by adding a fake lockdep_map to the acquired context and make sure it is the first acquired lockdep map of the associated ww_mutex class. Then hold it for the duration of the WW/WD transaction. This has the side effect that trying to lock a ww mutex *without* a ww_acquire_context but where a such context has been acquire, we'd see a lockdep splat. The test-ww_mutex.c selftest attempts to do that, so modify that particular test to not acquire a ww_acquire_context if it is not going to be used. Signed-off-by: Thomas Hellström Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20241009092031.6356-1-thomas.hellstrom@linux.intel.com --- include/linux/ww_mutex.h | 14 ++++++++++++++ kernel/locking/test-ww_mutex.c | 8 +++++--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index bb763085479a..a401a2f31a77 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -65,6 +65,16 @@ struct ww_acquire_ctx { #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; + /** + * @first_lock_dep_map: fake lockdep_map for first locked ww_mutex. + * + * lockdep requires the lockdep_map for the first locked ww_mutex + * in a ww transaction to remain in memory until all ww_mutexes of + * the transaction have been unlocked. 
Ensure this by keeping a + * fake locked ww_mutex lockdep map between ww_acquire_init() and + * ww_acquire_fini(). + */ + struct lockdep_map first_lock_dep_map; #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH unsigned int deadlock_inject_interval; @@ -146,7 +156,10 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, debug_check_no_locks_freed((void *)ctx, sizeof(*ctx)); lockdep_init_map(&ctx->dep_map, ww_class->acquire_name, &ww_class->acquire_key, 0); + lockdep_init_map(&ctx->first_lock_dep_map, ww_class->mutex_name, + &ww_class->mutex_key, 0); mutex_acquire(&ctx->dep_map, 0, 0, _RET_IP_); + mutex_acquire_nest(&ctx->first_lock_dep_map, 0, 0, &ctx->dep_map, _RET_IP_); #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH ctx->deadlock_inject_interval = 1; @@ -185,6 +198,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { #ifdef CONFIG_DEBUG_LOCK_ALLOC + mutex_release(&ctx->first_lock_dep_map, _THIS_IP_); mutex_release(&ctx->dep_map, _THIS_IP_); #endif #ifdef DEBUG_WW_MUTEXES diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 10a5736a21c2..5d58b2c0ef98 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -62,7 +62,8 @@ static int __test_mutex(unsigned int flags) int ret; ww_mutex_init(&mtx.mutex, &ww_class); - ww_acquire_init(&ctx, &ww_class); + if (flags & TEST_MTX_CTX) + ww_acquire_init(&ctx, &ww_class); INIT_WORK_ONSTACK(&mtx.work, test_mutex_work); init_completion(&mtx.ready); @@ -90,7 +91,8 @@ static int __test_mutex(unsigned int flags) ret = wait_for_completion_timeout(&mtx.done, TIMEOUT); } ww_mutex_unlock(&mtx.mutex); - ww_acquire_fini(&ctx); + if (flags & TEST_MTX_CTX) + ww_acquire_fini(&ctx); if (ret) { pr_err("%s(flags=%x): mutual exclusion failure\n", @@ -679,7 +681,7 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - ret = stress(2047, hweight32(STRESS_ALL)*ncpus, STRESS_ALL); + ret = stress(2046, hweight32(STRESS_ALL)*ncpus, STRESS_ALL); if (ret) return ret; -- cgit v1.2.3 From 5461f3fd74a89757f95f351eb0bc26aafc2a2e91 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 20 Sep 2024 00:27:58 +0100 Subject: backlight: Remove notifier backlight_register_notifier and backlight_unregister_notifier have been unused since commit 6cb634d0dc85 ("ACPI: video: Remove code to unregister acpi_video backlight when a native backlight registers") With those not being called, it means that the backlight_notifier list is always empty. Remove the functions, the list itself and the enum used in the notifications. Signed-off-by: Dr. 
David Alan Gilbert Reviewed-by: Daniel Thompson Reviewed-by: Simona Vetter Link: https://lore.kernel.org/r/20240919232758.639925-1-linux@treblig.org Signed-off-by: Lee Jones --- drivers/video/backlight/backlight.c | 42 ------------------------------------- include/linux/backlight.h | 20 ------------------ 2 files changed, 62 deletions(-) (limited to 'include') diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index a82934694d05..f699e5827ccb 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -65,7 +65,6 @@ static struct list_head backlight_dev_list; static struct mutex backlight_dev_list_mutex; -static struct blocking_notifier_head backlight_notifier; static const char *const backlight_types[] = { [BACKLIGHT_RAW] = "raw", @@ -467,9 +466,6 @@ struct backlight_device *backlight_device_register(const char *name, list_add(&new_bd->entry, &backlight_dev_list); mutex_unlock(&backlight_dev_list_mutex); - blocking_notifier_call_chain(&backlight_notifier, - BACKLIGHT_REGISTERED, new_bd); - return new_bd; } EXPORT_SYMBOL(backlight_device_register); @@ -539,9 +535,6 @@ void backlight_device_unregister(struct backlight_device *bd) mutex_unlock(&pmac_backlight_mutex); #endif - blocking_notifier_call_chain(&backlight_notifier, - BACKLIGHT_UNREGISTERED, bd); - mutex_lock(&bd->ops_lock); bd->ops = NULL; mutex_unlock(&bd->ops_lock); @@ -566,40 +559,6 @@ static int devm_backlight_device_match(struct device *dev, void *res, return *r == data; } -/** - * backlight_register_notifier - get notified of backlight (un)registration - * @nb: notifier block with the notifier to call on backlight (un)registration - * - * Register a notifier to get notified when backlight devices get registered - * or unregistered. - * - * RETURNS: - * - * 0 on success, otherwise a negative error code - */ -int backlight_register_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_register(&backlight_notifier, nb); -} -EXPORT_SYMBOL(backlight_register_notifier); - -/** - * backlight_unregister_notifier - unregister a backlight notifier - * @nb: notifier block to unregister - * - * Register a notifier to get notified when backlight devices get registered - * or unregistered. - * - * RETURNS: - * - * 0 on success, otherwise a negative error code - */ -int backlight_unregister_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_unregister(&backlight_notifier, nb); -} -EXPORT_SYMBOL(backlight_unregister_notifier); - /** * devm_backlight_device_register - register a new backlight device * @dev: the device to register @@ -767,7 +726,6 @@ static int __init backlight_class_init(void) INIT_LIST_HEAD(&backlight_dev_list); mutex_init(&backlight_dev_list_mutex); - BLOCKING_INIT_NOTIFIER_HEAD(&backlight_notifier); return 0; } diff --git a/include/linux/backlight.h b/include/linux/backlight.h index ea9c1bc8148e..f5652e5a9060 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -66,24 +66,6 @@ enum backlight_type { BACKLIGHT_TYPE_MAX, }; -/** - * enum backlight_notification - the type of notification - * - * The notifications that is used for notification sent to the receiver - * that registered notifications using backlight_register_notifier(). - */ -enum backlight_notification { - /** - * @BACKLIGHT_REGISTERED: The backlight device is registered. - */ - BACKLIGHT_REGISTERED, - - /** - * @BACKLIGHT_UNREGISTERED: The backlight revice is unregistered. 
- */ - BACKLIGHT_UNREGISTERED, -}; - /** enum backlight_scale - the type of scale used for brightness values * * The type of scale used for brightness values. @@ -421,8 +403,6 @@ void devm_backlight_device_unregister(struct device *dev, struct backlight_device *bd); void backlight_force_update(struct backlight_device *bd, enum backlight_update_reason reason); -int backlight_register_notifier(struct notifier_block *nb); -int backlight_unregister_notifier(struct notifier_block *nb); struct backlight_device *backlight_device_get_by_name(const char *name); struct backlight_device *backlight_device_get_by_type(enum backlight_type type); int backlight_device_set_brightness(struct backlight_device *bd, -- cgit v1.2.3 From 6f1067cfbee72b04fc42234f7f1588f838cec0b6 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Tue, 1 Oct 2024 13:53:27 +0200 Subject: mfd: Add Congatec Board Controller driver Add core MFD driver for the Board Controller found on some Congatec SMARC module. This Board Controller provides functions like watchdog, GPIO, and I2C busses. This commit adds support only for the conga-SA7 module. Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20241001-congatec-board-controller-v3-1-39ceceed5c47@bootlin.com Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 12 ++ drivers/mfd/Makefile | 1 + drivers/mfd/cgbc-core.c | 411 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/cgbc.h | 44 +++++ 4 files changed, 468 insertions(+) create mode 100644 drivers/mfd/cgbc-core.c create mode 100644 include/linux/mfd/cgbc.h (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index f9325bcce1b9..03c1e4e3eea4 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -236,6 +236,18 @@ config MFD_AXP20X_RSB components like regulators or the PEK (Power Enable Key) under the corresponding menus. +config MFD_CGBC + tristate "Congatec Board Controller" + select MFD_CORE + depends on X86 + help + This is the core driver of the Board Controller found on some Congatec + SMARC modules. The Board Controller provides functions like watchdog, + I2C busses, and GPIO controller. + + To compile this driver as a module, choose M here: the module will be + called cgbc-core. + config MFD_CROS_EC_DEV tristate "ChromeOS Embedded Controller multifunction device" select MFD_CORE diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 2a9f91e81af8..e057d6d6faef 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_MFD_SM501) += sm501.o obj-$(CONFIG_ARCH_BCM2835) += bcm2835-pm.o obj-$(CONFIG_MFD_BCM590XX) += bcm590xx.o obj-$(CONFIG_MFD_BD9571MWV) += bd9571mwv.o +obj-$(CONFIG_MFD_CGBC) += cgbc-core.o obj-$(CONFIG_MFD_CROS_EC_DEV) += cros_ec_dev.o obj-$(CONFIG_MFD_CS42L43) += cs42l43.o obj-$(CONFIG_MFD_CS42L43_I2C) += cs42l43-i2c.o diff --git a/drivers/mfd/cgbc-core.c b/drivers/mfd/cgbc-core.c new file mode 100644 index 000000000000..93004a6b29c1 --- /dev/null +++ b/drivers/mfd/cgbc-core.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Congatec Board Controller core driver. + * + * The x86 Congatec modules have an embedded micro controller named Board + * Controller. This Board Controller has a Watchdog timer, some GPIOs, and two + * I2C busses. 
+ * + * Copyright (C) 2024 Bootlin + * + * Author: Thomas Richard + */ + +#include +#include +#include +#include +#include +#include +#include + +#define CGBC_IO_SESSION_BASE 0x0E20 +#define CGBC_IO_SESSION_END 0x0E30 +#define CGBC_IO_CMD_BASE 0x0E00 +#define CGBC_IO_CMD_END 0x0E10 + +#define CGBC_MASK_STATUS (BIT(6) | BIT(7)) +#define CGBC_MASK_DATA_COUNT 0x1F +#define CGBC_MASK_ERROR_CODE 0x1F + +#define CGBC_STATUS_DATA_READY 0x00 +#define CGBC_STATUS_CMD_READY BIT(6) +#define CGBC_STATUS_ERROR (BIT(6) | BIT(7)) + +#define CGBC_SESSION_CMD 0x00 +#define CGBC_SESSION_CMD_IDLE 0x00 +#define CGBC_SESSION_CMD_REQUEST 0x01 +#define CGBC_SESSION_DATA 0x01 +#define CGBC_SESSION_STATUS 0x02 +#define CGBC_SESSION_STATUS_FREE 0x03 +#define CGBC_SESSION_ACCESS 0x04 +#define CGBC_SESSION_ACCESS_GAINED 0x00 + +#define CGBC_SESSION_VALID_MIN 0x02 +#define CGBC_SESSION_VALID_MAX 0xFE + +#define CGBC_CMD_STROBE 0x00 +#define CGBC_CMD_INDEX 0x02 +#define CGBC_CMD_INDEX_CBM_MAN8 0x00 +#define CGBC_CMD_INDEX_CBM_AUTO32 0x03 +#define CGBC_CMD_DATA 0x04 +#define CGBC_CMD_ACCESS 0x0C + +#define CGBC_CMD_GET_FW_REV 0x21 + +static struct platform_device *cgbc_pdev; + +/* Wait the Board Controller is ready to receive some session commands */ +static int cgbc_wait_device(struct cgbc_device_data *cgbc) +{ + u16 status; + int ret; + + ret = readx_poll_timeout(ioread16, cgbc->io_session + CGBC_SESSION_STATUS, status, + status == CGBC_SESSION_STATUS_FREE, 0, 500000); + + if (ret || ioread32(cgbc->io_session + CGBC_SESSION_ACCESS)) + ret = -ENODEV; + + return ret; +} + +static int cgbc_session_command(struct cgbc_device_data *cgbc, u8 cmd) +{ + int ret; + u8 val; + + ret = readx_poll_timeout(ioread8, cgbc->io_session + CGBC_SESSION_CMD, val, + val == CGBC_SESSION_CMD_IDLE, 0, 100000); + if (ret) + return ret; + + iowrite8(cmd, cgbc->io_session + CGBC_SESSION_CMD); + + ret = readx_poll_timeout(ioread8, cgbc->io_session + CGBC_SESSION_CMD, val, + val == CGBC_SESSION_CMD_IDLE, 0, 100000); + if (ret) + return ret; + + ret = (int)ioread8(cgbc->io_session + CGBC_SESSION_DATA); + + iowrite8(CGBC_SESSION_STATUS_FREE, cgbc->io_session + CGBC_SESSION_STATUS); + + return ret; +} + +static int cgbc_session_request(struct cgbc_device_data *cgbc) +{ + unsigned int ret; + + ret = cgbc_wait_device(cgbc); + + if (ret) + return dev_err_probe(cgbc->dev, ret, "device not found or not ready\n"); + + cgbc->session = cgbc_session_command(cgbc, CGBC_SESSION_CMD_REQUEST); + + /* The Board Controller sent us a wrong session handle, we cannot communicate with it */ + if (cgbc->session < CGBC_SESSION_VALID_MIN || cgbc->session > CGBC_SESSION_VALID_MAX) + return dev_err_probe(cgbc->dev, -ECONNREFUSED, + "failed to get a valid session handle\n"); + + return 0; +} + +static void cgbc_session_release(struct cgbc_device_data *cgbc) +{ + if (cgbc_session_command(cgbc, cgbc->session) != cgbc->session) + dev_warn(cgbc->dev, "failed to release session\n"); +} + +static bool cgbc_command_lock(struct cgbc_device_data *cgbc) +{ + iowrite8(cgbc->session, cgbc->io_cmd + CGBC_CMD_ACCESS); + + return ioread8(cgbc->io_cmd + CGBC_CMD_ACCESS) == cgbc->session; +} + +static void cgbc_command_unlock(struct cgbc_device_data *cgbc) +{ + iowrite8(cgbc->session, cgbc->io_cmd + CGBC_CMD_ACCESS); +} + +int cgbc_command(struct cgbc_device_data *cgbc, void *cmd, unsigned int cmd_size, void *data, + unsigned int data_size, u8 *status) +{ + u8 checksum = 0, data_checksum = 0, istatus = 0, val; + u8 *_data = (u8 *)data; + u8 *_cmd = (u8 *)cmd; + int mode_change = -1; + bool 
lock; + int ret, i; + + mutex_lock(&cgbc->lock); + + /* Request access */ + ret = readx_poll_timeout(cgbc_command_lock, cgbc, lock, lock, 0, 100000); + if (ret) + goto out; + + /* Wait board controller is ready */ + ret = readx_poll_timeout(ioread8, cgbc->io_cmd + CGBC_CMD_STROBE, val, + val == CGBC_CMD_STROBE, 0, 100000); + if (ret) + goto release; + + /* Write command packet */ + if (cmd_size <= 2) { + iowrite8(CGBC_CMD_INDEX_CBM_MAN8, cgbc->io_cmd + CGBC_CMD_INDEX); + } else { + iowrite8(CGBC_CMD_INDEX_CBM_AUTO32, cgbc->io_cmd + CGBC_CMD_INDEX); + if ((cmd_size % 4) != 0x03) + mode_change = (cmd_size & 0xFFFC) - 1; + } + + for (i = 0; i < cmd_size; i++) { + iowrite8(_cmd[i], cgbc->io_cmd + CGBC_CMD_DATA + (i % 4)); + checksum ^= _cmd[i]; + if (mode_change == i) + iowrite8((i + 1) | CGBC_CMD_INDEX_CBM_MAN8, cgbc->io_cmd + CGBC_CMD_INDEX); + } + + /* Append checksum byte */ + iowrite8(checksum, cgbc->io_cmd + CGBC_CMD_DATA + (i % 4)); + + /* Perform command strobe */ + iowrite8(cgbc->session, cgbc->io_cmd + CGBC_CMD_STROBE); + + /* Rewind cmd buffer index */ + iowrite8(CGBC_CMD_INDEX_CBM_AUTO32, cgbc->io_cmd + CGBC_CMD_INDEX); + + /* Wait command completion */ + ret = read_poll_timeout(ioread8, val, val == CGBC_CMD_STROBE, 0, 100000, false, + cgbc->io_cmd + CGBC_CMD_STROBE); + if (ret) + goto release; + + istatus = ioread8(cgbc->io_cmd + CGBC_CMD_DATA); + checksum = istatus; + + /* Check command status */ + switch (istatus & CGBC_MASK_STATUS) { + case CGBC_STATUS_DATA_READY: + if (istatus > data_size) + istatus = data_size; + for (i = 0; i < istatus; i++) { + _data[i] = ioread8(cgbc->io_cmd + CGBC_CMD_DATA + ((i + 1) % 4)); + checksum ^= _data[i]; + } + data_checksum = ioread8(cgbc->io_cmd + CGBC_CMD_DATA + ((i + 1) % 4)); + istatus &= CGBC_MASK_DATA_COUNT; + break; + case CGBC_STATUS_ERROR: + case CGBC_STATUS_CMD_READY: + data_checksum = ioread8(cgbc->io_cmd + CGBC_CMD_DATA + 1); + if ((istatus & CGBC_MASK_STATUS) == CGBC_STATUS_ERROR) + ret = -EIO; + istatus = istatus & CGBC_MASK_ERROR_CODE; + break; + default: + data_checksum = ioread8(cgbc->io_cmd + CGBC_CMD_DATA + 1); + istatus &= CGBC_MASK_ERROR_CODE; + ret = -EIO; + break; + } + + /* Checksum verification */ + if (ret == 0 && data_checksum != checksum) + ret = -EIO; + +release: + cgbc_command_unlock(cgbc); + +out: + mutex_unlock(&cgbc->lock); + + if (status) + *status = istatus; + + return ret; +} +EXPORT_SYMBOL_GPL(cgbc_command); + +static struct mfd_cell cgbc_devs[] = { + { .name = "cgbc-wdt" }, + { .name = "cgbc-gpio" }, + { .name = "cgbc-i2c", .id = 1 }, + { .name = "cgbc-i2c", .id = 2 }, +}; + +static int cgbc_map(struct cgbc_device_data *cgbc) +{ + struct device *dev = cgbc->dev; + struct platform_device *pdev = to_platform_device(dev); + struct resource *ioport; + + ioport = platform_get_resource(pdev, IORESOURCE_IO, 0); + if (!ioport) + return -EINVAL; + + cgbc->io_session = devm_ioport_map(dev, ioport->start, resource_size(ioport)); + if (!cgbc->io_session) + return -ENOMEM; + + ioport = platform_get_resource(pdev, IORESOURCE_IO, 1); + if (!ioport) + return -EINVAL; + + cgbc->io_cmd = devm_ioport_map(dev, ioport->start, resource_size(ioport)); + if (!cgbc->io_cmd) + return -ENOMEM; + + return 0; +} + +static const struct resource cgbc_resources[] = { + { + .start = CGBC_IO_SESSION_BASE, + .end = CGBC_IO_SESSION_END, + .flags = IORESOURCE_IO, + }, + { + .start = CGBC_IO_CMD_BASE, + .end = CGBC_IO_CMD_END, + .flags = IORESOURCE_IO, + }, +}; + +static ssize_t cgbc_version_show(struct device *dev, + struct device_attribute 
*attr, char *buf) +{ + struct cgbc_device_data *cgbc = dev_get_drvdata(dev); + + return sysfs_emit(buf, "CGBCP%c%c%c\n", cgbc->version.feature, cgbc->version.major, + cgbc->version.minor); +} + +static DEVICE_ATTR_RO(cgbc_version); + +static struct attribute *cgbc_attrs[] = { + &dev_attr_cgbc_version.attr, + NULL +}; + +ATTRIBUTE_GROUPS(cgbc); + +static int cgbc_get_version(struct cgbc_device_data *cgbc) +{ + u8 cmd = CGBC_CMD_GET_FW_REV; + u8 data[4]; + int ret; + + ret = cgbc_command(cgbc, &cmd, 1, &data, sizeof(data), NULL); + if (ret) + return ret; + + cgbc->version.feature = data[0]; + cgbc->version.major = data[1]; + cgbc->version.minor = data[2]; + + return 0; +} + +static int cgbc_init_device(struct cgbc_device_data *cgbc) +{ + int ret; + + ret = cgbc_session_request(cgbc); + if (ret) + return ret; + + ret = cgbc_get_version(cgbc); + if (ret) + return ret; + + return mfd_add_devices(cgbc->dev, -1, cgbc_devs, ARRAY_SIZE(cgbc_devs), NULL, 0, NULL); +} + +static int cgbc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct cgbc_device_data *cgbc; + int ret; + + cgbc = devm_kzalloc(dev, sizeof(*cgbc), GFP_KERNEL); + if (!cgbc) + return -ENOMEM; + + cgbc->dev = dev; + + ret = cgbc_map(cgbc); + if (ret) + return ret; + + mutex_init(&cgbc->lock); + + platform_set_drvdata(pdev, cgbc); + + return cgbc_init_device(cgbc); +} + +static void cgbc_remove(struct platform_device *pdev) +{ + struct cgbc_device_data *cgbc = platform_get_drvdata(pdev); + + cgbc_session_release(cgbc); + + mfd_remove_devices(&pdev->dev); +} + +static struct platform_driver cgbc_driver = { + .driver = { + .name = "cgbc", + .dev_groups = cgbc_groups, + }, + .probe = cgbc_probe, + .remove_new = cgbc_remove, +}; + +static const struct dmi_system_id cgbc_dmi_table[] __initconst = { + { + .ident = "SA7", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "congatec"), + DMI_MATCH(DMI_BOARD_NAME, "conga-SA7"), + }, + }, + {} +}; +MODULE_DEVICE_TABLE(dmi, cgbc_dmi_table); + +static int __init cgbc_init(void) +{ + const struct dmi_system_id *id; + int ret = -ENODEV; + + id = dmi_first_match(cgbc_dmi_table); + if (IS_ERR_OR_NULL(id)) + return ret; + + cgbc_pdev = platform_device_register_simple("cgbc", PLATFORM_DEVID_NONE, cgbc_resources, + ARRAY_SIZE(cgbc_resources)); + if (IS_ERR(cgbc_pdev)) + return PTR_ERR(cgbc_pdev); + + return platform_driver_register(&cgbc_driver); +} + +static void __exit cgbc_exit(void) +{ + platform_device_unregister(cgbc_pdev); + platform_driver_unregister(&cgbc_driver); +} + +module_init(cgbc_init); +module_exit(cgbc_exit); + +MODULE_DESCRIPTION("Congatec Board Controller Core Driver"); +MODULE_AUTHOR("Thomas Richard "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:cgbc-core"); diff --git a/include/linux/mfd/cgbc.h b/include/linux/mfd/cgbc.h new file mode 100644 index 000000000000..badbec4c7033 --- /dev/null +++ b/include/linux/mfd/cgbc.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Congatec Board Controller driver definitions + * + * Copyright (C) 2024 Bootlin + * Author: Thomas Richard + */ + +#ifndef _LINUX_MFD_CGBC_H_ + +/** + * struct cgbc_version - Board Controller device version structure + * @feature: Board Controller feature number + * @major: Board Controller major revision + * @minor: Board Controller minor revision + */ +struct cgbc_version { + unsigned char feature; + unsigned char major; + unsigned char minor; +}; + +/** + * struct cgbc_device_data - Internal representation of the Board Controller device + * @io_session: Pointer to 
the session IO memory + * @io_cmd: Pointer to the command IO memory + * @session: Session id returned by the Board Controller + * @dev: Pointer to kernel device structure + * @cgbc_version: Board Controller version structure + * @mutex: Board Controller mutex + */ +struct cgbc_device_data { + void __iomem *io_session; + void __iomem *io_cmd; + u8 session; + struct device *dev; + struct cgbc_version version; + struct mutex lock; +}; + +int cgbc_command(struct cgbc_device_data *cgbc, void *cmd, unsigned int cmd_size, + void *data, unsigned int data_size, u8 *status); + +#endif /*_LINUX_MFD_CGBC_H_*/ -- cgit v1.2.3 From 0e6caab8db8bc9359d1a8363e1ee9b2205ed704d Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:11 -0400 Subject: tracing: Declare system call tracepoints with TRACE_EVENT_SYSCALL In preparation for allowing system call tracepoints to handle page faults, introduce TRACE_EVENT_SYSCALL to declare the sys_enter/sys_exit tracepoints. Move the common code between __DECLARE_TRACE and __DECLARE_TRACE_SYSCALL into __DECLARE_TRACE_COMMON. This change is not meant to alter the generated code, and only prepares the following modifications. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-2-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 53 +++++++++++++++++++++++++++++++---------- include/trace/bpf_probe.h | 3 +++ include/trace/define_trace.h | 5 ++++ include/trace/events/syscalls.h | 4 ++-- include/trace/perf.h | 3 +++ include/trace/trace_events.h | 28 ++++++++++++++++++++++ 6 files changed, 81 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 3d33b9872cec..76e441b39a96 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -197,7 +197,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. */ -#define __DO_TRACE(name, args, cond) \ +#define __DO_TRACE(name, args, cond, syscall) \ do { \ int __maybe_unused __idx = 0; \ \ @@ -222,21 +222,10 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * site if it is not watching, as it will need to be active when the * tracepoint is enabled. 
*/ -#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ +#define __DECLARE_TRACE_COMMON(name, proto, args, cond, data_proto) \ extern int __traceiter_##name(data_proto); \ DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \ extern struct tracepoint __tracepoint_##name; \ - static inline void trace_##name(proto) \ - { \ - if (static_branch_unlikely(&__tracepoint_##name.key)) \ - __DO_TRACE(name, \ - TP_ARGS(args), \ - TP_CONDITION(cond)); \ - if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ - WARN_ONCE(!rcu_is_watching(), \ - "RCU not watching for tracepoint"); \ - } \ - } \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ { \ @@ -266,6 +255,34 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) return static_branch_unlikely(&__tracepoint_##name.key);\ } +#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ + __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \ + static inline void trace_##name(proto) \ + { \ + if (static_branch_unlikely(&__tracepoint_##name.key)) \ + __DO_TRACE(name, \ + TP_ARGS(args), \ + TP_CONDITION(cond), 0); \ + if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ + WARN_ONCE(!rcu_is_watching(), \ + "RCU not watching for tracepoint"); \ + } \ + } + +#define __DECLARE_TRACE_SYSCALL(name, proto, args, cond, data_proto) \ + __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), cond, PARAMS(data_proto)) \ + static inline void trace_##name(proto) \ + { \ + if (static_branch_unlikely(&__tracepoint_##name.key)) \ + __DO_TRACE(name, \ + TP_ARGS(args), \ + TP_CONDITION(cond), 1); \ + if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ + WARN_ONCE(!rcu_is_watching(), \ + "RCU not watching for tracepoint"); \ + } \ + } + /* * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration @@ -348,6 +365,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) return false; \ } +#define __DECLARE_TRACE_SYSCALL __DECLARE_TRACE + #define DEFINE_TRACE_FN(name, reg, unreg, proto, args) #define DEFINE_TRACE(name, proto, args) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) @@ -409,6 +428,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \ PARAMS(void *__data, proto)) +#define DECLARE_TRACE_SYSCALL(name, proto, args) \ + __DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args), \ + cpu_online(raw_smp_processor_id()), \ + PARAMS(void *__data, proto)) + #define TRACE_EVENT_FLAGS(event, flag) #define TRACE_EVENT_PERF_PERM(event, expr...) 
@@ -546,6 +570,9 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) struct, assign, print) \ DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) +#define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, \ + print, reg, unreg) \ + DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FLAGS(event, flag) diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index a2ea11cc912e..c85bbce5aaa5 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -53,6 +53,9 @@ __bpf_trace_##call(void *__data, proto) \ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ __BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + /* * This part is compiled out, it is only here as a build time check * to make sure that if the tracepoint handling changes, the diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 00723935dcc7..ff5fa17a6259 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -46,6 +46,10 @@ assign, print, reg, unreg) \ DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) +#undef TRACE_EVENT_SYSCALL +#define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, print, reg, unreg) \ + DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) + #undef TRACE_EVENT_NOP #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) @@ -107,6 +111,7 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN #undef TRACE_EVENT_FN_COND +#undef TRACE_EVENT_SYSCALL #undef TRACE_EVENT_CONDITION #undef TRACE_EVENT_NOP #undef DEFINE_EVENT_NOP diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index b6e0cbc2c71f..f31ff446b468 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -15,7 +15,7 @@ #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS -TRACE_EVENT_FN(sys_enter, +TRACE_EVENT_SYSCALL(sys_enter, TP_PROTO(struct pt_regs *regs, long id), @@ -41,7 +41,7 @@ TRACE_EVENT_FN(sys_enter, TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY) -TRACE_EVENT_FN(sys_exit, +TRACE_EVENT_SYSCALL(sys_exit, TP_PROTO(struct pt_regs *regs, long ret), diff --git a/include/trace/perf.h b/include/trace/perf.h index 2c11181c82e0..ded997af481e 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -55,6 +55,9 @@ perf_trace_##call(void *__data, proto) \ head, __task); \ } +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + /* * This part is compiled out, it is only here as a build time check * to make sure that if the tracepoint handling changes, the diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index c2f9cabf154d..8bcbb9ee44de 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -45,6 +45,16 @@ PARAMS(print)); \ DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); +#undef TRACE_EVENT_SYSCALL +#define TRACE_EVENT_SYSCALL(name, proto, args, tstruct, assign, print, reg, unreg) \ + DECLARE_EVENT_SYSCALL_CLASS(name, \ + PARAMS(proto), \ + PARAMS(args), \ + PARAMS(tstruct), \ + PARAMS(assign), \ + PARAMS(print)); \ + DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); + #include "stages/stage1_struct_define.h" #undef DECLARE_EVENT_CLASS @@ -57,6 +67,9 @@ \ static struct trace_event_class event_class_##name; +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #undef 
DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) \ static struct trace_event_call __used \ @@ -117,6 +130,9 @@ tstruct; \ }; +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) @@ -208,6 +224,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ .trace = trace_raw_output_##call, \ }; +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ static notrace enum print_line_t \ @@ -265,6 +284,9 @@ static inline notrace int trace_event_get_offsets_##call( \ return __data_size; \ } +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -409,6 +431,9 @@ trace_event_raw_event_##call(void *__data, proto) \ * fail to compile unless it too is updated. */ +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ static inline void ftrace_test_probe_##call(void) \ @@ -434,6 +459,9 @@ static struct trace_event_class __used __refdata event_class_##call = { \ _TRACE_PERF_INIT(call) \ }; +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ \ -- cgit v1.2.3 From 13d750c2c03e9861e15268574ed2c239cca9c9d5 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:12 -0400 Subject: tracing/ftrace: disable preemption in syscall probe In preparation for allowing system call enter/exit instrumentation to handle page faults, make sure that ftrace can handle this change by explicitly disabling preemption within the ftrace system call tracepoint probes to respect the current expectations within ftrace ring buffer code. This change does not yet allow ftrace to take page faults per se within its probe, but allows its existing probes to adapt to the upcoming change. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. 
McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-3-mathieu.desnoyers@efficios.com Acked-by: Masami Hiramatsu (Google) Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/trace_events.h | 39 ++++++++++++++++++++++++++++++++------- kernel/trace/trace_syscalls.c | 12 ++++++++++++ 2 files changed, 44 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 8bcbb9ee44de..63071aa5923d 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -263,6 +263,9 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ tstruct \ {} }; +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS + #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) @@ -396,11 +399,11 @@ static inline notrace int trace_event_get_offsets_##call( \ #include "stages/stage6_event_callback.h" -#undef DECLARE_EVENT_CLASS -#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ - \ + +#undef __DECLARE_EVENT_CLASS +#define __DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static notrace void \ -trace_event_raw_event_##call(void *__data, proto) \ +do_trace_event_raw_event_##call(void *__data, proto) \ { \ struct trace_event_file *trace_file = __data; \ struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\ @@ -425,15 +428,35 @@ trace_event_raw_event_##call(void *__data, proto) \ \ trace_event_buffer_commit(&fbuffer); \ } + +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \ + PARAMS(assign), PARAMS(print)) \ +static notrace void \ +trace_event_raw_event_##call(void *__data, proto) \ +{ \ + do_trace_event_raw_event_##call(__data, args); \ +} + +#undef DECLARE_EVENT_SYSCALL_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS(call, proto, args, tstruct, assign, print) \ +__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \ + PARAMS(assign), PARAMS(print)) \ +static notrace void \ +trace_event_raw_event_##call(void *__data, proto) \ +{ \ + preempt_disable_notrace(); \ + do_trace_event_raw_event_##call(__data, args); \ + preempt_enable_notrace(); \ +} + /* * The ftrace_test_probe is compiled out, it is only here as a build time check * to make sure that if the tracepoint handling changes, the ftrace probe will * fail to compile unless it too is updated. 
*/ -#undef DECLARE_EVENT_SYSCALL_CLASS -#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS - #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ static inline void ftrace_test_probe_##call(void) \ @@ -443,6 +466,8 @@ static inline void ftrace_test_probe_##call(void) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#undef __DECLARE_EVENT_CLASS + #include "stages/stage7_class_define.h" #undef DECLARE_EVENT_CLASS diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 785733245ead..f9b21bac9d45 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -299,6 +299,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) int syscall_nr; int size; + /* + * Syscall probe called with preemption enabled, but the ring + * buffer and per-cpu data require preemption to be disabled. + */ + guard(preempt_notrace)(); + syscall_nr = trace_get_syscall_nr(current, regs); if (syscall_nr < 0 || syscall_nr >= NR_syscalls) return; @@ -338,6 +344,12 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct trace_event_buffer fbuffer; int syscall_nr; + /* + * Syscall probe called with preemption enabled, but the ring + * buffer and per-cpu data require preemption to be disabled. + */ + guard(preempt_notrace)(); + syscall_nr = trace_get_syscall_nr(current, regs); if (syscall_nr < 0 || syscall_nr >= NR_syscalls) return; -- cgit v1.2.3 From 65e7462a16cea593025ca3b34c5d74e69b027ee0 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:13 -0400 Subject: tracing/perf: disable preemption in syscall probe In preparation for allowing system call enter/exit instrumentation to handle page faults, make sure that perf can handle this change by explicitly disabling preemption within the perf system call tracepoint probes to respect the current expectations within perf ring buffer code. This change does not yet allow perf to take page faults per se within its probe, but allows its existing probes to adapt to the upcoming change. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. 
McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-4-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/perf.h | 42 ++++++++++++++++++++++++++++++++++++++---- kernel/trace/trace_syscalls.c | 12 ++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/perf.h b/include/trace/perf.h index ded997af481e..15cde7eac8b4 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -12,10 +12,10 @@ #undef __perf_task #define __perf_task(t) (__task = (t)) -#undef DECLARE_EVENT_CLASS -#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +#undef __DECLARE_EVENT_CLASS +#define __DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static notrace void \ -perf_trace_##call(void *__data, proto) \ +do_perf_trace_##call(void *__data, proto) \ { \ struct trace_event_call *event_call = __data; \ struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\ @@ -55,8 +55,39 @@ perf_trace_##call(void *__data, proto) \ head, __task); \ } +/* + * Define unused __count and __task variables to use @args to pass + * arguments to do_perf_trace_##call. This is needed because the + * macros __perf_count and __perf_task introduce the side-effect to + * store copies into those local variables. + */ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \ + PARAMS(assign), PARAMS(print)) \ +static notrace void \ +perf_trace_##call(void *__data, proto) \ +{ \ + u64 __count __attribute__((unused)); \ + struct task_struct *__task __attribute__((unused)); \ + \ + do_perf_trace_##call(__data, args); \ +} + #undef DECLARE_EVENT_SYSCALL_CLASS -#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS(call, proto, args, tstruct, assign, print) \ +__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \ + PARAMS(assign), PARAMS(print)) \ +static notrace void \ +perf_trace_##call(void *__data, proto) \ +{ \ + u64 __count __attribute__((unused)); \ + struct task_struct *__task __attribute__((unused)); \ + \ + preempt_disable_notrace(); \ + do_perf_trace_##call(__data, args); \ + preempt_enable_notrace(); \ +} /* * This part is compiled out, it is only here as a build time check @@ -76,4 +107,7 @@ static inline void perf_test_probe_##call(void) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef __DECLARE_EVENT_CLASS + #endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index f9b21bac9d45..b1cc19806f3d 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -596,6 +596,12 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) int rctx; int size; + /* + * Syscall probe called with preemption enabled, but the ring + * buffer and per-cpu data require preemption to be disabled. 
+ */ + guard(preempt_notrace)(); + syscall_nr = trace_get_syscall_nr(current, regs); if (syscall_nr < 0 || syscall_nr >= NR_syscalls) return; @@ -698,6 +704,12 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) int rctx; int size; + /* + * Syscall probe called with preemption enabled, but the ring + * buffer and per-cpu data require preemption to be disabled. + */ + guard(preempt_notrace)(); + syscall_nr = trace_get_syscall_nr(current, regs); if (syscall_nr < 0 || syscall_nr >= NR_syscalls) return; -- cgit v1.2.3 From 4aadde89d81fbf4cf3105a61dbc48888b819ecfb Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:14 -0400 Subject: tracing/bpf: disable preemption in syscall probe In preparation for allowing system call enter/exit instrumentation to handle page faults, make sure that bpf can handle this change by explicitly disabling preemption within the bpf system call tracepoint probes to respect the current expectations within bpf tracing code. This change does not yet allow bpf to take page faults per se within its probe, but allows its existing probes to adapt to the upcoming change. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-5-mathieu.desnoyers@efficios.com Acked-by: Andrii Nakryiko Tested-by: Andrii Nakryiko # BPF parts Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/bpf_probe.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index c85bbce5aaa5..fec97c93e1c9 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -53,8 +53,18 @@ __bpf_trace_##call(void *__data, proto) \ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ __BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) +#define __BPF_DECLARE_TRACE_SYSCALL(call, proto, args) \ +static notrace void \ +__bpf_trace_##call(void *__data, proto) \ +{ \ + preempt_disable_notrace(); \ + CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(__data, CAST_TO_U64(args)); \ + preempt_enable_notrace(); \ +} + #undef DECLARE_EVENT_SYSCALL_CLASS -#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS +#define DECLARE_EVENT_SYSCALL_CLASS(call, proto, args, tstruct, assign, print) \ + __BPF_DECLARE_TRACE_SYSCALL(call, PARAMS(proto), PARAMS(args)) /* * This part is compiled out, it is only here as a build time check -- cgit v1.2.3 From a363d27cdbc2bc2d1899b5a1520b64e3590fcd9a Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:15 -0400 Subject: tracing: Allow system call tracepoints to handle page faults Use Tasks Trace RCU to protect iteration of system call enter/exit tracepoint probes to allow those probes to handle page faults. In preparation for this change, all tracers registering to system call enter/exit tracepoints should expect those to be called with preemption enabled. This allows tracers to fault-in userspace system call arguments such as path strings within their probe callbacks. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. 
McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-6-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 18 ++++++++++++++++-- init/Kconfig | 1 + 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 76e441b39a96..0dc67fad706c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -107,6 +108,7 @@ void for_each_tracepoint_in_module(struct module *mod, #ifdef CONFIG_TRACEPOINTS static inline void tracepoint_synchronize_unregister(void) { + synchronize_rcu_tasks_trace(); synchronize_rcu(); } #else @@ -196,6 +198,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) /* * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. + * + * With @syscall=0, the tracepoint callback array dereference is + * protected by disabling preemption. + * With @syscall=1, the tracepoint callback array dereference is + * protected by Tasks Trace RCU, which allows probes to handle page + * faults. */ #define __DO_TRACE(name, args, cond, syscall) \ do { \ @@ -204,11 +212,17 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) if (!(cond)) \ return; \ \ - preempt_disable_notrace(); \ + if (syscall) \ + rcu_read_lock_trace(); \ + else \ + preempt_disable_notrace(); \ \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ \ - preempt_enable_notrace(); \ + if (syscall) \ + rcu_read_unlock_trace(); \ + else \ + preempt_enable_notrace(); \ } while (0) /* diff --git a/init/Kconfig b/init/Kconfig index 530a382ee0fe..4ac3d1b48278 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1985,6 +1985,7 @@ config BINDGEN_VERSION_TEXT # config TRACEPOINTS bool + select TASKS_TRACE_RCU source "kernel/Kconfig.kexec" -- cgit v1.2.3 From a3204c740a59bebb3b37a294d83f4e353303a52c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:16 -0400 Subject: tracing/ftrace: Add might_fault check to syscall probes Add a might_fault() check to validate that the ftrace sys_enter/sys_exit probe callbacks are indeed called from a context where page faults can be handled. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. 
McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-7-mathieu.desnoyers@efficios.com Acked-by: Masami Hiramatsu (Google) Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/trace_events.h | 1 + kernel/trace/trace_syscalls.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 63071aa5923d..4f22136fd465 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -446,6 +446,7 @@ __DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \ static notrace void \ trace_event_raw_event_##call(void *__data, proto) \ { \ + might_fault(); \ preempt_disable_notrace(); \ do_trace_event_raw_event_##call(__data, args); \ preempt_enable_notrace(); \ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index b1cc19806f3d..6d6bbd56ed92 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -303,6 +303,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) * Syscall probe called with preemption enabled, but the ring * buffer and per-cpu data require preemption to be disabled. */ + might_fault(); guard(preempt_notrace)(); syscall_nr = trace_get_syscall_nr(current, regs); @@ -348,6 +349,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) * Syscall probe called with preemption enabled, but the ring * buffer and per-cpu data require preemption to be disabled. */ + might_fault(); guard(preempt_notrace)(); syscall_nr = trace_get_syscall_nr(current, regs); -- cgit v1.2.3 From cdb537ac417938408ee819992f432c410f2d01a2 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:17 -0400 Subject: tracing/perf: Add might_fault check to syscall probes Add a might_fault() check to validate that the perf sys_enter/sys_exit probe callbacks are indeed called from a context where page faults can be handled. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-8-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/perf.h | 1 + kernel/trace/trace_syscalls.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/trace/perf.h b/include/trace/perf.h index 15cde7eac8b4..a1754b73a8f5 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -84,6 +84,7 @@ perf_trace_##call(void *__data, proto) \ u64 __count __attribute__((unused)); \ struct task_struct *__task __attribute__((unused)); \ \ + might_fault(); \ preempt_disable_notrace(); \ do_perf_trace_##call(__data, args); \ preempt_enable_notrace(); \ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 6d6bbd56ed92..46aab0ab9350 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -602,6 +602,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) * Syscall probe called with preemption enabled, but the ring * buffer and per-cpu data require preemption to be disabled. 
*/ + might_fault(); guard(preempt_notrace)(); syscall_nr = trace_get_syscall_nr(current, regs); @@ -710,6 +711,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) * Syscall probe called with preemption enabled, but the ring * buffer and per-cpu data require preemption to be disabled. */ + might_fault(); guard(preempt_notrace)(); syscall_nr = trace_get_syscall_nr(current, regs); -- cgit v1.2.3 From 0850e1bc88b1bdc30f7f0b223a92eb22e5f06be0 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:07:18 -0400 Subject: tracing/bpf: Add might_fault check to syscall probes Add a might_fault() check to validate that the bpf sys_enter/sys_exit probe callbacks are indeed called from a context where page faults can be handled. Cc: Michael Jeanson Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Joel Fernandes Link: https://lore.kernel.org/20241009010718.2050182-9-mathieu.desnoyers@efficios.com Acked-by: Andrii Nakryiko Tested-by: Andrii Nakryiko # BPF parts Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- include/trace/bpf_probe.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index fec97c93e1c9..183fa2aa2935 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -57,6 +57,7 @@ __bpf_trace_##call(void *__data, proto) \ static notrace void \ __bpf_trace_##call(void *__data, proto) \ { \ + might_fault(); \ preempt_disable_notrace(); \ CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(__data, CAST_TO_U64(args)); \ preempt_enable_notrace(); \ -- cgit v1.2.3 From 21291491e3f39d1dac6453e376f7619b21239b5e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 9 Oct 2024 01:35:52 +0100 Subject: clk: Remove unused clk_hw_rate_is_protected clk_hw_rate_is_protected() was added in 2017's commit e55a839a7a1c ("clk: add clock protection mechanism to clk core") but has been unused. Remove it. Signed-off-by: Dr. 
David Alan Gilbert Link: https://lore.kernel.org/r/20241009003552.254675-1-linux@treblig.org Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 6 ------ include/linux/clk-provider.h | 1 - 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index d02451f951cf..9b45fa005030 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -608,12 +608,6 @@ bool clk_hw_is_prepared(const struct clk_hw *hw) } EXPORT_SYMBOL_GPL(clk_hw_is_prepared); -bool clk_hw_rate_is_protected(const struct clk_hw *hw) -{ - return clk_core_rate_is_protected(hw->core); -} -EXPORT_SYMBOL_GPL(clk_hw_rate_is_protected); - bool clk_hw_is_enabled(const struct clk_hw *hw) { return clk_core_is_enabled(hw->core); diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 7e43caabb54b..5cbd112d1187 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1360,7 +1360,6 @@ unsigned long clk_hw_get_flags(const struct clk_hw *hw); (clk_hw_get_flags((hw)) & CLK_SET_RATE_PARENT) bool clk_hw_is_prepared(const struct clk_hw *hw); -bool clk_hw_rate_is_protected(const struct clk_hw *hw); bool clk_hw_is_enabled(const struct clk_hw *hw); bool __clk_is_enabled(struct clk *clk); struct clk *__clk_lookup(const char *name); -- cgit v1.2.3 From a82fcb16d977fa8ac52bdf37d5feb240366722ed Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 21 Aug 2024 17:24:28 -0700 Subject: clk: test: Add test managed of_clk_add_hw_provider() Add a test managed version of of_clk_add_hw_provider() that automatically unregisters the clk_hw provider upon test conclusion. Cc: Brendan Higgins Cc: David Gow Cc: Rae Moar Cc: Peng Fan Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240822002433.1163814-2-sboyd@kernel.org --- drivers/clk/clk_kunit_helpers.c | 30 ++++++++++++++++++++++++++++++ include/kunit/clk.h | 4 ++++ 2 files changed, 34 insertions(+) (limited to 'include') diff --git a/drivers/clk/clk_kunit_helpers.c b/drivers/clk/clk_kunit_helpers.c index 52fd25594c96..68a28e70bb61 100644 --- a/drivers/clk/clk_kunit_helpers.c +++ b/drivers/clk/clk_kunit_helpers.c @@ -203,5 +203,35 @@ int of_clk_hw_register_kunit(struct kunit *test, struct device_node *node, struc } EXPORT_SYMBOL_GPL(of_clk_hw_register_kunit); +KUNIT_DEFINE_ACTION_WRAPPER(of_clk_del_provider_wrapper, + of_clk_del_provider, struct device_node *); + +/** + * of_clk_add_hw_provider_kunit() - Test managed of_clk_add_hw_provider() + * @test: The test context + * @np: Device node pointer associated with clock provider + * @get: Callback for decoding clk_hw + * @data: Context pointer for @get callback. + * + * Just like of_clk_add_hw_provider(), except the clk_hw provider is managed by + * the test case and is automatically unregistered after the test case + * concludes. + * + * Return: 0 on success or a negative errno value on failure. 
+ */ +int of_clk_add_hw_provider_kunit(struct kunit *test, struct device_node *np, + struct clk_hw *(*get)(struct of_phandle_args *clkspec, void *data), + void *data) +{ + int ret; + + ret = of_clk_add_hw_provider(np, get, data); + if (ret) + return ret; + + return kunit_add_action_or_reset(test, of_clk_del_provider_wrapper, np); +} +EXPORT_SYMBOL_GPL(of_clk_add_hw_provider_kunit); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("KUnit helpers for clk providers and consumers"); diff --git a/include/kunit/clk.h b/include/kunit/clk.h index 73bc99cefe7b..0afae7688157 100644 --- a/include/kunit/clk.h +++ b/include/kunit/clk.h @@ -25,4 +25,8 @@ int clk_hw_register_kunit(struct kunit *test, struct device *dev, struct clk_hw int of_clk_hw_register_kunit(struct kunit *test, struct device_node *node, struct clk_hw *hw); +int of_clk_add_hw_provider_kunit(struct kunit *test, struct device_node *np, + struct clk_hw *(*get)(struct of_phandle_args *clkspec, void *data), + void *data); + #endif -- cgit v1.2.3 From 00977af42106e82bb2971c59e20d5e02c52e2a65 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 21 Aug 2024 17:24:29 -0700 Subject: of: kunit: Extract some overlay boiler plate into macros Make the lives of __of_overlay_apply_kunit() callers easier by extracting some of the boiler plate involved in referencing the DT overlays. Cc: Brendan Higgins Cc: David Gow Cc: Rae Moar Cc: Peng Fan Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240822002433.1163814-3-sboyd@kernel.org --- include/kunit/of.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/kunit/of.h b/include/kunit/of.h index 48d4e70c9666..75a760a4e2a5 100644 --- a/include/kunit/of.h +++ b/include/kunit/of.h @@ -62,6 +62,13 @@ static inline int __of_overlay_apply_kunit(struct kunit *test, &unused); } +#define of_overlay_begin(overlay_name) __dtbo_##overlay_name##_begin +#define of_overlay_end(overlay_name) __dtbo_##overlay_name##_end + +#define OF_OVERLAY_DECLARE(overlay_name) \ + extern uint8_t of_overlay_begin(overlay_name)[]; \ + extern uint8_t of_overlay_end(overlay_name)[] \ + /** * of_overlay_apply_kunit() - Test managed of_overlay_fdt_apply() for built-in overlays * @test: test context @@ -104,12 +111,11 @@ static inline int __of_overlay_apply_kunit(struct kunit *test, */ #define of_overlay_apply_kunit(test, overlay_name) \ ({ \ - extern uint8_t __dtbo_##overlay_name##_begin[]; \ - extern uint8_t __dtbo_##overlay_name##_end[]; \ + OF_OVERLAY_DECLARE(overlay_name); \ \ __of_overlay_apply_kunit((test), \ - __dtbo_##overlay_name##_begin, \ - __dtbo_##overlay_name##_end); \ + of_overlay_begin(overlay_name), \ + of_overlay_end(overlay_name)); \ }) #endif -- cgit v1.2.3 From 2b78d30620d7f8a9f9ce312ad21200ec7a554bd9 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Oct 2024 20:24:29 +0200 Subject: ipv4: Convert ip_route_use_hint() to dscp_t. Pass a dscp_t variable to ip_route_use_hint(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. Only ip_rcv_finish_core() actually calls ip_route_use_hint(). Use the ip4h_dscp() helper to get the DSCP from the IPv4 header. While there, modify the declaration of ip_route_use_hint() in include/net/route.h so that it matches the prototype of its implementation in net/ipv4/route.c. 
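As background for the dscp_t conversions in this and the following patches, here is a simplified, self-contained sketch of the helper pattern they rely on. The in-tree definitions live in include/net/inet_dscp.h (with ip4h_dscp() alongside the other IPv4 header helpers) and additionally carry a __bitwise annotation so sparse can flag accidental mixing of raw tos bytes with dscp_t values; struct iphdr_stub below is a stand-in introduced only for illustration.

#include <stdint.h>

/* DSCP occupies the upper six bits of the IPv4 tos byte; ECN the lower two. */
#define INET_DSCP_MASK	0xfc

typedef uint8_t dscp_t;			/* kernel: typedef u8 __bitwise dscp_t; */

struct iphdr_stub {			/* illustrative stand-in for struct iphdr */
	uint8_t tos;
};

static inline dscp_t inet_dsfield_to_dscp(uint8_t dsfield)
{
	return (dscp_t)(dsfield & INET_DSCP_MASK);	/* ECN bits masked off */
}

static inline uint8_t inet_dscp_to_dsfield(dscp_t dscp)
{
	return (uint8_t)dscp;		/* DSCP already sits in dsfield position */
}

static inline dscp_t ip4h_dscp(const struct iphdr_stub *ip4h)
{
	return inet_dsfield_to_dscp(ip4h->tos);
}

Because every value that reaches ->flowi4_tos has passed through inet_dsfield_to_dscp(), the two ECN bits can never leak into route lookups, which is the invariant these conversions enforce.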
Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/c40994fdf804db7a363d04fdee01bf48dddda676.1728302212.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 4 ++-- net/ipv4/ip_input.c | 4 ++-- net/ipv4/route.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 5e4374d66927..c219c0fecdcf 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -203,8 +203,8 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct in_device *in_dev, u32 *itag); int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev); -int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin, +int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, const struct sk_buff *hint); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c0a2490eb7c1..89bb63da6852 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -325,8 +325,8 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (ip_can_use_hint(skb, iph, hint)) { - err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos, - dev, hint); + err = ip_route_use_hint(skb, iph->daddr, iph->saddr, + ip4h_dscp(iph), dev, hint); if (unlikely(err)) goto drop_error; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6e1cd0065b87..ac03916cfcde 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2136,7 +2136,7 @@ static int ip_mkroute_input(struct sk_buff *skb, * Uses the provided hint instead of performing a route lookup. */ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, + dscp_t dscp, struct net_device *dev, const struct sk_buff *hint) { struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -2160,8 +2160,8 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (rt->rt_type != RTN_LOCAL) goto skip_validate_source; - tos &= INET_DSCP_MASK; - err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag); + err = fib_validate_source(skb, saddr, daddr, inet_dscp_to_dsfield(dscp), + 0, dev, in_dev, &tag); if (err < 0) goto martian_source; -- cgit v1.2.3 From d32976408744a589f04b5c939f8f01f7167e5167 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Oct 2024 20:24:54 +0200 Subject: ipv4: Convert ip_mc_validate_source() to dscp_t. Pass a dscp_t variable to ip_mc_validate_source(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. Callers of ip_mc_validate_source() to consider are: * ip_route_input_mc() which already has a dscp_t variable to pass as parameter. We just need to remove the inet_dscp_to_dsfield() conversion. * udp_v4_early_demux() which gets the DSCP directly from the IPv4 header and can simply use the ip4h_dscp() helper. Also, stop including net/inet_dscp.h in udp.c as we don't use any of its declarations anymore. 
Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/c91b2cca04718b7ee6cf5b9c1d5b40507d65a8d4.1728302212.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 3 ++- net/ipv4/route.c | 8 ++++---- net/ipv4/udp.c | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index c219c0fecdcf..586e59f7ed8a 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -198,8 +198,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 fl4->fl4_gre_key = gre_key; return ip_route_output_key(net, fl4); } + int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, + dscp_t dscp, struct net_device *dev, struct in_device *in_dev, u32 *itag); int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 527121be1ba2..1efb65e647c1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1666,7 +1666,7 @@ EXPORT_SYMBOL(rt_dst_clone); /* called in rcu_read_lock() section */ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, + dscp_t dscp, struct net_device *dev, struct in_device *in_dev, u32 *itag) { int err; @@ -1687,7 +1687,8 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, ip_hdr(skb)->protocol != IPPROTO_IGMP) return -EINVAL; } else { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, + err = fib_validate_source(skb, saddr, 0, + inet_dscp_to_dsfield(dscp), 0, dev, in_dev, itag); if (err < 0) return err; @@ -1705,8 +1706,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u32 itag = 0; int err; - err = ip_mc_validate_source(skb, daddr, saddr, - inet_dscp_to_dsfield(dscp), dev, in_dev, + err = ip_mc_validate_source(skb, daddr, saddr, dscp, dev, in_dev, &itag); if (err) return err; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8accbf4cb295..4b74a25d0b6e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -100,6 +100,7 @@ #include #include #include +#include #include #include #include @@ -115,7 +116,6 @@ #include #include #include -#include #if IS_ENABLED(CONFIG_IPV6) #include #endif @@ -2619,7 +2619,7 @@ int udp_v4_early_demux(struct sk_buff *skb) if (!inet_sk(sk)->inet_daddr && in_dev) return ip_mc_validate_source(skb, iph->daddr, iph->saddr, - iph->tos & INET_DSCP_MASK, + ip4h_dscp(iph), skb->dev, in_dev, &itag); } return 0; -- cgit v1.2.3 From d36236ab52754ef6bd083be945e9c2e93f466022 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 7 Oct 2024 20:25:02 +0200 Subject: ipv4: Convert fib_validate_source() to dscp_t. Pass a dscp_t variable to fib_validate_source(), instead of a plain u8, to prevent accidental setting of ECN bits in ->flowi4_tos. All callers of fib_validate_source() already have a dscp_t variable to pass as parameter. We just need to remove the inet_dscp_to_dsfield() conversions. 
Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/08612a4519bc5a3578bb493fbaad82437ebb73dc.1728302212.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 3 ++- net/ipv4/fib_frontend.c | 5 +++-- net/ipv4/route.c | 21 +++++++++------------ 3 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 967e4dc555fa..06130933542d 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -449,8 +449,9 @@ int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, __be32 fib_compute_spec_dst(struct sk_buff *skb); bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - u8 tos, int oif, struct net_device *dev, + dscp_t dscp, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); + #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 793e6781399a..d0fbc8c8c5e6 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -419,7 +419,7 @@ e_rpf: /* Ignore rp_filter for packets protected by IPsec. */ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - u8 tos, int oif, struct net_device *dev, + dscp_t dscp, int oif, struct net_device *dev, struct in_device *idev, u32 *itag) { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); @@ -448,7 +448,8 @@ ok: } full_check: - return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag); + return __fib_validate_source(skb, src, dst, inet_dscp_to_dsfield(dscp), + oif, dev, r, idev, itag); } static inline __be32 sk_extract_addr(struct sockaddr *addr) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1efb65e647c1..a0b091a7df87 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1687,9 +1687,8 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, ip_hdr(skb)->protocol != IPPROTO_IGMP) return -EINVAL; } else { - err = fib_validate_source(skb, saddr, 0, - inet_dscp_to_dsfield(dscp), 0, dev, - in_dev, itag); + err = fib_validate_source(skb, saddr, 0, dscp, 0, dev, in_dev, + itag); if (err < 0) return err; } @@ -1786,8 +1785,8 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, return -EINVAL; } - err = fib_validate_source(skb, saddr, daddr, inet_dscp_to_dsfield(dscp), - FIB_RES_OIF(*res), in_dev->dev, in_dev, &itag); + err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res), + in_dev->dev, in_dev, &itag); if (err < 0) { ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); @@ -2159,8 +2158,8 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (rt->rt_type != RTN_LOCAL) goto skip_validate_source; - err = fib_validate_source(skb, saddr, daddr, inet_dscp_to_dsfield(dscp), - 0, dev, in_dev, &tag); + err = fib_validate_source(skb, saddr, daddr, dscp, 0, dev, in_dev, + &tag); if (err < 0) goto martian_source; @@ -2298,8 +2297,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, } if (res->type == RTN_LOCAL) { - err = fib_validate_source(skb, saddr, daddr, - inet_dscp_to_dsfield(dscp), 0, dev, + err = fib_validate_source(skb, saddr, daddr, dscp, 0, dev, in_dev, &itag); if (err < 0) goto martian_source; @@ -2322,9 +2320,8 @@ brd_input: goto e_inval; if (!ipv4_is_zeronet(saddr)) { - err 
= fib_validate_source(skb, saddr, 0, - inet_dscp_to_dsfield(dscp), 0, dev, - in_dev, &itag); + err = fib_validate_source(skb, saddr, 0, dscp, 0, dev, in_dev, + &itag); if (err < 0) goto martian_source; } -- cgit v1.2.3 From 80c549cd1ab0241a7af262690a0ff9991fc74ec5 Mon Sep 17 00:00:00 2001 From: Alexander Zubkov Date: Tue, 8 Oct 2024 18:27:57 +0200 Subject: Fix misspelling of "accept*" in net Several files have "accept*" misspelled as "accpet*" in the comments. Fix all such occurrences. Signed-off-by: Alexander Zubkov Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241008162756.22618-2-green@qrator.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c | 4 ++-- drivers/net/ethernet/natsemi/ns83820.c | 2 +- include/uapi/linux/udp.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c index 455a54708be4..96fd31d75dfd 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c @@ -342,8 +342,8 @@ static struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl, { struct sk_buff *skb; - /* Allocate space for cpl_pass_accpet_req which will be synthesized by - * driver. Once driver synthesizes cpl_pass_accpet_req the skb will go + /* Allocate space for cpl_pass_accept_req which will be synthesized by + * driver. Once driver synthesizes cpl_pass_accept_req the skb will go * through the regular cpl_pass_accept_req processing in TOM. */ skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 998586872599..bea969dfa536 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -2090,7 +2090,7 @@ static int ns83820_init_one(struct pci_dev *pci_dev, */ /* Ramit : 1024 DMA is not a good idea, it ends up banging * some DELL and COMPAQ SMP systems - * Turn on ALP, only we are accpeting Jumbo Packets */ + * Turn on ALP, only we are accepting Jumbo Packets */ writel(RXCFG_AEP | RXCFG_ARP | RXCFG_AIRL | RXCFG_RX_FD | RXCFG_STRIPCRC //| RXCFG_ALP diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 1a0fe8b151fb..d85d671deed3 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -31,7 +31,7 @@ struct udphdr { #define UDP_CORK 1 /* Never send partially complete segments */ #define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */ #define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ -#define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ +#define UDP_NO_CHECK6_RX 102 /* Disable accepting checksum for UDP6 */ #define UDP_SEGMENT 103 /* Set GSO segmentation size */ #define UDP_GRO 104 /* This socket can receive UDP GRO packets */ -- cgit v1.2.3 From 87173021f1583ee37f4801fcde354729da8db3dc Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Oct 2024 10:29:03 -0700 Subject: ipv4: Link IPv4 address to per-netns hash table. As a prep for per-netns RTNL conversion, we want to namespacify the IPv4 address hash table and the GC work. Let's allocate the per-netns IPv4 address hash table to net->ipv4.inet_addr_lst and link IPv4 addresses into it. The actual users will be converted later. Note that the IPv6 address hash table is already namespacified. 
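To illustrate where the follow-up conversions are headed, a hypothetical RCU lookup over the new per-netns table could look like the sketch below. The function name is illustrative only, and it assumes the devinet.c context (the inet_addr_hash() helper and the addr_lst member added in this patch), with the caller inside an RCU read-side section.

static struct in_ifaddr *inet_lookup_ifaddr_rcu_sketch(struct net *net,
							__be32 addr)
{
	u32 hash = inet_addr_hash(net, addr);
	struct in_ifaddr *ifa;

	/* Walk only this netns' bucket instead of the global table. */
	hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst)
		if (ifa->ifa_local == addr)
			return ifa;

	return NULL;
}

The existing users of the global inet_addr_lst are moved over to this per-netns pattern by the later patches mentioned above.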
Reviewed-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241008172906.1326-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 1 + include/net/netns/ipv4.h | 1 + net/ipv4/devinet.c | 22 +++++++++++++++++++--- 3 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index cb5280e6cc21..d0c2bf67a9b0 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -142,6 +142,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) struct in_ifaddr { struct hlist_node hash; + struct hlist_node addr_lst; struct in_ifaddr __rcu *ifa_next; struct in_device *ifa_dev; struct rcu_head rcu_head; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 276f622f3516..29eba2eaaa26 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -270,5 +270,6 @@ struct netns_ipv4 { atomic_t rt_genid; siphash_key_t ip_id_key; + struct hlist_head *inet_addr_lst; }; #endif diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ab76744383cf..059807a627a6 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -134,11 +134,13 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) ASSERT_RTNL(); hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); + hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]); } static void inet_hash_remove(struct in_ifaddr *ifa) { ASSERT_RTNL(); + hlist_del_init_rcu(&ifa->addr_lst); hlist_del_init_rcu(&ifa->hash); } @@ -228,6 +230,7 @@ static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev) ifa->ifa_dev = in_dev; INIT_HLIST_NODE(&ifa->hash); + INIT_HLIST_NODE(&ifa->addr_lst); return ifa; } @@ -2663,14 +2666,21 @@ static struct ctl_table ctl_forward_entry[] = { static __net_init int devinet_init_net(struct net *net) { - int err; - struct ipv4_devconf *all, *dflt; #ifdef CONFIG_SYSCTL - struct ctl_table *tbl; struct ctl_table_header *forw_hdr; + struct ctl_table *tbl; #endif + struct ipv4_devconf *all, *dflt; + int err; + int i; err = -ENOMEM; + net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!net->ipv4.inet_addr_lst) + goto err_alloc_hash; + all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL); if (!all) goto err_alloc_all; @@ -2731,6 +2741,9 @@ static __net_init int devinet_init_net(struct net *net) net->ipv4.forw_hdr = forw_hdr; #endif + for (i = 0; i < IN4_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]); + net->ipv4.devconf_all = all; net->ipv4.devconf_dflt = dflt; return 0; @@ -2748,6 +2761,8 @@ err_alloc_ctl: err_alloc_dflt: kfree(all); err_alloc_all: + kfree(net->ipv4.inet_addr_lst); +err_alloc_hash: return err; } @@ -2766,6 +2781,7 @@ static __net_exit void devinet_exit_net(struct net *net) #endif kfree(net->ipv4.devconf_dflt); kfree(net->ipv4.devconf_all); + kfree(net->ipv4.inet_addr_lst); } static __net_initdata struct pernet_operations devinet_ops = { -- cgit v1.2.3 From 1675f385213edc14ed849e079d6866b48e552252 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Oct 2024 10:29:05 -0700 Subject: ipv4: Namespacify IPv4 address GC. Each IPv4 address could have a lifetime, which is useful for DHCP, and GC is periodically executed as check_lifetime_work. check_lifetime() does the actual GC under RTNL. 1. Acquire RTNL 2. Iterate inet_addr_lst 3. Remove IPv4 address if expired 4. 
Release RTNL Namespacifying the GC is required for per-netns RTNL, but using the per-netns hash table will shorten the time on the hash bucket iteration under RTNL. Let's add per-netns GC work and use the per-netns hash table. Reviewed-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241008172906.1326-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 1 + net/ipv4/devinet.c | 32 ++++++++++++++++++-------------- 2 files changed, 19 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 29eba2eaaa26..66a4cffc44ee 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -271,5 +271,6 @@ struct netns_ipv4 { atomic_t rt_genid; siphash_key_t ip_id_key; struct hlist_head *inet_addr_lst; + struct delayed_work addr_chk_work; }; #endif diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index cf47b5ac061f..ac245944e89e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -486,15 +486,12 @@ static void inet_del_ifa(struct in_device *in_dev, __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); } -static void check_lifetime(struct work_struct *work); - -static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); - static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid, struct netlink_ext_ack *extack) { struct in_ifaddr __rcu **last_primary, **ifap; struct in_device *in_dev = ifa->ifa_dev; + struct net *net = dev_net(in_dev->dev); struct in_validator_info ivi; struct in_ifaddr *ifa1; int ret; @@ -563,8 +560,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, inet_hash_insert(dev_net(in_dev->dev), ifa); - cancel_delayed_work(&check_lifetime_work); - queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); + cancel_delayed_work(&net->ipv4.addr_chk_work); + queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0); /* Send message first, then call notifier. 
Notifier will trigger FIB update, so that @@ -710,16 +707,19 @@ static void check_lifetime(struct work_struct *work) unsigned long now, next, next_sec, next_sched; struct in_ifaddr *ifa; struct hlist_node *n; + struct net *net; int i; + net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work); now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); for (i = 0; i < IN4_ADDR_HSIZE; i++) { + struct hlist_head *head = &net->ipv4.inet_addr_lst[i]; bool change_needed = false; rcu_read_lock(); - hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { + hlist_for_each_entry_rcu(ifa, head, addr_lst) { unsigned long age, tstamp; u32 preferred_lft; u32 valid_lft; @@ -757,7 +757,7 @@ static void check_lifetime(struct work_struct *work) if (!change_needed) continue; rtnl_lock(); - hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) { + hlist_for_each_entry_safe(ifa, n, head, addr_lst) { unsigned long age; if (ifa->ifa_flags & IFA_F_PERMANENT) @@ -806,8 +806,8 @@ static void check_lifetime(struct work_struct *work) if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; - queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, - next_sched - now); + queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, + next_sched - now); } static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, @@ -1004,9 +1004,9 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, ifa->ifa_proto = new_proto; set_ifa_lifetime(ifa, valid_lft, prefered_lft); - cancel_delayed_work(&check_lifetime_work); + cancel_delayed_work(&net->ipv4.addr_chk_work); queue_delayed_work(system_power_efficient_wq, - &check_lifetime_work, 0); + &net->ipv4.addr_chk_work, 0); rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); } return 0; @@ -2743,6 +2743,8 @@ static __net_init int devinet_init_net(struct net *net) for (i = 0; i < IN4_ADDR_HSIZE; i++) INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]); + INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime); + net->ipv4.devconf_all = all; net->ipv4.devconf_dflt = dflt; return 0; @@ -2769,7 +2771,11 @@ static __net_exit void devinet_exit_net(struct net *net) { #ifdef CONFIG_SYSCTL const struct ctl_table *tbl; +#endif + + cancel_delayed_work_sync(&net->ipv4.addr_chk_work); +#ifdef CONFIG_SYSCTL tbl = net->ipv4.forw_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.forw_hdr); __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt, @@ -2806,8 +2812,6 @@ void __init devinet_init(void) register_pernet_subsys(&devinet_ops); register_netdevice_notifier(&ip_netdev_notifier); - queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); - rtnl_af_register(&inet_af_ops); rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0); -- cgit v1.2.3 From 99ee348e6a41cf24b334a1bb7cde87239e8e2d95 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Oct 2024 10:29:06 -0700 Subject: ipv4: Retire global IPv4 hash table inet_addr_lst. No one uses inet_addr_lst anymore, so let's remove it. 
Reviewed-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241008172906.1326-5-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 1 - net/ipv4/devinet.c | 10 ---------- 2 files changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index d0c2bf67a9b0..d9c690c8c80b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -141,7 +141,6 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) ARP_EVICT_NOCARRIER) struct in_ifaddr { - struct hlist_node hash; struct hlist_node addr_lst; struct in_ifaddr __rcu *ifa_next; struct in_device *ifa_dev; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ac245944e89e..7c156f85b7d2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -119,8 +119,6 @@ struct inet_fill_args { #define IN4_ADDR_HSIZE_SHIFT 8 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) -static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; - static u32 inet_addr_hash(const struct net *net, __be32 addr) { u32 val = (__force u32) addr ^ net_hash_mix(net); @@ -133,7 +131,6 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) u32 hash = inet_addr_hash(net, ifa->ifa_local); ASSERT_RTNL(); - hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]); } @@ -141,7 +138,6 @@ static void inet_hash_remove(struct in_ifaddr *ifa) { ASSERT_RTNL(); hlist_del_init_rcu(&ifa->addr_lst); - hlist_del_init_rcu(&ifa->hash); } /** @@ -228,7 +224,6 @@ static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev) in_dev_hold(in_dev); ifa->ifa_dev = in_dev; - INIT_HLIST_NODE(&ifa->hash); INIT_HLIST_NODE(&ifa->addr_lst); return ifa; @@ -2804,11 +2799,6 @@ static struct rtnl_af_ops inet_af_ops __read_mostly = { void __init devinet_init(void) { - int i; - - for (i = 0; i < IN4_ADDR_HSIZE; i++) - INIT_HLIST_HEAD(&inet_addr_lst[i]); - register_pernet_subsys(&devinet_ops); register_netdevice_notifier(&ip_netdev_notifier); -- cgit v1.2.3 From ee3283c608dfa21251b0821d7bb198c7ae3189f6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:16 -0400 Subject: timekeeping: Add interfaces for handling timestamps with a floor value Multigrain timestamps allow the kernel to use fine-grained timestamps when an inode's attributes is being actively observed via ->getattr(). With this support, it's possible for a file to get a fine-grained timestamp, and another modified after it to get a coarse-grained stamp that is earlier than the fine-grained time. If this happens then the files can appear to have been modified in reverse order, which breaks VFS ordering guarantees [1]. To prevent this, maintain a floor value for multigrain timestamps. Whenever a fine-grained timestamp is handed out, record it, and when later coarse-grained stamps are handed out, ensure they are not earlier than that value. If the coarse-grained timestamp is earlier than the fine-grained floor, return the floor value instead. Add a static singleton atomic64_t into timekeeper.c that is used to keep track of the latest fine-grained time ever handed out. This is tracked as a monotonic ktime_t value to ensure that it isn't affected by clock jumps. Because it is updated at different times than the rest of the timekeeper object, the floor value is managed independently of the timekeeper via a cmpxchg() operation, and sits on its own cacheline. 
Add two new public interfaces: - ktime_get_coarse_real_ts64_mg() fills a timespec64 with the later of the coarse-grained clock and the floor time - ktime_get_real_ts64_mg() gets the fine-grained clock value, and tries to swap it into the floor. A timespec64 is filled with the result. The floor value is global and updated via a single try_cmpxchg(). If that fails then the operation raced with a concurrent update. Any concurrent update must be later than the existing floor value, so any racing tasks can accept any resulting floor value without retrying. [1]: POSIX requires that files be stamped with realtime clock values, and makes no provision for dealing with backward clock jumps. If a backward realtime clock jump occurs, then files can appear to have been modified in reverse order. Signed-off-by: Jeff Layton Signed-off-by: Thomas Gleixner Tested-by: Randy Dunlap # documentation bits Acked-by: John Stultz Link: https://lore.kernel.org/all/20241002-mgtime-v10-1-d1c4717f5284@kernel.org Signed-off-by: Christian Brauner --- include/linux/timekeeping.h | 4 ++ kernel/time/timekeeping.c | 104 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index fc12a9ba2c88..7aa85246c183 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -45,6 +45,10 @@ extern void ktime_get_real_ts64(struct timespec64 *tv); extern void ktime_get_coarse_ts64(struct timespec64 *ts); extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); +/* Multigrain timestamp interfaces */ +extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); +extern void ktime_get_real_ts64_mg(struct timespec64 *ts); + void getboottime64(struct timespec64 *ts); /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7e6f409bf311..441792c907fa 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -114,6 +114,23 @@ static struct tk_fast tk_fast_raw ____cacheline_aligned = { .base[1] = FAST_TK_INIT, }; +/* + * Multigrain timestamps require tracking the latest fine-grained timestamp + * that has been issued, and never returning a coarse-grained timestamp that is + * earlier than that value. + * + * mg_floor represents the latest fine-grained time that has been handed out as + * a file timestamp on the system. This is tracked as a monotonic ktime_t, and + * converted to a realtime clock value on an as-needed basis. + * + * Maintaining mg_floor ensures the multigrain interfaces never issue a + * timestamp earlier than one that has been previously issued. + * + * The exception to this rule is when there is a backward realtime clock jump. If + * such an event occurs, a timestamp can appear to be earlier than a previous one. + */ +static __cacheline_aligned_in_smp atomic64_t mg_floor; + static inline void tk_normalize_xtime(struct timekeeper *tk) { while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { @@ -2394,6 +2411,93 @@ void ktime_get_coarse_real_ts64(struct timespec64 *ts) } EXPORT_SYMBOL(ktime_get_coarse_real_ts64); +/** + * ktime_get_coarse_real_ts64_mg - return latter of coarse grained time or floor + * @ts: timespec64 to be filled + * + * Fetch the global mg_floor value, convert it to realtime and compare it + * to the current coarse-grained time. Fill @ts with whichever is + * latest. Note that this is a filesystem-specific interface and should be + * avoided outside of that context. 
+ */ +void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts) +{ + struct timekeeper *tk = &tk_core.timekeeper; + u64 floor = atomic64_read(&mg_floor); + ktime_t f_real, offset, coarse; + unsigned int seq; + + do { + seq = read_seqcount_begin(&tk_core.seq); + *ts = tk_xtime(tk); + offset = tk_core.timekeeper.offs_real; + } while (read_seqcount_retry(&tk_core.seq, seq)); + + coarse = timespec64_to_ktime(*ts); + f_real = ktime_add(floor, offset); + if (ktime_after(f_real, coarse)) + *ts = ktime_to_timespec64(f_real); +} + +/** + * ktime_get_real_ts64_mg - attempt to update floor value and return result + * @ts: pointer to the timespec to be set + * + * Get a monotonic fine-grained time value and attempt to swap it into + * mg_floor. If that succeeds then accept the new floor value. If it fails + * then another task raced in during the interim time and updated the + * floor. Since any update to the floor must be later than the previous + * floor, either outcome is acceptable. + * + * Typically this will be called after calling ktime_get_coarse_real_ts64_mg(), + * and determining that the resulting coarse-grained timestamp did not effect + * a change in ctime. Any more recent floor value would effect a change to + * ctime, so there is no need to retry the atomic64_try_cmpxchg() on failure. + * + * @ts will be filled with the latest floor value, regardless of the outcome of + * the cmpxchg. Note that this is a filesystem specific interface and should be + * avoided outside of that context. + */ +void ktime_get_real_ts64_mg(struct timespec64 *ts) +{ + struct timekeeper *tk = &tk_core.timekeeper; + ktime_t old = atomic64_read(&mg_floor); + ktime_t offset, mono; + unsigned int seq; + u64 nsecs; + + do { + seq = read_seqcount_begin(&tk_core.seq); + + ts->tv_sec = tk->xtime_sec; + mono = tk->tkr_mono.base; + nsecs = timekeeping_get_ns(&tk->tkr_mono); + offset = tk_core.timekeeper.offs_real; + } while (read_seqcount_retry(&tk_core.seq, seq)); + + mono = ktime_add_ns(mono, nsecs); + + /* + * Attempt to update the floor with the new time value. As any + * update must be later then the existing floor, and would effect + * a change to ctime from the perspective of the current task, + * accept the resulting floor value regardless of the outcome of + * the swap. + */ + if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) { + ts->tv_nsec = 0; + timespec64_add_ns(ts, nsecs); + } else { + /* + * Another task changed mg_floor since "old" was fetched. + * "old" has been updated with the latest value of "mg_floor". + * That value is newer than the previous floor value, which + * is enough to effect a change to ctime. Accept it. + */ + *ts = ktime_to_timespec64(ktime_add(old, offset)); + } +} + void ktime_get_coarse_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; -- cgit v1.2.3 From 2a15385742c689a271345dcbb4c28b9c568bc7ce Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:17 -0400 Subject: timekeeping: Add percpu counter for tracking floor swap events The mgtime_floor value is a global variable for tracking the latest fine-grained timestamp handed out. Because it's a global, track the number of times that a new floor value is assigned. Add a new percpu counter to the timekeeping code to track the number of floor swap events that have occurred. A later patch will add a debugfs file to display this counter alongside other stats involving multigrain timestamps. 
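For reference, the per-CPU counting pattern used below looks roughly like this (illustrative names, not the actual symbols added by the patch):

	DEFINE_PER_CPU(unsigned long, example_swap_count);

	static void example_count_swap(void)
	{
		/* cheap, lock-free increment on the local CPU */
		this_cpu_inc(example_swap_count);
	}

	static unsigned long example_read_swaps(void)
	{
		unsigned long sum = 0;
		int cpu;

		/* the sum tolerates races; individual reads are marked data_race() */
		for_each_possible_cpu(cpu)
			sum += data_race(per_cpu(example_swap_count, cpu));

		return sum;
	}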
Signed-off-by: Jeff Layton Signed-off-by: Thomas Gleixner Tested-by: Randy Dunlap # documentation bits Link: https://lore.kernel.org/all/20241002-mgtime-v10-2-d1c4717f5284@kernel.org Signed-off-by: Christian Brauner --- include/linux/timekeeping.h | 1 + kernel/time/timekeeping.c | 1 + kernel/time/timekeeping_debug.c | 13 +++++++++++++ kernel/time/timekeeping_internal.h | 15 +++++++++++++++ 4 files changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7aa85246c183..84a035e86ac8 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -48,6 +48,7 @@ extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); /* Multigrain timestamp interfaces */ extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); extern void ktime_get_real_ts64_mg(struct timespec64 *ts); +extern unsigned long timekeeping_get_mg_floor_swaps(void); void getboottime64(struct timespec64 *ts); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 441792c907fa..962b2a31f015 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2487,6 +2487,7 @@ void ktime_get_real_ts64_mg(struct timespec64 *ts) if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) { ts->tv_nsec = 0; timespec64_add_ns(ts, nsecs); + timekeeping_inc_mg_floor_swaps(); } else { /* * Another task changed mg_floor since "old" was fetched. diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index b73e8850e58d..badeb222eab9 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -17,6 +17,9 @@ #define NUM_BINS 32 +/* Incremented every time mg_floor is updated */ +DEFINE_PER_CPU(unsigned long, timekeeping_mg_floor_swaps); + static unsigned int sleep_time_bin[NUM_BINS] = {0}; static int tk_debug_sleep_time_show(struct seq_file *s, void *data) @@ -53,3 +56,13 @@ void tk_debug_account_sleep_time(const struct timespec64 *t) (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC); } +unsigned long timekeeping_get_mg_floor_swaps(void) +{ + unsigned long sum = 0; + int cpu; + + for_each_possible_cpu(cpu) + sum += data_race(per_cpu(timekeeping_mg_floor_swaps, cpu)); + + return sum; +} diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h index 4ca2787d1642..0bbae825bc02 100644 --- a/kernel/time/timekeeping_internal.h +++ b/kernel/time/timekeeping_internal.h @@ -10,9 +10,24 @@ * timekeeping debug functions */ #ifdef CONFIG_DEBUG_FS + +DECLARE_PER_CPU(unsigned long, timekeeping_mg_floor_swaps); + +static inline void timekeeping_inc_mg_floor_swaps(void) +{ + this_cpu_inc(timekeeping_mg_floor_swaps); +} + extern void tk_debug_account_sleep_time(const struct timespec64 *t); + #else + #define tk_debug_account_sleep_time(x) + +static inline void timekeeping_inc_mg_floor_swaps(void) +{ +} + #endif #ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE -- cgit v1.2.3 From 7f2c86cba3c584c7227cddaabdf0ab54c8151e60 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:20 -0400 Subject: fs: handle delegated timestamps in setattr_copy_mgtime An update to the inode ctime typically requires the latest clock value possible. The exception to this rule is when there is a nfsd write delegation and the server is proxying timestamps from the client. When nfsd gets a CB_GETATTR response, update the timestamp value in the inode to the values that the client is tracking. 
The client doesn't send a ctime value (since that's always determined by the exported filesystem), but it can send a mtime value. In the case where it does, update the ctime to a value commensurate with that instead of the current time. If ATTR_DELEG is set, then use ia_ctime value instead of setting the timestamp to the current time. With the addition of delegated timestamps, the server may receive a request to update only the atime, which doesn't involve a ctime update. Trust the ATTR_CTIME flag in the update and only update the ctime when it's set. Tested-by: Randy Dunlap # documentation bits Reviewed-by: Jan Kara Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20241002-mgtime-v10-5-d1c4717f5284@kernel.org Signed-off-by: Christian Brauner --- fs/attr.c | 29 ++++++++++++++-------- fs/inode.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 2 ++ 3 files changed, 94 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/attr.c b/fs/attr.c index 0309c2bd8afa..9caf63d20d03 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -286,16 +286,21 @@ static void setattr_copy_mgtime(struct inode *inode, const struct iattr *attr) unsigned int ia_valid = attr->ia_valid; struct timespec64 now; - /* - * If the ctime isn't being updated then nothing else should be - * either. - */ - if (!(ia_valid & ATTR_CTIME)) { - WARN_ON_ONCE(ia_valid & (ATTR_ATIME|ATTR_MTIME)); - return; + if (ia_valid & ATTR_CTIME) { + /* + * In the case of an update for a write delegation, we must respect + * the value in ia_ctime and not use the current time. + */ + if (ia_valid & ATTR_DELEG) + now = inode_set_ctime_deleg(inode, attr->ia_ctime); + else + now = inode_set_ctime_current(inode); + } else { + /* If ATTR_CTIME isn't set, then ATTR_MTIME shouldn't be either. */ + WARN_ON_ONCE(ia_valid & ATTR_MTIME); + now = current_time(inode); } - now = inode_set_ctime_current(inode); if (ia_valid & ATTR_ATIME_SET) inode_set_atime_to_ts(inode, attr->ia_atime); else if (ia_valid & ATTR_ATIME) @@ -354,8 +359,12 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode, inode_set_atime_to_ts(inode, attr->ia_atime); if (ia_valid & ATTR_MTIME) inode_set_mtime_to_ts(inode, attr->ia_mtime); - if (ia_valid & ATTR_CTIME) - inode_set_ctime_to_ts(inode, attr->ia_ctime); + if (ia_valid & ATTR_CTIME) { + if (ia_valid & ATTR_DELEG) + inode_set_ctime_deleg(inode, attr->ia_ctime); + else + inode_set_ctime_to_ts(inode, attr->ia_ctime); + } } EXPORT_SYMBOL(setattr_copy); diff --git a/fs/inode.c b/fs/inode.c index 53f56f6e1ff2..7d1ede60e549 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2717,6 +2717,79 @@ out: } EXPORT_SYMBOL(inode_set_ctime_current); +/** + * inode_set_ctime_deleg - try to update the ctime on a delegated inode + * @inode: inode to update + * @update: timespec64 to set the ctime + * + * Attempt to atomically update the ctime on behalf of a delegation holder. + * + * The nfs server can call back the holder of a delegation to get updated + * inode attributes, including the mtime. When updating the mtime, update + * the ctime to a value at least equal to that. + * + * This can race with concurrent updates to the inode, in which + * case the update is skipped. + * + * Note that this works even when multigrain timestamps are not enabled, + * so it is used in either case. 
+ */ +struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 update) +{ + struct timespec64 now, cur_ts; + u32 cur, old; + + /* pairs with try_cmpxchg below */ + cur = smp_load_acquire(&inode->i_ctime_nsec); + cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED; + cur_ts.tv_sec = inode->i_ctime_sec; + + /* If the update is older than the existing value, skip it. */ + if (timespec64_compare(&update, &cur_ts) <= 0) + return cur_ts; + + ktime_get_coarse_real_ts64_mg(&now); + + /* Clamp the update to "now" if it's in the future */ + if (timespec64_compare(&update, &now) > 0) + update = now; + + update = timestamp_truncate(update, inode); + + /* No need to update if the values are already the same */ + if (timespec64_equal(&update, &cur_ts)) + return cur_ts; + + /* + * Try to swap the nsec value into place. If it fails, that means + * it raced with an update due to a write or similar activity. That + * stamp takes precedence, so just skip the update. + */ +retry: + old = cur; + if (try_cmpxchg(&inode->i_ctime_nsec, &cur, update.tv_nsec)) { + inode->i_ctime_sec = update.tv_sec; + mgtime_counter_inc(mg_ctime_swaps); + return update; + } + + /* + * Was the change due to another task marking the old ctime QUERIED? + * + * If so, then retry the swap. This can only happen once since + * the only way to clear I_CTIME_QUERIED is to stamp the inode + * with a new ctime. + */ + if (!(old & I_CTIME_QUERIED) && (cur == (old | I_CTIME_QUERIED))) + goto retry; + + /* Otherwise, it was a new timestamp. */ + cur_ts.tv_sec = inode->i_ctime_sec; + cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED; + return cur_ts; +} +EXPORT_SYMBOL(inode_set_ctime_deleg); + /** * in_group_or_capable - check whether caller is CAP_FSETID privileged * @idmap: idmap of the mount @inode was found from diff --git a/include/linux/fs.h b/include/linux/fs.h index eff688e75f2f..ea7ed437d2b1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1544,6 +1544,8 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); +struct timespec64 inode_set_ctime_deleg(struct inode *inode, + struct timespec64 update); static inline time64_t inode_get_atime_sec(const struct inode *inode) { -- cgit v1.2.3 From c86e3c47187ad7fa17f8533a4142453991684b46 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Oct 2024 17:27:21 -0400 Subject: fs: tracepoints around multigrain timestamp events Add some tracepoints around various multigrain timestamp events. Reviewed-by: Josef Bacik Reviewed-by: Darrick J. 
Wong Reviewed-by: Jan Kara Reviewed-by: Steven Rostedt (Google) Tested-by: Randy Dunlap # documentation bits Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20241002-mgtime-v10-6-d1c4717f5284@kernel.org Signed-off-by: Christian Brauner --- fs/inode.c | 9 ++- fs/stat.c | 3 + include/trace/events/timestamp.h | 124 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 include/trace/events/timestamp.h (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index 7d1ede60e549..f7a25c511d6b 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -22,6 +22,9 @@ #include #include #include +#define CREATE_TRACE_POINTS +#include + #include "internal.h" /* @@ -2603,6 +2606,7 @@ EXPORT_SYMBOL(inode_nohighmem); struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { + trace_inode_set_ctime_to_ts(inode, &ts); set_normalized_timespec64(&ts, ts.tv_sec, ts.tv_nsec); inode->i_ctime_sec = ts.tv_sec; inode->i_ctime_nsec = ts.tv_nsec; @@ -2689,14 +2693,17 @@ struct timespec64 inode_set_ctime_current(struct inode *inode) } /* No need to cmpxchg if it's exactly the same */ - if (cns == now.tv_nsec && inode->i_ctime_sec == now.tv_sec) + if (cns == now.tv_nsec && inode->i_ctime_sec == now.tv_sec) { + trace_ctime_xchg_skip(inode, &now); goto out; + } cur = cns; retry: /* Try to swap the nsec value into place. */ if (try_cmpxchg(&inode->i_ctime_nsec, &cur, now.tv_nsec)) { /* If swap occurred, then we're (mostly) done */ inode->i_ctime_sec = now.tv_sec; + trace_ctime_ns_xchg(inode, cns, now.tv_nsec, cur); } else { /* * Was the change due to someone marking the old ctime QUERIED? diff --git a/fs/stat.c b/fs/stat.c index dd480bf51a2a..6eb6c39d0037 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -23,6 +23,8 @@ #include #include +#include + #include "internal.h" #include "mount.h" @@ -56,6 +58,7 @@ void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode) if (!(stat->ctime.tv_nsec & I_CTIME_QUERIED)) stat->ctime.tv_nsec = ((u32)atomic_fetch_or(I_CTIME_QUERIED, pcn)); stat->ctime.tv_nsec &= ~I_CTIME_QUERIED; + trace_fill_mg_cmtime(inode, &stat->ctime, &stat->mtime); } EXPORT_SYMBOL(fill_mg_cmtime); diff --git a/include/trace/events/timestamp.h b/include/trace/events/timestamp.h new file mode 100644 index 000000000000..c9e5ec930054 --- /dev/null +++ b/include/trace/events/timestamp.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM timestamp + +#if !defined(_TRACE_TIMESTAMP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TIMESTAMP_H + +#include +#include + +#define CTIME_QUERIED_FLAGS \ + { I_CTIME_QUERIED, "Q" } + +DECLARE_EVENT_CLASS(ctime, + TP_PROTO(struct inode *inode, + struct timespec64 *ctime), + + TP_ARGS(inode, ctime), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(time64_t, ctime_s) + __field(u32, ctime_ns) + __field(u32, gen) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->gen = inode->i_generation; + __entry->ctime_s = ctime->tv_sec; + __entry->ctime_ns = ctime->tv_nsec; + ), + + TP_printk("ino=%d:%d:%ld:%u ctime=%lld.%u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->gen, + __entry->ctime_s, __entry->ctime_ns + ) +); + +DEFINE_EVENT(ctime, inode_set_ctime_to_ts, + TP_PROTO(struct inode *inode, + struct timespec64 *ctime), + TP_ARGS(inode, ctime)); + +DEFINE_EVENT(ctime, ctime_xchg_skip, + TP_PROTO(struct inode *inode, + struct timespec64 *ctime), 
+ TP_ARGS(inode, ctime)); + +TRACE_EVENT(ctime_ns_xchg, + TP_PROTO(struct inode *inode, + u32 old, + u32 new, + u32 cur), + + TP_ARGS(inode, old, new, cur), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(u32, gen) + __field(u32, old) + __field(u32, new) + __field(u32, cur) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->gen = inode->i_generation; + __entry->old = old; + __entry->new = new; + __entry->cur = cur; + ), + + TP_printk("ino=%d:%d:%ld:%u old=%u:%s new=%u cur=%u:%s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->gen, + __entry->old & ~I_CTIME_QUERIED, + __print_flags(__entry->old & I_CTIME_QUERIED, "|", CTIME_QUERIED_FLAGS), + __entry->new, + __entry->cur & ~I_CTIME_QUERIED, + __print_flags(__entry->cur & I_CTIME_QUERIED, "|", CTIME_QUERIED_FLAGS) + ) +); + +TRACE_EVENT(fill_mg_cmtime, + TP_PROTO(struct inode *inode, + struct timespec64 *ctime, + struct timespec64 *mtime), + + TP_ARGS(inode, ctime, mtime), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(time64_t, ctime_s) + __field(time64_t, mtime_s) + __field(u32, ctime_ns) + __field(u32, mtime_ns) + __field(u32, gen) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->gen = inode->i_generation; + __entry->ctime_s = ctime->tv_sec; + __entry->mtime_s = mtime->tv_sec; + __entry->ctime_ns = ctime->tv_nsec; + __entry->mtime_ns = mtime->tv_nsec; + ), + + TP_printk("ino=%d:%d:%ld:%u ctime=%lld.%u mtime=%lld.%u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->gen, + __entry->ctime_s, __entry->ctime_ns, + __entry->mtime_s, __entry->mtime_ns + ) +); +#endif /* _TRACE_TIMESTAMP_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 440e3dcd7c739ba5b256196a89e796fb7e59c755 Mon Sep 17 00:00:00 2001 From: Sunyeal Hong Date: Wed, 9 Oct 2024 13:21:08 +0900 Subject: dt-bindings: clock: exynosautov920: add peric1, misc and hsi0/1 clock definitions Add peric1, misc and hsi0/1 clock definitions. 
- CMU_PERIC1 for USI, IC2 and I3C - CMU_MISC for MISC, GIC and OTP - HSI0 for PCIE - HSI1 for USB and MMC Signed-off-by: Sunyeal Hong Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20241009042110.2379903-2-sunyeal.hong@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/samsung,exynosautov920.h | 47 ++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/samsung,exynosautov920.h b/include/dt-bindings/clock/samsung,exynosautov920.h index c720f344b6bf..0c681f2ba3d0 100644 --- a/include/dt-bindings/clock/samsung,exynosautov920.h +++ b/include/dt-bindings/clock/samsung,exynosautov920.h @@ -160,6 +160,7 @@ #define DOUT_CLKCMU_SNW_NOC 144 #define DOUT_CLKCMU_SSP_NOC 145 #define DOUT_CLKCMU_TAA_NOC 146 +#define DOUT_TCXO_DIV2 147 /* CMU_PERIC0 */ #define CLK_MOUT_PERIC0_IP_USER 1 @@ -188,4 +189,50 @@ #define CLK_DOUT_PERIC0_USI_I2C 23 #define CLK_DOUT_PERIC0_I3C 24 +/* CMU_PERIC1 */ +#define CLK_MOUT_PERIC1_IP_USER 1 +#define CLK_MOUT_PERIC1_NOC_USER 2 +#define CLK_MOUT_PERIC1_USI09_USI 3 +#define CLK_MOUT_PERIC1_USI10_USI 4 +#define CLK_MOUT_PERIC1_USI11_USI 5 +#define CLK_MOUT_PERIC1_USI12_USI 6 +#define CLK_MOUT_PERIC1_USI13_USI 7 +#define CLK_MOUT_PERIC1_USI14_USI 8 +#define CLK_MOUT_PERIC1_USI15_USI 9 +#define CLK_MOUT_PERIC1_USI16_USI 10 +#define CLK_MOUT_PERIC1_USI17_USI 11 +#define CLK_MOUT_PERIC1_USI_I2C 12 +#define CLK_MOUT_PERIC1_I3C 13 + +#define CLK_DOUT_PERIC1_USI09_USI 14 +#define CLK_DOUT_PERIC1_USI10_USI 15 +#define CLK_DOUT_PERIC1_USI11_USI 16 +#define CLK_DOUT_PERIC1_USI12_USI 17 +#define CLK_DOUT_PERIC1_USI13_USI 18 +#define CLK_DOUT_PERIC1_USI14_USI 19 +#define CLK_DOUT_PERIC1_USI15_USI 20 +#define CLK_DOUT_PERIC1_USI16_USI 21 +#define CLK_DOUT_PERIC1_USI17_USI 22 +#define CLK_DOUT_PERIC1_USI_I2C 23 +#define CLK_DOUT_PERIC1_I3C 24 + +/* CMU_MISC */ +#define CLK_MOUT_MISC_NOC_USER 1 +#define CLK_MOUT_MISC_GIC 2 + +#define CLK_DOUT_MISC_OTP 3 +#define CLK_DOUT_MISC_NOCP 4 +#define CLK_DOUT_MISC_OSC_DIV2 5 + +/* CMU_HSI0 */ +#define CLK_MOUT_HSI0_NOC_USER 1 + +#define CLK_DOUT_HSI0_PCIE_APB 2 + +/* CMU_HSI1 */ +#define CLK_MOUT_HSI1_MMC_CARD_USER 1 +#define CLK_MOUT_HSI1_NOC_USER 2 +#define CLK_MOUT_HSI1_USBDRD_USER 3 +#define CLK_MOUT_HSI1_USBDRD 4 + #endif /* _DT_BINDINGS_CLOCK_EXYNOSAUTOV920_H */ -- cgit v1.2.3 From 9e542ff8b79ae1871074d3a7dca7de9e7374eeda Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 8 Oct 2024 09:32:04 -0700 Subject: net: Remove likely from l3mdev_master_ifindex_by_index The likely() annotation in l3mdev_master_ifindex_by_index() has been found to be incorrect 100% of the time in real-world workloads (e.g., web servers). Annotated branches shows the following in these servers: correct incorrect % Function File Line 0 169053813 100 l3mdev_master_ifindex_by_index l3mdev.h 81 This is happening because l3mdev_master_ifindex_by_index() is called from __inet_check_established(), which calls l3mdev_master_ifindex_by_index() passing the socked bounded interface. l3mdev_master_ifindex_by_index(net, sk->sk_bound_dev_if); Since most sockets are not going to be bound to a network device, the likely() is giving the wrong assumption. Remove the likely() annotation to ensure more accurate branch prediction. 
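For background (a userspace sketch, not kernel code): likely()/unlikely() reduce to __builtin_expect(), so they only pay off when the hinted branch really dominates, which the branch profile above shows is not the case for the bound-device check:

	/* what the annotation expands to, illustrative only */
	#define unlikely(x)	__builtin_expect(!!(x), 0)

	static long sum_nonnegative(const long *v, unsigned long n)
	{
		long sum = 0;
		unsigned long i;

		for (i = 0; i < n; i++) {
			/* justified only if negative entries really are rare */
			if (unlikely(v[i] < 0))
				continue;
			sum += v[i];
		}
		return sum;
	}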
Signed-off-by: Breno Leitao Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241008163205.3939629-1-leitao@debian.org Signed-off-by: Paolo Abeni --- include/net/l3mdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 031c661aa14d..2d6141f28b53 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -78,7 +78,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) struct net_device *dev; int rc = 0; - if (likely(ifindex)) { + if (ifindex) { rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); -- cgit v1.2.3 From 016f426a14f09faa8bdb68b063c2947edf3108a1 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 8 Oct 2024 21:32:09 +0300 Subject: net/mlx5: qos: Flesh out element_attributes in mlx5_ifc.h This is used for multiple purposes, depending on the scheduling element created. There are a few helper struct defined a long time ago, but they are not easy to find in the file and they are about to get new members. This commit cleans up this area a bit by: - moving the helper structs closer to where they are relevant. - defining a helper union to include all of them to help discoverability. - making use of it everywhere element_attributes is used. - using a consistent 'attr' name. Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 18 +++--- include/linux/mlx5/mlx5_ifc.h | 67 +++++++++++++---------- 2 files changed, 45 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 02a3563f51ad..7154eeff4fd4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -339,7 +339,7 @@ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group = vport->qos.group; struct mlx5_core_dev *dev = esw->dev; u32 parent_tsar_ix; - void *vport_elem; + void *attr; int err; if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT)) @@ -348,8 +348,8 @@ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, parent_tsar_ix = group ? 
group->tsar_ix : esw->qos.root_tsar_ix; MLX5_SET(scheduling_context, sched_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); - vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); - MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); + attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(vport_element, attr, vport_number, vport->vport); MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix); MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); @@ -443,8 +443,8 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_esw_rate_group *group; - __be32 *attr; u32 divider; + void *attr; int err; group = kzalloc(sizeof(*group), GFP_KERNEL); @@ -453,12 +453,10 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex MLX5_SET(scheduling_context, tsar_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); - - attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); - *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); - MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, esw->qos.root_tsar_ix); + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); err = mlx5_create_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, tsar_ctx, @@ -559,7 +557,7 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = esw->dev; - __be32 *attr; + void *attr; int err; if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) @@ -573,7 +571,7 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); - *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); err = mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 96d369112bfa..c79ba6197673 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4105,11 +4105,47 @@ enum { ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP = 1 << 4, }; +enum { + TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, + TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, + TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, +}; + +enum { + TSAR_TYPE_CAP_MASK_DWRR = 1 << 0, + TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1, + TSAR_TYPE_CAP_MASK_ETS = 1 << 2, +}; + +struct mlx5_ifc_tsar_element_bits { + u8 reserved_at_0[0x8]; + u8 tsar_type[0x8]; + u8 reserved_at_10[0x10]; +}; + +struct mlx5_ifc_vport_element_bits { + u8 reserved_at_0[0x10]; + u8 vport_number[0x10]; +}; + +struct mlx5_ifc_vport_tc_element_bits { + u8 traffic_class[0x4]; + u8 reserved_at_4[0xc]; + u8 vport_number[0x10]; +}; + +union mlx5_ifc_element_attributes_bits { + struct mlx5_ifc_tsar_element_bits tsar; + struct mlx5_ifc_vport_element_bits vport; + struct mlx5_ifc_vport_tc_element_bits vport_tc; + u8 reserved_at_0[0x20]; +}; + struct mlx5_ifc_scheduling_context_bits { u8 element_type[0x8]; u8 reserved_at_8[0x18]; - u8 element_attributes[0x20]; + union mlx5_ifc_element_attributes_bits element_attributes; u8 parent_element_id[0x20]; @@ 
-4798,35 +4834,6 @@ struct mlx5_ifc_register_loopback_control_bits { u8 reserved_at_20[0x60]; }; -struct mlx5_ifc_vport_tc_element_bits { - u8 traffic_class[0x4]; - u8 reserved_at_4[0xc]; - u8 vport_number[0x10]; -}; - -struct mlx5_ifc_vport_element_bits { - u8 reserved_at_0[0x10]; - u8 vport_number[0x10]; -}; - -enum { - TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, - TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, - TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, -}; - -enum { - TSAR_TYPE_CAP_MASK_DWRR = 1 << 0, - TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1, - TSAR_TYPE_CAP_MASK_ETS = 1 << 2, -}; - -struct mlx5_ifc_tsar_element_bits { - u8 reserved_at_0[0x8]; - u8 tsar_type[0x8]; - u8 reserved_at_10[0x10]; -}; - enum { MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_SUCCESS = 0x0, MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL = 0x1, -- cgit v1.2.3 From fceffbfe57af7d9941d08e1a995cccf558d08451 Mon Sep 17 00:00:00 2001 From: Patrick Rudolph Date: Wed, 2 Oct 2024 14:54:58 +0200 Subject: regulator: max5970: Drop unused structs After splitting the max5970 into a MFD device clean the remaining code and drop unused structs. The struct max5970_data and enum max5970_chip_type aren't used. Signed-off-by: Patrick Rudolph Acked-by: Lee Jones Link: https://patch.msgid.link/20241002125500.78278-1-patrick.rudolph@9elements.com Signed-off-by: Mark Brown --- drivers/regulator/max5970-regulator.c | 21 ++++----------------- include/linux/mfd/max5970.h | 12 ------------ 2 files changed, 4 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/drivers/regulator/max5970-regulator.c b/drivers/regulator/max5970-regulator.c index 4a568b1b0107..fc14177ddf5d 100644 --- a/drivers/regulator/max5970-regulator.c +++ b/drivers/regulator/max5970-regulator.c @@ -485,7 +485,7 @@ static int max597x_irq_handler(int irq, struct regulator_irq_data *rid, } static int max597x_adc_range(struct regmap *regmap, const int ch, - u32 *irng, u32 *mon_rng) + int *irng, int *mon_rng) { unsigned int reg; int ret; @@ -552,7 +552,6 @@ static int max597x_setup_irq(struct device *dev, static int max597x_regulator_probe(struct platform_device *pdev) { - struct max5970_data *max597x; struct regmap *regmap = dev_get_regmap(pdev->dev.parent, NULL); struct max5970_regulator *data; struct i2c_client *i2c = to_i2c_client(pdev->dev.parent); @@ -566,26 +565,18 @@ static int max597x_regulator_probe(struct platform_device *pdev) if (!regmap) return -EPROBE_DEFER; - max597x = devm_kzalloc(&i2c->dev, sizeof(struct max5970_data), GFP_KERNEL); - if (!max597x) - return -ENOMEM; - rdevs = devm_kcalloc(&i2c->dev, MAX5970_NUM_SWITCHES, sizeof(struct regulator_dev *), GFP_KERNEL); if (!rdevs) return -ENOMEM; - i2c_set_clientdata(i2c, max597x); - if (of_device_is_compatible(i2c->dev.of_node, "maxim,max5978")) - max597x->num_switches = MAX5978_NUM_SWITCHES; + num_switches = MAX5978_NUM_SWITCHES; else if (of_device_is_compatible(i2c->dev.of_node, "maxim,max5970")) - max597x->num_switches = MAX5970_NUM_SWITCHES; + num_switches = MAX5970_NUM_SWITCHES; else return -ENODEV; - num_switches = max597x->num_switches; - for (i = 0; i < num_switches; i++) { data = devm_kzalloc(&i2c->dev, sizeof(struct max5970_regulator), @@ -596,13 +587,10 @@ static int max597x_regulator_probe(struct platform_device *pdev) data->num_switches = num_switches; data->regmap = regmap; - ret = max597x_adc_range(regmap, i, &max597x->irng[i], &max597x->mon_rng[i]); + ret = max597x_adc_range(regmap, i, &data->irng, &data->mon_rng); if (ret < 0) return ret; - data->irng = max597x->irng[i]; - data->mon_rng = max597x->mon_rng[i]; - config.dev = 
&i2c->dev; config.driver_data = (void *)data; config.regmap = data->regmap; @@ -614,7 +602,6 @@ static int max597x_regulator_probe(struct platform_device *pdev) return PTR_ERR(rdev); } rdevs[i] = rdev; - max597x->shunt_micro_ohms[i] = data->shunt_micro_ohms; } if (IS_REACHABLE(CONFIG_HWMON)) { diff --git a/include/linux/mfd/max5970.h b/include/linux/mfd/max5970.h index 762a7d40c843..fc50e89edfaa 100644 --- a/include/linux/mfd/max5970.h +++ b/include/linux/mfd/max5970.h @@ -16,18 +16,6 @@ #define MAX5978_NUM_SWITCHES 1 #define MAX5970_NUM_LEDS 4 -struct max5970_data { - int num_switches; - u32 irng[MAX5970_NUM_SWITCHES]; - u32 mon_rng[MAX5970_NUM_SWITCHES]; - u32 shunt_micro_ohms[MAX5970_NUM_SWITCHES]; -}; - -enum max5970_chip_type { - TYPE_MAX5978 = 1, - TYPE_MAX5970, -}; - #define MAX5970_REG_CURRENT_L(ch) (0x01 + (ch) * 4) #define MAX5970_REG_CURRENT_H(ch) (0x00 + (ch) * 4) #define MAX5970_REG_VOLTAGE_L(ch) (0x03 + (ch) * 4) -- cgit v1.2.3 From 0e8158b4a82eb5bfb69df17510d210b073896f00 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 2 Oct 2024 14:22:24 +0200 Subject: OPP: Rework _set_required_devs() to manage a single device per call At this point there are no consumer drivers that makes use of _set_required_devs(), hence it should be straightforward to rework the code to enable it to better integrate with the PM domain attach procedure. During attach, one device at the time is being hooked up to its corresponding PM domain. Therefore, let's update the _set_required_devs() to align to this behaviour, allowing callers to fill out one required_dev per call. Subsequent changes starts making use of this. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20241002122232.194245-4-ulf.hansson@linaro.org --- drivers/opp/core.c | 84 +++++++++++++++++++++++++++++++++----------------- drivers/opp/opp.h | 4 ++- include/linux/pm_opp.h | 10 +++--- 3 files changed, 65 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 3aa18737470f..40e5f29a2670 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -2473,47 +2473,73 @@ err: } -static int _opp_set_required_devs(struct opp_table *opp_table, - struct device *dev, - struct device **required_devs) +static int _opp_set_required_dev(struct opp_table *opp_table, + struct device *dev, + struct device *required_dev, + unsigned int index) { - int i; + struct opp_table *required_table, *pd_table; + struct device *gdev; - if (!opp_table->required_devs) { + /* Genpd core takes care of propagation to parent genpd */ + if (opp_table->is_genpd) { + dev_err(dev, "%s: Operation not supported for genpds\n", __func__); + return -EOPNOTSUPP; + } + + if (index >= opp_table->required_opp_count) { dev_err(dev, "Required OPPs not available, can't set required devs\n"); return -EINVAL; } - /* Another device that shares the OPP table has set the required devs ? 
*/ - if (opp_table->required_devs[0]) - return 0; + required_table = opp_table->required_opp_tables[index]; + if (IS_ERR(required_table)) { + dev_err(dev, "Missing OPP table, unable to set the required devs\n"); + return -ENODEV; + } - for (i = 0; i < opp_table->required_opp_count; i++) { - /* Genpd core takes care of propagation to parent genpd */ - if (required_devs[i] && opp_table->is_genpd && - opp_table->required_opp_tables[i]->is_genpd) { - dev_err(dev, "%s: Operation not supported for genpds\n", __func__); - return -EOPNOTSUPP; + /* + * The required_opp_tables parsing is not perfect, as the OPP core does + * the parsing solely based on the DT node pointers. The core sets the + * required_opp_tables entry to the first OPP table in the "opp_tables" + * list, that matches with the node pointer. + * + * If the target DT OPP table is used by multiple devices and they all + * create separate instances of 'struct opp_table' from it, then it is + * possible that the required_opp_tables entry may be set to the + * incorrect sibling device. + * + * Cross check it again and fix if required. + */ + gdev = dev_to_genpd_dev(required_dev); + if (IS_ERR(gdev)) + return PTR_ERR(gdev); + + pd_table = _find_opp_table(gdev); + if (!IS_ERR(pd_table)) { + if (pd_table != required_table) { + dev_pm_opp_put_opp_table(required_table); + opp_table->required_opp_tables[index] = pd_table; + } else { + dev_pm_opp_put_opp_table(pd_table); } - - opp_table->required_devs[i] = required_devs[i]; } + opp_table->required_devs[index] = required_dev; return 0; } -static void _opp_put_required_devs(struct opp_table *opp_table) +static void _opp_put_required_dev(struct opp_table *opp_table, + unsigned int index) { - int i; - - for (i = 0; i < opp_table->required_opp_count; i++) - opp_table->required_devs[i] = NULL; + opp_table->required_devs[index] = NULL; } static void _opp_clear_config(struct opp_config_data *data) { - if (data->flags & OPP_CONFIG_REQUIRED_DEVS) - _opp_put_required_devs(data->opp_table); + if (data->flags & OPP_CONFIG_REQUIRED_DEV) + _opp_put_required_dev(data->opp_table, + data->required_dev_index); else if (data->flags & OPP_CONFIG_GENPD) _opp_detach_genpd(data->opp_table); @@ -2630,7 +2656,7 @@ int dev_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config) /* Attach genpds */ if (config->genpd_names) { - if (config->required_devs) { + if (config->required_dev) { ret = -EINVAL; goto err; } @@ -2641,13 +2667,15 @@ int dev_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config) goto err; data->flags |= OPP_CONFIG_GENPD; - } else if (config->required_devs) { - ret = _opp_set_required_devs(opp_table, dev, - config->required_devs); + } else if (config->required_dev) { + ret = _opp_set_required_dev(opp_table, dev, + config->required_dev, + config->required_dev_index); if (ret) goto err; - data->flags |= OPP_CONFIG_REQUIRED_DEVS; + data->required_dev_index = config->required_dev_index; + data->flags |= OPP_CONFIG_REQUIRED_DEV; } ret = xa_alloc(&opp_configs, &id, data, XA_LIMIT(1, INT_MAX), diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index cff1fabd1ae3..3eed6f145e90 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -35,12 +35,13 @@ extern struct list_head opp_tables; #define OPP_CONFIG_PROP_NAME BIT(3) #define OPP_CONFIG_SUPPORTED_HW BIT(4) #define OPP_CONFIG_GENPD BIT(5) -#define OPP_CONFIG_REQUIRED_DEVS BIT(6) +#define OPP_CONFIG_REQUIRED_DEV BIT(6) /** * struct opp_config_data - data for set config operations * @opp_table: OPP table * @flags: OPP config flags 
+ * @required_dev_index: The position in the array of required_devs * * This structure stores the OPP config information for each OPP table * configuration by the callers. @@ -48,6 +49,7 @@ extern struct list_head opp_tables; struct opp_config_data { struct opp_table *opp_table; unsigned int flags; + unsigned int required_dev_index; }; /** diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 6424692c30b7..bc74bc69107a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -63,10 +63,11 @@ typedef int (*config_clks_t)(struct device *dev, struct opp_table *opp_table, * @supported_hw_count: Number of elements in the array. * @regulator_names: Array of pointers to the names of the regulator, NULL terminated. * @genpd_names: Null terminated array of pointers containing names of genpd to - * attach. Mutually exclusive with required_devs. + * attach. Mutually exclusive with required_dev. * @virt_devs: Pointer to return the array of genpd virtual devices. Mutually - * exclusive with required_devs. - * @required_devs: Required OPP devices. Mutually exclusive with genpd_names/virt_devs. + * exclusive with required_dev. + * @required_dev: Required OPP device. Mutually exclusive with genpd_names/virt_devs. + * @required_dev_index: The index of the required OPP for the @required_dev. * * This structure contains platform specific OPP configurations for the device. */ @@ -81,7 +82,8 @@ struct dev_pm_opp_config { const char * const *regulator_names; const char * const *genpd_names; struct device ***virt_devs; - struct device **required_devs; + struct device *required_dev; + unsigned int required_dev_index; }; #define OPP_LEVEL_UNSET U32_MAX -- cgit v1.2.3 From 98d277a79126a2df8a2617215846d01f823cfffa Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 2 Oct 2024 14:22:25 +0200 Subject: PM: domains: Support required OPPs in dev_pm_domain_attach_list() In the multiple PM domain case we need platform code to specify the index of the corresponding required OPP in DT for a device, which is what *_opp_attach_genpd() is there to help us with. However, attaching a device to its PM domains is in general better done with dev_pm_domain_attach_list(). To avoid having two different ways to manage this and to prepare for the removal of *_opp_attach_genpd(), let's extend dev_pm_domain_attach|detach_list() to manage the required OPPs too. 
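A hypothetical consumer would then look roughly like this (domain names are made up, and the attach-data fields pd_names/num_pd_names/pd_flags are assumed to match the existing struct dev_pm_domain_attach_data layout):

	static int example_attach_domains(struct device *dev,
					  struct dev_pm_domain_list **pds)
	{
		static const char * const pd_names[] = { "perf", "mx" };
		struct dev_pm_domain_attach_data pd_data = {
			.pd_names = pd_names,
			.num_pd_names = ARRAY_SIZE(pd_names),
			/* index i of pd_names maps to required-opps index i */
			.pd_flags = PD_FLAG_REQUIRED_OPP,
		};

		return dev_pm_domain_attach_list(dev, &pd_data, pds);
	}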
Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20241002122232.194245-5-ulf.hansson@linaro.org --- drivers/base/power/common.c | 21 ++++++++++++++++++++- include/linux/pm_domain.h | 8 ++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index cca2fd0a1aed..781968a128ff 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "power.h" @@ -222,13 +223,15 @@ int dev_pm_domain_attach_list(struct device *dev, if (!pds) return -ENOMEM; - size = sizeof(*pds->pd_devs) + sizeof(*pds->pd_links); + size = sizeof(*pds->pd_devs) + sizeof(*pds->pd_links) + + sizeof(*pds->opp_tokens); pds->pd_devs = kcalloc(num_pds, size, GFP_KERNEL); if (!pds->pd_devs) { ret = -ENOMEM; goto free_pds; } pds->pd_links = (void *)(pds->pd_devs + num_pds); + pds->opp_tokens = (void *)(pds->pd_links + num_pds); if (link_flags && pd_flags & PD_FLAG_DEV_LINK_ON) link_flags |= DL_FLAG_RPM_ACTIVE; @@ -244,6 +247,19 @@ int dev_pm_domain_attach_list(struct device *dev, goto err_attach; } + if (pd_flags & PD_FLAG_REQUIRED_OPP) { + struct dev_pm_opp_config config = { + .required_dev = pd_dev, + .required_dev_index = i, + }; + + ret = dev_pm_opp_set_config(dev, &config); + if (ret < 0) + goto err_link; + + pds->opp_tokens[i] = ret; + } + if (link_flags) { struct device_link *link; @@ -264,9 +280,11 @@ int dev_pm_domain_attach_list(struct device *dev, return num_pds; err_link: + dev_pm_opp_clear_config(pds->opp_tokens[i]); dev_pm_domain_detach(pd_dev, true); err_attach: while (--i >= 0) { + dev_pm_opp_clear_config(pds->opp_tokens[i]); if (pds->pd_links[i]) device_link_del(pds->pd_links[i]); dev_pm_domain_detach(pds->pd_devs[i], true); @@ -361,6 +379,7 @@ void dev_pm_domain_detach_list(struct dev_pm_domain_list *list) return; for (i = 0; i < list->num_pds; i++) { + dev_pm_opp_clear_config(list->opp_tokens[i]); if (list->pd_links[i]) device_link_del(list->pd_links[i]); dev_pm_domain_detach(list->pd_devs[i], true); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b637ec14025f..92f9d56f623d 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -30,9 +30,16 @@ * supplier and its PM domain when creating the * device-links. * + * PD_FLAG_REQUIRED_OPP: Assign required_devs for the required OPPs. The + * index of the required OPP must correspond to the + * index in the array of the pd_names. If pd_names + * isn't specified, the index just follows the + * index for the attached PM domain. + * */ #define PD_FLAG_NO_DEV_LINK BIT(0) #define PD_FLAG_DEV_LINK_ON BIT(1) +#define PD_FLAG_REQUIRED_OPP BIT(2) struct dev_pm_domain_attach_data { const char * const *pd_names; @@ -43,6 +50,7 @@ struct dev_pm_domain_attach_data { struct dev_pm_domain_list { struct device **pd_devs; struct device_link **pd_links; + u32 *opp_tokens; u32 num_pds; }; -- cgit v1.2.3 From e130ca9d4873f8d01e3e7b8137e0c14196a3cfb4 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 2 Oct 2024 14:22:27 +0200 Subject: pmdomain: core: Set the required dev for a required OPP during genpd attach In the single PM domain case there is no need for platform code to specify the index of the corresponding required OPP in DT, as the index must be zero. This allows us to assign a required dev for the required OPP from genpd, while attaching a device to its PM domain. 
In this way, we can remove some of the genpd specific code in the OPP core for the single PM domain case. Although, this cleanup is made from a subsequent change. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20241002122232.194245-7-ulf.hansson@linaro.org --- drivers/pmdomain/core.c | 42 +++++++++++++++++++++++++++++++++++++++--- include/linux/pm_domain.h | 1 + 2 files changed, 40 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c index 259abd338f4c..76490f0bf1e2 100644 --- a/drivers/pmdomain/core.c +++ b/drivers/pmdomain/core.c @@ -1722,6 +1722,7 @@ static void genpd_free_dev_data(struct device *dev, spin_unlock_irq(&dev->power.lock); + dev_pm_opp_clear_config(gpd_data->opp_token); kfree(gpd_data->td); kfree(gpd_data); dev_pm_put_subsys_data(dev); @@ -2884,6 +2885,29 @@ static void genpd_dev_pm_sync(struct device *dev) genpd_queue_power_off_work(pd); } +static int genpd_set_required_opp_dev(struct device *dev, + struct device *base_dev) +{ + struct dev_pm_opp_config config = { + .required_dev = dev, + }; + int ret; + + /* Limit support to non-providers for now. */ + if (of_property_present(base_dev->of_node, "#power-domain-cells")) + return 0; + + if (!dev_pm_opp_of_has_required_opp(base_dev)) + return 0; + + ret = dev_pm_opp_set_config(base_dev, &config); + if (ret < 0) + return ret; + + dev_gpd_data(dev)->opp_token = ret; + return 0; +} + static int genpd_set_required_opp(struct device *dev, unsigned int index) { int ret, pstate; @@ -2908,7 +2932,8 @@ err: } static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, - unsigned int index, bool power_on) + unsigned int index, unsigned int num_domains, + bool power_on) { struct of_phandle_args pd_args; struct generic_pm_domain *pd; @@ -2940,6 +2965,17 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, dev->pm_domain->detach = genpd_dev_pm_detach; dev->pm_domain->sync = genpd_dev_pm_sync; + /* + * For a single PM domain the index of the required OPP must be zero, so + * let's try to assign a required dev in that case. In the multiple PM + * domains case, we need platform code to specify the index. + */ + if (num_domains == 1) { + ret = genpd_set_required_opp_dev(dev, base_dev); + if (ret) + goto err; + } + ret = genpd_set_required_opp(dev, index); if (ret) goto err; @@ -2994,7 +3030,7 @@ int genpd_dev_pm_attach(struct device *dev) "#power-domain-cells") != 1) return 0; - return __genpd_dev_pm_attach(dev, dev, 0, true); + return __genpd_dev_pm_attach(dev, dev, 0, 1, true); } EXPORT_SYMBOL_GPL(genpd_dev_pm_attach); @@ -3047,7 +3083,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, } /* Try to attach the device to the PM domain at the specified index. */ - ret = __genpd_dev_pm_attach(virt_dev, dev, index, false); + ret = __genpd_dev_pm_attach(virt_dev, dev, index, num_domains, false); if (ret < 1) { device_unregister(virt_dev); return ret ? 
ERR_PTR(ret) : NULL; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 92f9d56f623d..76775ab38898 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -252,6 +252,7 @@ struct generic_pm_domain_data { unsigned int performance_state; unsigned int default_pstate; unsigned int rpm_pstate; + unsigned int opp_token; bool hw_mode; void *data; }; -- cgit v1.2.3 From d6caca30a548764f8cfd78393ea09fefdf285212 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 2 Oct 2024 14:22:32 +0200 Subject: OPP: Drop redundant *_opp_attach|detach_genpd() All users of *_opp_attach|detach_genpd(), have been converted to use dev|devm_pm_domain_attach|detach_list(), hence let's drop it along with its corresponding exported functions. Acked-by: Viresh Kumar Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20241002122232.194245-12-ulf.hansson@linaro.org --- drivers/opp/core.c | 131 +------------------------------------------------ drivers/opp/opp.h | 3 +- include/linux/pm_opp.h | 38 +------------- 3 files changed, 3 insertions(+), 169 deletions(-) (limited to 'include') diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 40e5f29a2670..0311b18319a4 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -2360,119 +2360,6 @@ static void _opp_put_config_regulators_helper(struct opp_table *opp_table) opp_table->config_regulators = NULL; } -static void _opp_detach_genpd(struct opp_table *opp_table) -{ - int index; - - for (index = 0; index < opp_table->required_opp_count; index++) { - if (!opp_table->required_devs[index]) - continue; - - dev_pm_domain_detach(opp_table->required_devs[index], false); - opp_table->required_devs[index] = NULL; - } -} - -/* - * Multiple generic power domains for a device are supported with the help of - * virtual genpd devices, which are created for each consumer device - genpd - * pair. These are the device structures which are attached to the power domain - * and are required by the OPP core to set the performance state of the genpd. - * The same API also works for the case where single genpd is available and so - * we don't need to support that separately. - * - * This helper will normally be called by the consumer driver of the device - * "dev", as only that has details of the genpd names. - * - * This helper needs to be called once with a list of all genpd to attach. - * Otherwise the original device structure will be used instead by the OPP core. - * - * The order of entries in the names array must match the order in which - * "required-opps" are added in DT. - */ -static int _opp_attach_genpd(struct opp_table *opp_table, struct device *dev, - const char * const *names, struct device ***virt_devs) -{ - struct device *virt_dev, *gdev; - struct opp_table *genpd_table; - int index = 0, ret = -EINVAL; - const char * const *name = names; - - if (!opp_table->required_devs) { - dev_err(dev, "Required OPPs not available, can't attach genpd\n"); - return -EINVAL; - } - - /* Genpd core takes care of propagation to parent genpd */ - if (opp_table->is_genpd) { - dev_err(dev, "%s: Operation not supported for genpds\n", __func__); - return -EOPNOTSUPP; - } - - /* Checking only the first one is enough ? 
*/ - if (opp_table->required_devs[0]) - return 0; - - while (*name) { - if (index >= opp_table->required_opp_count) { - dev_err(dev, "Index can't be greater than required-opp-count - 1, %s (%d : %d)\n", - *name, opp_table->required_opp_count, index); - goto err; - } - - virt_dev = dev_pm_domain_attach_by_name(dev, *name); - if (IS_ERR_OR_NULL(virt_dev)) { - ret = virt_dev ? PTR_ERR(virt_dev) : -ENODEV; - dev_err(dev, "Couldn't attach to pm_domain: %d\n", ret); - goto err; - } - - /* - * The required_opp_tables parsing is not perfect, as the OPP - * core does the parsing solely based on the DT node pointers. - * The core sets the required_opp_tables entry to the first OPP - * table in the "opp_tables" list, that matches with the node - * pointer. - * - * If the target DT OPP table is used by multiple devices and - * they all create separate instances of 'struct opp_table' from - * it, then it is possible that the required_opp_tables entry - * may be set to the incorrect sibling device. - * - * Cross check it again and fix if required. - */ - gdev = dev_to_genpd_dev(virt_dev); - if (IS_ERR(gdev)) { - ret = PTR_ERR(gdev); - goto err; - } - - genpd_table = _find_opp_table(gdev); - if (!IS_ERR(genpd_table)) { - if (genpd_table != opp_table->required_opp_tables[index]) { - dev_pm_opp_put_opp_table(opp_table->required_opp_tables[index]); - opp_table->required_opp_tables[index] = genpd_table; - } else { - dev_pm_opp_put_opp_table(genpd_table); - } - } - - opp_table->required_devs[index] = virt_dev; - index++; - name++; - } - - if (virt_devs) - *virt_devs = opp_table->required_devs; - - return 0; - -err: - _opp_detach_genpd(opp_table); - return ret; - -} - static int _opp_set_required_dev(struct opp_table *opp_table, struct device *dev, struct device *required_dev, @@ -2540,9 +2427,6 @@ static void _opp_clear_config(struct opp_config_data *data) if (data->flags & OPP_CONFIG_REQUIRED_DEV) _opp_put_required_dev(data->opp_table, data->required_dev_index); - else if (data->flags & OPP_CONFIG_GENPD) - _opp_detach_genpd(data->opp_table); - if (data->flags & OPP_CONFIG_REGULATOR) _opp_put_regulators(data->opp_table); if (data->flags & OPP_CONFIG_SUPPORTED_HW) @@ -2654,20 +2538,7 @@ int dev_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config) data->flags |= OPP_CONFIG_REGULATOR; } - /* Attach genpds */ - if (config->genpd_names) { - if (config->required_dev) { - ret = -EINVAL; - goto err; - } - - ret = _opp_attach_genpd(opp_table, dev, config->genpd_names, - config->virt_devs); - if (ret) - goto err; - - data->flags |= OPP_CONFIG_GENPD; - } else if (config->required_dev) { + if (config->required_dev) { ret = _opp_set_required_dev(opp_table, dev, config->required_dev, config->required_dev_index); diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 3eed6f145e90..4bdb79ffa101 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -34,8 +34,7 @@ extern struct list_head opp_tables; #define OPP_CONFIG_REGULATOR_HELPER BIT(2) #define OPP_CONFIG_PROP_NAME BIT(3) #define OPP_CONFIG_SUPPORTED_HW BIT(4) -#define OPP_CONFIG_GENPD BIT(5) -#define OPP_CONFIG_REQUIRED_DEV BIT(6) +#define OPP_CONFIG_REQUIRED_DEV BIT(5) /** * struct opp_config_data - data for set config operations diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index bc74bc69107a..568183e3e641 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -62,11 +62,7 @@ typedef int (*config_clks_t)(struct device *dev, struct opp_table *opp_table, * @supported_hw: Array of hierarchy of versions to match. 
* @supported_hw_count: Number of elements in the array. * @regulator_names: Array of pointers to the names of the regulator, NULL terminated. - * @genpd_names: Null terminated array of pointers containing names of genpd to - * attach. Mutually exclusive with required_dev. - * @virt_devs: Pointer to return the array of genpd virtual devices. Mutually - * exclusive with required_dev. - * @required_dev: Required OPP device. Mutually exclusive with genpd_names/virt_devs. + * @required_dev: The required OPP device. * @required_dev_index: The index of the required OPP for the @required_dev. * * This structure contains platform specific OPP configurations for the device. @@ -80,8 +76,6 @@ struct dev_pm_opp_config { const unsigned int *supported_hw; unsigned int supported_hw_count; const char * const *regulator_names; - const char * const *genpd_names; - struct device ***virt_devs; struct device *required_dev; unsigned int required_dev_index; }; @@ -677,36 +671,6 @@ static inline void dev_pm_opp_put_config_regulators(int token) dev_pm_opp_clear_config(token); } -/* genpd helpers */ -static inline int dev_pm_opp_attach_genpd(struct device *dev, - const char * const *names, - struct device ***virt_devs) -{ - struct dev_pm_opp_config config = { - .genpd_names = names, - .virt_devs = virt_devs, - }; - - return dev_pm_opp_set_config(dev, &config); -} - -static inline void dev_pm_opp_detach_genpd(int token) -{ - dev_pm_opp_clear_config(token); -} - -static inline int devm_pm_opp_attach_genpd(struct device *dev, - const char * const *names, - struct device ***virt_devs) -{ - struct dev_pm_opp_config config = { - .genpd_names = names, - .virt_devs = virt_devs, - }; - - return devm_pm_opp_set_config(dev, &config); -} - /* prop-name helpers */ static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) { -- cgit v1.2.3 From 13d68a16430312fc21990f48326366eb73891202 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Oct 2024 10:09:47 +0200 Subject: genetlink: extend info user-storage to match NL cb ctx This allows a more uniform implementation of non-dump and dump operations, and will be used later in the series to avoid some per-operation allocation. Additionally rename the NL_ASSERT_DUMP_CTX_FITS macro, to fit a more extended usage. 
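A hypothetical family could then overlay its own context on the shared storage for doit() handlers the same way dump handlers already do on netlink_callback (struct my_op_ctx is made up for illustration):

	struct my_op_ctx {
		u32 ifindex;
		u32 flags;
	};

	static struct my_op_ctx *my_op_ctx(struct genl_info *info)
	{
		/* same size bound as netlink_callback->ctx, hence the rename */
		NL_ASSERT_CTX_FITS(struct my_op_ctx);

		return (struct my_op_ctx *)info->ctx;
	}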
Suggested-by: Jakub Kicinski Reviewed-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/1130cc2896626b84587a2a5f96a5c6829638f4da.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_mdb.c | 2 +- include/linux/netlink.h | 5 +++-- include/net/genetlink.h | 8 ++++++-- net/core/netdev-genl.c | 2 +- net/core/rtnetlink.c | 2 +- net/devlink/devl_internal.h | 2 +- net/ethtool/rss.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 2 +- net/netlink/genetlink.c | 4 ++-- 9 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan/vxlan_mdb.c b/drivers/net/vxlan/vxlan_mdb.c index 60eb95a06d55..ebed05a2804c 100644 --- a/drivers/net/vxlan/vxlan_mdb.c +++ b/drivers/net/vxlan/vxlan_mdb.c @@ -284,7 +284,7 @@ int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb, ASSERT_RTNL(); - NL_ASSERT_DUMP_CTX_FITS(struct vxlan_mdb_dump_ctx); + NL_ASSERT_CTX_FITS(struct vxlan_mdb_dump_ctx); nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWMDB, sizeof(*bpm), diff --git a/include/linux/netlink.h b/include/linux/netlink.h index b332c2048c75..a3ca198a3a9e 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -34,6 +34,7 @@ struct netlink_skb_parms { #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) +#define NETLINK_CTX_SIZE 48 void netlink_table_grab(void); @@ -293,7 +294,7 @@ struct netlink_callback { int flags; bool strict_check; union { - u8 ctx[48]; + u8 ctx[NETLINK_CTX_SIZE]; /* args is deprecated. Cast a struct over ctx instead * for proper type safety. @@ -302,7 +303,7 @@ struct netlink_callback { }; }; -#define NL_ASSERT_DUMP_CTX_FITS(type_name) \ +#define NL_ASSERT_CTX_FITS(type_name) \ BUILD_BUG_ON(sizeof(type_name) > \ sizeof_field(struct netlink_callback, ctx)) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 9ab49bfeae78..9d3726e8f90e 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -124,7 +124,8 @@ struct genl_family { * @genlhdr: generic netlink message header * @attrs: netlink attributes * @_net: network namespace - * @user_ptr: user pointers + * @ctx: storage space for the use by the family + * @user_ptr: user pointers (deprecated, use ctx instead) * @extack: extended ACK report struct */ struct genl_info { @@ -135,7 +136,10 @@ struct genl_info { struct genlmsghdr * genlhdr; struct nlattr ** attrs; possible_net_t _net; - void * user_ptr[2]; + union { + u8 ctx[NETLINK_CTX_SIZE]; + void * user_ptr[2]; + }; struct netlink_ext_ack *extack; }; diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 1cb954f2d39e..358cba248796 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -24,7 +24,7 @@ struct netdev_nl_dump_ctx { static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb) { - NL_ASSERT_DUMP_CTX_FITS(struct netdev_nl_dump_ctx); + NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx); return (struct netdev_nl_dump_ctx *)cb->ctx; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6d68247aea70..a9b81b7d9746 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6243,7 +6243,7 @@ static int rtnl_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) int idx, s_idx; int err; - NL_ASSERT_DUMP_CTX_FITS(struct rtnl_mdb_dump_ctx); + NL_ASSERT_CTX_FITS(struct rtnl_mdb_dump_ctx); if (cb->strict_check) { err = rtnl_mdb_valid_dump_req(cb->nlh, cb->extack); diff --git 
a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index c7a8e13f917c..a9f064ab9ed9 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -166,7 +166,7 @@ int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb, static inline struct devlink_nl_dump_state * devlink_dump_state(struct netlink_callback *cb) { - NL_ASSERT_DUMP_CTX_FITS(struct devlink_nl_dump_state); + NL_ASSERT_CTX_FITS(struct devlink_nl_dump_state); return (struct devlink_nl_dump_state *)cb->ctx; } diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index e07386275e14..7cb106b590ab 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -224,7 +224,7 @@ struct rss_nl_dump_ctx { static struct rss_nl_dump_ctx *rss_dump_ctx(struct netlink_callback *cb) { - NL_ASSERT_DUMP_CTX_FITS(struct rss_nl_dump_ctx); + NL_ASSERT_CTX_FITS(struct rss_nl_dump_ctx); return (struct rss_nl_dump_ctx *)cb->ctx; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6a1239433830..36168f8b6efa 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3870,7 +3870,7 @@ static int __init ctnetlink_init(void) { int ret; - NL_ASSERT_DUMP_CTX_FITS(struct ctnetlink_list_dump_ctx); + NL_ASSERT_CTX_FITS(struct ctnetlink_list_dump_ctx); ret = nfnetlink_subsys_register(&ctnl_subsys); if (ret < 0) { diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index feb54c63a116..29387b605f3e 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -997,7 +997,7 @@ static int genl_start(struct netlink_callback *cb) info->info.attrs = attrs; genl_info_net_set(&info->info, sock_net(cb->skb->sk)); info->info.extack = cb->extack; - memset(&info->info.user_ptr, 0, sizeof(info->info.user_ptr)); + memset(&info->info.ctx, 0, sizeof(info->info.ctx)); cb->data = info; if (ops->start) { @@ -1104,7 +1104,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family, info.attrs = attrbuf; info.extack = extack; genl_info_net_set(&info, net); - memset(&info.user_ptr, 0, sizeof(info.user_ptr)); + memset(&info.ctx, 0, sizeof(info.ctx)); if (ops->pre_doit) { err = ops->pre_doit(ops, skb, &info); -- cgit v1.2.3 From 04e65df94b3112a1b319b6deb5bab83fd740bc7d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Oct 2024 10:09:48 +0200 Subject: netlink: spec: add shaper YAML spec Define the user-space visible interface to query, configure and delete network shapers via yaml definition. Add dummy implementations for the relevant NL callbacks. set() and delete() operations touch a single shaper creating/updating or deleting it. The group() operation creates a shaper's group, nesting multiple input shapers under the specified output shaper. 
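For illustration, a @set request capping queue 1 of ifindex 2 to 10Mbps would carry the attributes sketched below. The sketch uses the in-kernel nla_put helpers purely to show the TLV layout; a userspace client would build the same attributes with its netlink library of choice, and example_fill_set() is hypothetical, not part of this patch:

    #include <net/netlink.h>
    #include <uapi/linux/net_shaper.h>

    static int example_fill_set(struct sk_buff *msg)
    {
            struct nlattr *handle;

            if (nla_put_u32(msg, NET_SHAPER_A_IFINDEX, 2) ||
                nla_put_u32(msg, NET_SHAPER_A_METRIC, NET_SHAPER_METRIC_BPS) ||
                nla_put_uint(msg, NET_SHAPER_A_BW_MAX, 10 * 1000 * 1000))
                    return -EMSGSIZE;

            /* the shaper handle is a nest carrying scope + id */
            handle = nla_nest_start(msg, NET_SHAPER_A_HANDLE);
            if (!handle)
                    return -EMSGSIZE;
            if (nla_put_u32(msg, NET_SHAPER_A_HANDLE_SCOPE,
                            NET_SHAPER_SCOPE_QUEUE) ||
                nla_put_u32(msg, NET_SHAPER_A_HANDLE_ID, 1)) {
                    nla_nest_cancel(msg, handle);
                    return -EMSGSIZE;
            }
            nla_nest_end(msg, handle);
            return 0;
    }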
Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/7a33a1ff370bdbcd0cd3f909575c912cd56f41da.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/net_shaper.yaml | 274 ++++++++++++++++++++++++++++ MAINTAINERS | 1 + include/uapi/linux/net_shaper.h | 78 ++++++++ net/Kconfig | 3 + net/Makefile | 1 + net/shaper/Makefile | 8 + net/shaper/shaper.c | 55 ++++++ net/shaper/shaper_nl_gen.c | 125 +++++++++++++ net/shaper/shaper_nl_gen.h | 34 ++++ 9 files changed, 579 insertions(+) create mode 100644 Documentation/netlink/specs/net_shaper.yaml create mode 100644 include/uapi/linux/net_shaper.h create mode 100644 net/shaper/Makefile create mode 100644 net/shaper/shaper.c create mode 100644 net/shaper/shaper_nl_gen.c create mode 100644 net/shaper/shaper_nl_gen.h (limited to 'include') diff --git a/Documentation/netlink/specs/net_shaper.yaml b/Documentation/netlink/specs/net_shaper.yaml new file mode 100644 index 000000000000..618fc09932ff --- /dev/null +++ b/Documentation/netlink/specs/net_shaper.yaml @@ -0,0 +1,274 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +name: net-shaper + +doc: | + Networking HW rate limiting configuration. + + This API allows configuring HW shapers available on the network + devices at different levels (queues, network device) and allows + arbitrary manipulation of the scheduling tree of the involved + shapers. + + Each @shaper is identified within the given device, by a @handle, + comprising both a @scope and an @id. + + Depending on the @scope value, the shapers are attached to specific + HW objects (queues, devices) or, for @node scope, represent a + scheduling group, that can be placed in an arbitrary location of + the scheduling tree. + + Shapers can be created with two different operations: the @set + operation, to create and update a single "attached" shaper, and + the @group operation, to create and update a scheduling + group. Only the @group operation can create @node scope shapers. + + Existing shapers can be deleted/reset via the @delete operation. + + The user can query the running configuration via the @get operation. + +definitions: + - + type: enum + name: scope + doc: Defines the shaper @id interpretation. + render-max: true + entries: + - name: unspec + doc: The scope is not specified. + - + name: netdev + doc: The main shaper for the given network device. + - + name: queue + doc: | + The shaper is attached to the given device queue, + the @id represents the queue number. + - + name: node + doc: | + The shaper allows grouping of queues or other + node shapers; can be nested in either @netdev + shapers or other @node shapers, allowing placement + in any location of the scheduling tree, except + leaves and root. + - + type: enum + name: metric + doc: Different metric supported by the shaper. + entries: + - + name: bps + doc: Shaper operates on a bits per second basis. + - + name: pps + doc: Shaper operates on a packets per second basis. + +attribute-sets: + - + name: net-shaper + attributes: + - + name: handle + type: nest + nested-attributes: handle + doc: Unique identifier for the given shaper inside the owning device. + - + name: metric + type: u32 + enum: metric + doc: Metric used by the given shaper for bw-min, bw-max and burst. + - + name: bw-min + type: uint + doc: Guaranteed bandwidth for the given shaper. + - + name: bw-max + type: uint + doc: Maximum bandwidth for the given shaper or 0 when unlimited. 
+ - + name: burst + type: uint + doc: | + Maximum burst-size for shaping. Should not be interpreted + as a quantum. + - + name: priority + type: u32 + doc: | + Scheduling priority for the given shaper. The priority + scheduling is applied to sibling shapers. + - + name: weight + type: u32 + doc: | + Relative weight for round robin scheduling of the + given shaper. + The scheduling is applied to all sibling shapers + with the same priority. + - + name: ifindex + type: u32 + doc: Interface index owning the specified shaper. + - + name: parent + type: nest + nested-attributes: handle + doc: | + Identifier for the parent of the affected shaper. + Only needed for @group operation. + - + name: leaves + type: nest + multi-attr: true + nested-attributes: leaf-info + doc: | + Describes a set of leaves shapers for a @group operation. + - + name: handle + attributes: + - + name: scope + type: u32 + enum: scope + doc: Defines the shaper @id interpretation. + - + name: id + type: u32 + doc: | + Numeric identifier of a shaper. The id semantic depends on + the scope. For @queue scope it's the queue id and for @node + scope it's the node identifier. + - + name: leaf-info + subset-of: net-shaper + attributes: + - + name: handle + - + name: priority + - + name: weight + +operations: + list: + - + name: get + doc: | + Get information about a shaper for a given device. + attribute-set: net-shaper + + do: + pre: net-shaper-nl-pre-doit + post: net-shaper-nl-post-doit + request: + attributes: &ns-binding + - ifindex + - handle + reply: + attributes: &ns-attrs + - ifindex + - parent + - handle + - metric + - bw-min + - bw-max + - burst + - priority + - weight + + dump: + pre: net-shaper-nl-pre-dumpit + post: net-shaper-nl-post-dumpit + request: + attributes: + - ifindex + reply: + attributes: *ns-attrs + - + name: set + doc: | + Create or update the specified shaper. + The set operation can't be used to create a @node scope shaper, + use the @group operation instead. + attribute-set: net-shaper + flags: [ admin-perm ] + + do: + pre: net-shaper-nl-pre-doit + post: net-shaper-nl-post-doit + request: + attributes: + - ifindex + - handle + - metric + - bw-min + - bw-max + - burst + - priority + - weight + + - + name: delete + doc: | + Clear (remove) the specified shaper. When deleting + a @node shaper, reattach all the node's leaves to the + deleted node's parent. + If, after the removal, the parent shaper has no more + leaves and the parent shaper scope is @node, the parent + node is deleted, recursively. + When deleting a @queue shaper or a @netdev shaper, + the shaper disappears from the hierarchy, but the + queue/device can still send traffic: it has an implicit + node with infinite bandwidth. The queue's implicit node + feeds an implicit RR node at the root of the hierarchy. + attribute-set: net-shaper + flags: [ admin-perm ] + + do: + pre: net-shaper-nl-pre-doit + post: net-shaper-nl-post-doit + request: + attributes: *ns-binding + + - + name: group + doc: | + Create or update a scheduling group, attaching the specified + @leaves shapers under the specified node identified by @handle. + The @leaves shapers scope must be @queue and the node shaper + scope must be either @node or @netdev. + When the node shaper has @node scope, if the @handle @id is not + specified, a new shaper of such scope is created, otherwise the + specified node must already exist. + When updating an existing node shaper, the specified @leaves are + added to the existing node; such node will also retain any preexisting + leave. 
+ The @parent handle for a new node shaper defaults to the parent + of all the leaves, provided all the leaves share the same parent. + Otherwise @parent handle must be specified. + The user can optionally provide shaping attributes for the node + shaper. + The operation is atomic, on failure no change is applied to + the device shaping configuration, otherwise the @node shaper + full identifier, comprising @binding and @handle, is provided + as the reply. + attribute-set: net-shaper + flags: [ admin-perm ] + + do: + pre: net-shaper-nl-pre-doit + post: net-shaper-nl-post-doit + request: + attributes: + - ifindex + - parent + - handle + - metric + - bw-min + - bw-max + - burst + - priority + - weight + - leaves + reply: + attributes: *ns-binding diff --git a/MAINTAINERS b/MAINTAINERS index 1389704c7d8d..2927b44dda25 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16116,6 +16116,7 @@ F: include/linux/platform_data/wiznet.h F: include/uapi/linux/cn_proc.h F: include/uapi/linux/ethtool_netlink.h F: include/uapi/linux/if_* +F: include/uapi/linux/net_shaper.h F: include/uapi/linux/netdev* F: tools/testing/selftests/drivers/net/ X: Documentation/devicetree/bindings/net/bluetooth/ diff --git a/include/uapi/linux/net_shaper.h b/include/uapi/linux/net_shaper.h new file mode 100644 index 000000000000..9e3fa63618ee --- /dev/null +++ b/include/uapi/linux/net_shaper.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/net_shaper.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_NET_SHAPER_H +#define _UAPI_LINUX_NET_SHAPER_H + +#define NET_SHAPER_FAMILY_NAME "net-shaper" +#define NET_SHAPER_FAMILY_VERSION 1 + +/** + * enum net_shaper_scope - Defines the shaper @id interpretation. + * @NET_SHAPER_SCOPE_UNSPEC: The scope is not specified. + * @NET_SHAPER_SCOPE_NETDEV: The main shaper for the given network device. + * @NET_SHAPER_SCOPE_QUEUE: The shaper is attached to the given device queue, + * the @id represents the queue number. + * @NET_SHAPER_SCOPE_NODE: The shaper allows grouping of queues or other node + * shapers; can be nested in either @netdev shapers or other @node shapers, + * allowing placement in any location of the scheduling tree, except leaves + * and root. + */ +enum net_shaper_scope { + NET_SHAPER_SCOPE_UNSPEC, + NET_SHAPER_SCOPE_NETDEV, + NET_SHAPER_SCOPE_QUEUE, + NET_SHAPER_SCOPE_NODE, + + /* private: */ + __NET_SHAPER_SCOPE_MAX, + NET_SHAPER_SCOPE_MAX = (__NET_SHAPER_SCOPE_MAX - 1) +}; + +/** + * enum net_shaper_metric - Different metric supported by the shaper. + * @NET_SHAPER_METRIC_BPS: Shaper operates on a bits per second basis. + * @NET_SHAPER_METRIC_PPS: Shaper operates on a packets per second basis. 
+ */ +enum net_shaper_metric { + NET_SHAPER_METRIC_BPS, + NET_SHAPER_METRIC_PPS, +}; + +enum { + NET_SHAPER_A_HANDLE = 1, + NET_SHAPER_A_METRIC, + NET_SHAPER_A_BW_MIN, + NET_SHAPER_A_BW_MAX, + NET_SHAPER_A_BURST, + NET_SHAPER_A_PRIORITY, + NET_SHAPER_A_WEIGHT, + NET_SHAPER_A_IFINDEX, + NET_SHAPER_A_PARENT, + NET_SHAPER_A_LEAVES, + + __NET_SHAPER_A_MAX, + NET_SHAPER_A_MAX = (__NET_SHAPER_A_MAX - 1) +}; + +enum { + NET_SHAPER_A_HANDLE_SCOPE = 1, + NET_SHAPER_A_HANDLE_ID, + + __NET_SHAPER_A_HANDLE_MAX, + NET_SHAPER_A_HANDLE_MAX = (__NET_SHAPER_A_HANDLE_MAX - 1) +}; + +enum { + NET_SHAPER_CMD_GET = 1, + NET_SHAPER_CMD_SET, + NET_SHAPER_CMD_DELETE, + NET_SHAPER_CMD_GROUP, + + __NET_SHAPER_CMD_MAX, + NET_SHAPER_CMD_MAX = (__NET_SHAPER_CMD_MAX - 1) +}; + +#endif /* _UAPI_LINUX_NET_SHAPER_H */ diff --git a/net/Kconfig b/net/Kconfig index a629f92dc86b..c3fca69a7c83 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -72,6 +72,9 @@ config NET_DEVMEM depends on GENERIC_ALLOCATOR depends on PAGE_POOL +config NET_SHAPER + bool + menu "Networking options" source "net/packet/Kconfig" diff --git a/net/Makefile b/net/Makefile index 65bb8c72a35e..60ed5190eda8 100644 --- a/net/Makefile +++ b/net/Makefile @@ -79,3 +79,4 @@ obj-$(CONFIG_XDP_SOCKETS) += xdp/ obj-$(CONFIG_MPTCP) += mptcp/ obj-$(CONFIG_MCTP) += mctp/ obj-$(CONFIG_NET_HANDSHAKE) += handshake/ +obj-$(CONFIG_NET_SHAPER) += shaper/ diff --git a/net/shaper/Makefile b/net/shaper/Makefile new file mode 100644 index 000000000000..54af7169a331 --- /dev/null +++ b/net/shaper/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the net shaper infrastructure. +# +# Copyright (c) 2024, Red Hat, Inc. +# + +obj-y += shaper.o shaper_nl_gen.o diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c new file mode 100644 index 000000000000..a1b20888f502 --- /dev/null +++ b/net/shaper/shaper.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +#include "shaper_nl_gen.h" + +int net_shaper_nl_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + +void net_shaper_nl_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ +} + +int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + +int net_shaper_nl_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + +int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + +int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + +int net_shaper_nl_pre_dumpit(struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + +int net_shaper_nl_post_dumpit(struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + +static int __init shaper_init(void) +{ + return genl_register_family(&net_shaper_nl_family); +} + +subsys_initcall(shaper_init); diff --git a/net/shaper/shaper_nl_gen.c b/net/shaper/shaper_nl_gen.c new file mode 100644 index 000000000000..34185c5989e6 --- /dev/null +++ b/net/shaper/shaper_nl_gen.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/net_shaper.yaml */ +/* YNL-GEN kernel source */ + +#include +#include + +#include "shaper_nl_gen.h" + +#include + +/* Common nested types */ +const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1] 
= { + [NET_SHAPER_A_HANDLE_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3), + [NET_SHAPER_A_HANDLE_ID] = { .type = NLA_U32, }, +}; + +const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1] = { + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), + [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, }, + [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, }, +}; + +/* NET_SHAPER_CMD_GET - do */ +static const struct nla_policy net_shaper_get_do_nl_policy[NET_SHAPER_A_IFINDEX + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), +}; + +/* NET_SHAPER_CMD_GET - dump */ +static const struct nla_policy net_shaper_get_dump_nl_policy[NET_SHAPER_A_IFINDEX + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, +}; + +/* NET_SHAPER_CMD_SET - do */ +static const struct nla_policy net_shaper_set_nl_policy[NET_SHAPER_A_IFINDEX + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), + [NET_SHAPER_A_METRIC] = NLA_POLICY_MAX(NLA_U32, 1), + [NET_SHAPER_A_BW_MIN] = { .type = NLA_UINT, }, + [NET_SHAPER_A_BW_MAX] = { .type = NLA_UINT, }, + [NET_SHAPER_A_BURST] = { .type = NLA_UINT, }, + [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, }, + [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, }, +}; + +/* NET_SHAPER_CMD_DELETE - do */ +static const struct nla_policy net_shaper_delete_nl_policy[NET_SHAPER_A_IFINDEX + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), +}; + +/* NET_SHAPER_CMD_GROUP - do */ +static const struct nla_policy net_shaper_group_nl_policy[NET_SHAPER_A_LEAVES + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, + [NET_SHAPER_A_PARENT] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), + [NET_SHAPER_A_METRIC] = NLA_POLICY_MAX(NLA_U32, 1), + [NET_SHAPER_A_BW_MIN] = { .type = NLA_UINT, }, + [NET_SHAPER_A_BW_MAX] = { .type = NLA_UINT, }, + [NET_SHAPER_A_BURST] = { .type = NLA_UINT, }, + [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, }, + [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, }, + [NET_SHAPER_A_LEAVES] = NLA_POLICY_NESTED(net_shaper_leaf_info_nl_policy), +}; + +/* Ops table for net_shaper */ +static const struct genl_split_ops net_shaper_nl_ops[] = { + { + .cmd = NET_SHAPER_CMD_GET, + .pre_doit = net_shaper_nl_pre_doit, + .doit = net_shaper_nl_get_doit, + .post_doit = net_shaper_nl_post_doit, + .policy = net_shaper_get_do_nl_policy, + .maxattr = NET_SHAPER_A_IFINDEX, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_GET, + .start = net_shaper_nl_pre_dumpit, + .dumpit = net_shaper_nl_get_dumpit, + .done = net_shaper_nl_post_dumpit, + .policy = net_shaper_get_dump_nl_policy, + .maxattr = NET_SHAPER_A_IFINDEX, + .flags = GENL_CMD_CAP_DUMP, + }, + { + .cmd = NET_SHAPER_CMD_SET, + .pre_doit = net_shaper_nl_pre_doit, + .doit = net_shaper_nl_set_doit, + .post_doit = net_shaper_nl_post_doit, + .policy = net_shaper_set_nl_policy, + .maxattr = NET_SHAPER_A_IFINDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_DELETE, + .pre_doit = net_shaper_nl_pre_doit, + .doit = net_shaper_nl_delete_doit, + .post_doit = net_shaper_nl_post_doit, + .policy = net_shaper_delete_nl_policy, + .maxattr = NET_SHAPER_A_IFINDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_GROUP, + .pre_doit = net_shaper_nl_pre_doit, + .doit = 
net_shaper_nl_group_doit, + .post_doit = net_shaper_nl_post_doit, + .policy = net_shaper_group_nl_policy, + .maxattr = NET_SHAPER_A_LEAVES, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, +}; + +struct genl_family net_shaper_nl_family __ro_after_init = { + .name = NET_SHAPER_FAMILY_NAME, + .version = NET_SHAPER_FAMILY_VERSION, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .split_ops = net_shaper_nl_ops, + .n_split_ops = ARRAY_SIZE(net_shaper_nl_ops), +}; diff --git a/net/shaper/shaper_nl_gen.h b/net/shaper/shaper_nl_gen.h new file mode 100644 index 000000000000..016cb6f3187b --- /dev/null +++ b/net/shaper/shaper_nl_gen.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/net_shaper.yaml */ +/* YNL-GEN kernel header */ + +#ifndef _LINUX_NET_SHAPER_GEN_H +#define _LINUX_NET_SHAPER_GEN_H + +#include +#include + +#include + +/* Common nested types */ +extern const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1]; +extern const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1]; + +int net_shaper_nl_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); +void +net_shaper_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info); +int net_shaper_nl_pre_dumpit(struct netlink_callback *cb); +int net_shaper_nl_post_dumpit(struct netlink_callback *cb); + +int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info); + +extern struct genl_family net_shaper_nl_family; + +#endif /* _LINUX_NET_SHAPER_GEN_H */ -- cgit v1.2.3 From 4b623f9f0f59652ea71fcb27d60b4c3b65126dbb Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Oct 2024 10:09:49 +0200 Subject: net-shapers: implement NL get operation Introduce the basic infrastructure to implement the net-shaper core functionality. Each network devices carries a net-shaper cache, the NL get() operation fetches the data from such cache. The cache is initially empty, will be fill by the set()/group() operation implemented later and is destroyed at device cleanup time. The net_shaper_fill_handle(), net_shaper_ctx_init(), and net_shaper_generic_pre() implementations handle generic index type attributes, despite the current caller always pass a constant value to avoid more noise in later patches using them with different attributes. 
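For context, a minimal sketch of how a driver advertises shaper support once this infrastructure is in place; the foo_* callbacks are hypothetical, only the signatures and field names come from the net_shaper_ops and net_device_ops definitions added below (the ndo field exists when CONFIG_NET_SHAPER is enabled):

    #include <linux/netdevice.h>
    #include <net/net_shaper.h>

    static int foo_shaper_set(struct net_shaper_binding *binding,
                              const struct net_shaper *shaper,
                              struct netlink_ext_ack *extack)
    {
            /* program the HW rate limiter identified by shaper->handle */
            return 0;
    }

    static int foo_shaper_delete(struct net_shaper_binding *binding,
                                 const struct net_shaper_handle *handle,
                                 struct netlink_ext_ack *extack)
    {
            /* restore the default, unshaped behaviour */
            return 0;
    }

    static const struct net_shaper_ops foo_shaper_ops = {
            .set    = foo_shaper_set,
            .delete = foo_shaper_delete,
    };

    static const struct net_device_ops foo_netdev_ops = {
            /* ... the usual ndo_* callbacks ... */
            .net_shaper_ops = &foo_shaper_ops,
    };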
Reviewed-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/ddd10fd645a9367803ad02fca4a5664ea5ace170.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/networking/kapi.rst | 3 + include/linux/netdevice.h | 21 +++ include/net/net_shaper.h | 120 ++++++++++++++ net/core/dev.c | 6 + net/core/dev.h | 6 + net/shaper/shaper.c | 335 +++++++++++++++++++++++++++++++++++++- 6 files changed, 484 insertions(+), 7 deletions(-) create mode 100644 include/net/net_shaper.h (limited to 'include') diff --git a/Documentation/networking/kapi.rst b/Documentation/networking/kapi.rst index ea55f462cefa..98682b9a13ee 100644 --- a/Documentation/networking/kapi.rst +++ b/Documentation/networking/kapi.rst @@ -104,6 +104,9 @@ Driver Support .. kernel-doc:: include/linux/netdevice.h :internal: +.. kernel-doc:: include/net/net_shaper.h + :internal: + PHY Support ----------- diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3baf8e539b6f..e6b93d01e631 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1603,6 +1603,14 @@ struct net_device_ops { int (*ndo_hwtstamp_set)(struct net_device *dev, struct kernel_hwtstamp_config *kernel_config, struct netlink_ext_ack *extack); + +#if IS_ENABLED(CONFIG_NET_SHAPER) + /** + * @net_shaper_ops: Device shaping offload operations + * see include/net/net_shapers.h + */ + const struct net_shaper_ops *net_shaper_ops; +#endif }; /** @@ -2406,6 +2414,19 @@ struct net_device { u64 max_pacing_offload_horizon; + /** + * @lock: protects @net_shaper_hierarchy, feel free to use for other + * netdev-scope protection. Ordering: take after rtnl_lock. + */ + struct mutex lock; + +#if IS_ENABLED(CONFIG_NET_SHAPER) + /** + * @net_shaper_hierarchy: data tracking the current shaper status + * see include/net/net_shapers.h + */ + struct net_shaper_hierarchy *net_shaper_hierarchy; +#endif u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; diff --git a/include/net/net_shaper.h b/include/net/net_shaper.h new file mode 100644 index 000000000000..5c3f49b52fe9 --- /dev/null +++ b/include/net/net_shaper.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _NET_SHAPER_H_ +#define _NET_SHAPER_H_ + +#include + +#include + +struct net_device; +struct devlink; +struct netlink_ext_ack; + +enum net_shaper_binding_type { + NET_SHAPER_BINDING_TYPE_NETDEV, + /* NET_SHAPER_BINDING_TYPE_DEVLINK_PORT */ +}; + +struct net_shaper_binding { + enum net_shaper_binding_type type; + union { + struct net_device *netdev; + struct devlink *devlink; + }; +}; + +struct net_shaper_handle { + enum net_shaper_scope scope; + u32 id; +}; + +/** + * struct net_shaper - represents a shaping node on the NIC H/W + * zeroed field are considered not set. 
+ * @parent: Unique identifier for the shaper parent, usually implied + * @handle: Unique identifier for this shaper + * @metric: Specify if the rate limits refers to PPS or BPS + * @bw_min: Minimum guaranteed rate for this shaper + * @bw_max: Maximum peak rate allowed for this shaper + * @burst: Maximum burst for the peek rate of this shaper + * @priority: Scheduling priority for this shaper + * @weight: Scheduling weight for this shaper + */ +struct net_shaper { + struct net_shaper_handle parent; + struct net_shaper_handle handle; + enum net_shaper_metric metric; + u64 bw_min; + u64 bw_max; + u64 burst; + u32 priority; + u32 weight; + + /* private: */ + u32 leaves; /* accounted only for NODE scope */ + struct rcu_head rcu; +}; + +/** + * struct net_shaper_ops - Operations on device H/W shapers + * + * The operations applies to either net_device and devlink objects. + * The initial shaping configuration at device initialization is empty: + * does not constraint the rate in any way. + * The network core keeps track of the applied user-configuration in + * the net_device or devlink structure. + * The operations are serialized via a per device lock. + * + * Device not supporting any kind of nesting should not provide the + * group operation. + * + * Each shaper is uniquely identified within the device with a 'handle' + * comprising the shaper scope and a scope-specific id. + */ +struct net_shaper_ops { + /** + * @group: create the specified shapers scheduling group + * + * Nest the @leaves shapers identified under the * @node shaper. + * All the shapers belong to the device specified by @binding. + * The @leaves arrays size is specified by @leaves_count. + * Create either the @leaves and the @node shaper; or if they already + * exists, links them together in the desired way. + * @leaves scope must be NET_SHAPER_SCOPE_QUEUE. + */ + int (*group)(struct net_shaper_binding *binding, int leaves_count, + const struct net_shaper *leaves, + const struct net_shaper *node, + struct netlink_ext_ack *extack); + + /** + * @set: Updates the specified shaper + * + * Updates or creates the @shaper on the device specified by @binding. + */ + int (*set)(struct net_shaper_binding *binding, + const struct net_shaper *shaper, + struct netlink_ext_ack *extack); + + /** + * @delete: Removes the specified shaper + * + * Removes the shaper configuration as identified by the given @handle + * on the device specified by @binding, restoring the default behavior. + */ + int (*delete)(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle, + struct netlink_ext_ack *extack); + + /** + * @capabilities: get the shaper features supported by the device + * + * Fills the bitmask @cap with the supported capabilities for the + * specified @scope and device specified by @binding. 
+ */ + void (*capabilities)(struct net_shaper_binding *binding, + enum net_shaper_scope scope, unsigned long *cap); +}; + +#endif diff --git a/net/core/dev.c b/net/core/dev.c index ea5fbcd133ae..6e727c49a6f7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11147,6 +11147,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, hash_init(dev->qdisc_hash); #endif + mutex_init(&dev->lock); + dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); @@ -11217,6 +11219,8 @@ void free_netdev(struct net_device *dev) return; } + mutex_destroy(&dev->lock); + kfree(dev->ethtool); netif_free_tx_queues(dev); netif_free_rx_queues(dev); @@ -11426,6 +11430,8 @@ void unregister_netdevice_many_notify(struct list_head *head, mutex_destroy(&dev->ethtool->rss_lock); + net_shaper_flush_netdev(dev); + if (skb) rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); diff --git a/net/core/dev.h b/net/core/dev.h index 5654325c5b71..13c558874af3 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -35,6 +35,12 @@ void dev_addr_flush(struct net_device *dev); int dev_addr_init(struct net_device *dev); void dev_addr_check(struct net_device *dev); +#if IS_ENABLED(CONFIG_NET_SHAPER) +void net_shaper_flush_netdev(struct net_device *dev); +#else +static inline void net_shaper_flush_netdev(struct net_device *dev) {} +#endif + /* sysctls not referred to from outside net/core/ */ extern int netdev_unregister_timeout_secs; extern int weight_p; diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index a1b20888f502..22daf7dde999 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -1,30 +1,333 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include #include +#include +#include #include +#include +#include +#include #include "shaper_nl_gen.h" +#include "../core/dev.h" + +#define NET_SHAPER_SCOPE_SHIFT 26 +#define NET_SHAPER_ID_MASK GENMASK(NET_SHAPER_SCOPE_SHIFT - 1, 0) +#define NET_SHAPER_SCOPE_MASK GENMASK(31, NET_SHAPER_SCOPE_SHIFT) + +#define NET_SHAPER_ID_UNSPEC NET_SHAPER_ID_MASK + +struct net_shaper_hierarchy { + struct xarray shapers; +}; + +struct net_shaper_nl_ctx { + struct net_shaper_binding binding; + netdevice_tracker dev_tracker; + unsigned long start_index; +}; + +static struct net_shaper_binding *net_shaper_binding_from_ctx(void *ctx) +{ + return &((struct net_shaper_nl_ctx *)ctx)->binding; +} + +static struct net_shaper_hierarchy * +net_shaper_hierarchy(struct net_shaper_binding *binding) +{ + /* Pairs with WRITE_ONCE() in net_shaper_hierarchy_setup. */ + if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV) + return READ_ONCE(binding->netdev->net_shaper_hierarchy); + + /* No other type supported yet. */ + return NULL; +} + +static int net_shaper_fill_binding(struct sk_buff *msg, + const struct net_shaper_binding *binding, + u32 type) +{ + /* Should never happen, as currently only NETDEV is supported. 
*/ + if (WARN_ON_ONCE(binding->type != NET_SHAPER_BINDING_TYPE_NETDEV)) + return -EINVAL; + + if (nla_put_u32(msg, type, binding->netdev->ifindex)) + return -EMSGSIZE; + + return 0; +} + +static int net_shaper_fill_handle(struct sk_buff *msg, + const struct net_shaper_handle *handle, + u32 type) +{ + struct nlattr *handle_attr; + + if (handle->scope == NET_SHAPER_SCOPE_UNSPEC) + return 0; + + handle_attr = nla_nest_start(msg, type); + if (!handle_attr) + return -EMSGSIZE; + + if (nla_put_u32(msg, NET_SHAPER_A_HANDLE_SCOPE, handle->scope) || + (handle->scope >= NET_SHAPER_SCOPE_QUEUE && + nla_put_u32(msg, NET_SHAPER_A_HANDLE_ID, handle->id))) + goto handle_nest_cancel; + + nla_nest_end(msg, handle_attr); + return 0; + +handle_nest_cancel: + nla_nest_cancel(msg, handle_attr); + return -EMSGSIZE; +} + +static int +net_shaper_fill_one(struct sk_buff *msg, + const struct net_shaper_binding *binding, + const struct net_shaper *shaper, + const struct genl_info *info) +{ + void *hdr; + + hdr = genlmsg_iput(msg, info); + if (!hdr) + return -EMSGSIZE; + + if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) || + net_shaper_fill_handle(msg, &shaper->parent, + NET_SHAPER_A_PARENT) || + net_shaper_fill_handle(msg, &shaper->handle, + NET_SHAPER_A_HANDLE) || + ((shaper->bw_min || shaper->bw_max || shaper->burst) && + nla_put_u32(msg, NET_SHAPER_A_METRIC, shaper->metric)) || + (shaper->bw_min && + nla_put_uint(msg, NET_SHAPER_A_BW_MIN, shaper->bw_min)) || + (shaper->bw_max && + nla_put_uint(msg, NET_SHAPER_A_BW_MAX, shaper->bw_max)) || + (shaper->burst && + nla_put_uint(msg, NET_SHAPER_A_BURST, shaper->burst)) || + (shaper->priority && + nla_put_u32(msg, NET_SHAPER_A_PRIORITY, shaper->priority)) || + (shaper->weight && + nla_put_u32(msg, NET_SHAPER_A_WEIGHT, shaper->weight))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/* Initialize the context fetching the relevant device and + * acquiring a reference to it. + */ +static int net_shaper_ctx_setup(const struct genl_info *info, int type, + struct net_shaper_nl_ctx *ctx) +{ + struct net *ns = genl_info_net(info); + struct net_device *dev; + int ifindex; + + if (GENL_REQ_ATTR_CHECK(info, type)) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[type]); + dev = netdev_get_by_index(ns, ifindex, &ctx->dev_tracker, GFP_KERNEL); + if (!dev) { + NL_SET_BAD_ATTR(info->extack, info->attrs[type]); + return -ENOENT; + } + + if (!dev->netdev_ops->net_shaper_ops) { + NL_SET_BAD_ATTR(info->extack, info->attrs[type]); + netdev_put(dev, &ctx->dev_tracker); + return -EOPNOTSUPP; + } + + ctx->binding.type = NET_SHAPER_BINDING_TYPE_NETDEV; + ctx->binding.netdev = dev; + return 0; +} + +static void net_shaper_ctx_cleanup(struct net_shaper_nl_ctx *ctx) +{ + if (ctx->binding.type == NET_SHAPER_BINDING_TYPE_NETDEV) + netdev_put(ctx->binding.netdev, &ctx->dev_tracker); +} + +static u32 net_shaper_handle_to_index(const struct net_shaper_handle *handle) +{ + return FIELD_PREP(NET_SHAPER_SCOPE_MASK, handle->scope) | + FIELD_PREP(NET_SHAPER_ID_MASK, handle->id); +} + +static struct net_shaper * +net_shaper_lookup(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + u32 index = net_shaper_handle_to_index(handle); + + return hierarchy ? 
xa_load(&hierarchy->shapers, index) : NULL; +} + +static int net_shaper_parse_handle(const struct nlattr *attr, + const struct genl_info *info, + struct net_shaper_handle *handle) +{ + struct nlattr *tb[NET_SHAPER_A_HANDLE_MAX + 1]; + struct nlattr *id_attr; + u32 id = 0; + int ret; + + ret = nla_parse_nested(tb, NET_SHAPER_A_HANDLE_MAX, attr, + net_shaper_handle_nl_policy, info->extack); + if (ret < 0) + return ret; + + if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, + NET_SHAPER_A_HANDLE_SCOPE)) + return -EINVAL; + + handle->scope = nla_get_u32(tb[NET_SHAPER_A_HANDLE_SCOPE]); + + /* The default id for NODE scope shapers is an invalid one + * to help the 'group' operation discriminate between new + * NODE shaper creation (ID_UNSPEC) and reuse of existing + * shaper (any other value). + */ + id_attr = tb[NET_SHAPER_A_HANDLE_ID]; + if (id_attr) + id = nla_get_u32(id_attr); + else if (handle->scope == NET_SHAPER_SCOPE_NODE) + id = NET_SHAPER_ID_UNSPEC; + + handle->id = id; + return 0; +} + +static int net_shaper_generic_pre(struct genl_info *info, int type) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)info->ctx; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(info->ctx)); + + return net_shaper_ctx_setup(info, type, ctx); +} + int net_shaper_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) { - return -EOPNOTSUPP; + return net_shaper_generic_pre(info, NET_SHAPER_A_IFINDEX); +} + +static void net_shaper_generic_post(struct genl_info *info) +{ + net_shaper_ctx_cleanup((struct net_shaper_nl_ctx *)info->ctx); } void net_shaper_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) { + net_shaper_generic_post(info); +} + +int net_shaper_nl_pre_dumpit(struct netlink_callback *cb) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + const struct genl_info *info = genl_info_dump(cb); + + return net_shaper_ctx_setup(info, NET_SHAPER_A_IFINDEX, ctx); +} + +int net_shaper_nl_post_dumpit(struct netlink_callback *cb) +{ + net_shaper_ctx_cleanup((struct net_shaper_nl_ctx *)cb->ctx); + return 0; } int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) { - return -EOPNOTSUPP; + struct net_shaper_binding *binding; + struct net_shaper_handle handle; + struct net_shaper *shaper; + struct sk_buff *msg; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_HANDLE)) + return -EINVAL; + + binding = net_shaper_binding_from_ctx(info->ctx); + ret = net_shaper_parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info, + &handle); + if (ret < 0) + return ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rcu_read_lock(); + shaper = net_shaper_lookup(binding, &handle); + if (!shaper) { + NL_SET_BAD_ATTR(info->extack, + info->attrs[NET_SHAPER_A_HANDLE]); + rcu_read_unlock(); + ret = -ENOENT; + goto free_msg; + } + + ret = net_shaper_fill_one(msg, binding, shaper, info); + rcu_read_unlock(); + if (ret) + goto free_msg; + + ret = genlmsg_reply(msg, info); + if (ret) + goto free_msg; + + return 0; + +free_msg: + nlmsg_free(msg); + return ret; } int net_shaper_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { - return -EOPNOTSUPP; + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + const struct genl_info *info = genl_info_dump(cb); + struct net_shaper_hierarchy *hierarchy; + struct net_shaper_binding *binding; + struct net_shaper *shaper; + int ret = 0; + + /* Don't error out dumps performed before any set operation. 
*/ + binding = net_shaper_binding_from_ctx(ctx); + hierarchy = net_shaper_hierarchy(binding); + if (!hierarchy) + return 0; + + rcu_read_lock(); + for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index, + U32_MAX, XA_PRESENT)); ctx->start_index++) { + ret = net_shaper_fill_one(skb, binding, shaper, info); + if (ret) + break; + } + rcu_read_unlock(); + + return ret; } int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info) @@ -37,14 +340,32 @@ int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } -int net_shaper_nl_pre_dumpit(struct netlink_callback *cb) +static void net_shaper_flush(struct net_shaper_binding *binding) { - return -EOPNOTSUPP; + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper *cur; + unsigned long index; + + if (!hierarchy) + return; + + xa_lock(&hierarchy->shapers); + xa_for_each(&hierarchy->shapers, index, cur) { + __xa_erase(&hierarchy->shapers, index); + kfree(cur); + } + xa_unlock(&hierarchy->shapers); + kfree(hierarchy); } -int net_shaper_nl_post_dumpit(struct netlink_callback *cb) +void net_shaper_flush_netdev(struct net_device *dev) { - return -EOPNOTSUPP; + struct net_shaper_binding binding = { + .type = NET_SHAPER_BINDING_TYPE_NETDEV, + .netdev = dev, + }; + + net_shaper_flush(&binding); } static int __init shaper_init(void) -- cgit v1.2.3 From 14bba9285aedefb99647d716b0f61bf32081e387 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Oct 2024 10:09:54 +0200 Subject: netlink: spec: add shaper introspection support Allow the user-space to fine-grain query the shaping features supported by the NIC on each domain. Reviewed-by: Jiri Pirko Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/3ddd10e450e3fe7d4b944c0d0b886d4483529ee6.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/net_shaper.yaml | 88 +++++++++++++++++++++++++++++ include/uapi/linux/net_shaper.h | 17 ++++++ net/shaper/shaper.c | 32 +++++++++++ net/shaper/shaper_nl_gen.c | 29 ++++++++++ net/shaper/shaper_nl_gen.h | 10 ++++ 5 files changed, 176 insertions(+) (limited to 'include') diff --git a/Documentation/netlink/specs/net_shaper.yaml b/Documentation/netlink/specs/net_shaper.yaml index 618fc09932ff..8ebad0d02904 100644 --- a/Documentation/netlink/specs/net_shaper.yaml +++ b/Documentation/netlink/specs/net_shaper.yaml @@ -26,6 +26,11 @@ doc: | The user can query the running configuration via the @get operation. + Different devices can provide different feature sets, e.g. with no + support for complex scheduling hierarchy, or for some shaping + parameters. The user can introspect the HW capabilities via the + @cap-get operation. + definitions: - type: enum @@ -148,6 +153,53 @@ attribute-sets: name: priority - name: weight + - + name: caps + attributes: + - + name: ifindex + type: u32 + doc: Interface index queried for shapers capabilities. + - + name: scope + type: u32 + enum: scope + doc: The scope to which the queried capabilities apply. + - + name: support-metric-bps + type: flag + doc: The device accepts 'bps' metric for bw-min, bw-max and burst. + - + name: support-metric-pps + type: flag + doc: The device accepts 'pps' metric for bw-min, bw-max and burst. + - + name: support-nesting + type: flag + doc: | + The device supports nesting shaper belonging to this scope + below 'node' scoped shapers. Only 'queue' and 'node' + scope can have flag 'support-nesting'. 
+ - + name: support-bw-min + type: flag + doc: The device supports a minimum guaranteed B/W. + - + name: support-bw-max + type: flag + doc: The device supports maximum B/W shaping. + - + name: support-burst + type: flag + doc: The device supports a maximum burst size. + - + name: support-priority + type: flag + doc: The device supports priority scheduling. + - + name: support-weight + type: flag + doc: The device supports weighted round robin scheduling. operations: list: @@ -272,3 +324,39 @@ operations: - leaves reply: attributes: *ns-binding + + - + name: cap-get + doc: | + Get the shaper capabilities supported by the given device + for the specified scope. + attribute-set: caps + + do: + pre: net-shaper-nl-cap-pre-doit + post: net-shaper-nl-cap-post-doit + request: + attributes: + - ifindex + - scope + reply: + attributes: &cap-attrs + - ifindex + - scope + - support-metric-bps + - support-metric-pps + - support-nesting + - support-bw-min + - support-bw-max + - support-burst + - support-priority + - support-weight + + dump: + pre: net-shaper-nl-cap-pre-dumpit + post: net-shaper-nl-cap-post-dumpit + request: + attributes: + - ifindex + reply: + attributes: *cap-attrs diff --git a/include/uapi/linux/net_shaper.h b/include/uapi/linux/net_shaper.h index 9e3fa63618ee..d8834b59f7d7 100644 --- a/include/uapi/linux/net_shaper.h +++ b/include/uapi/linux/net_shaper.h @@ -65,11 +65,28 @@ enum { NET_SHAPER_A_HANDLE_MAX = (__NET_SHAPER_A_HANDLE_MAX - 1) }; +enum { + NET_SHAPER_A_CAPS_IFINDEX = 1, + NET_SHAPER_A_CAPS_SCOPE, + NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS, + NET_SHAPER_A_CAPS_SUPPORT_METRIC_PPS, + NET_SHAPER_A_CAPS_SUPPORT_NESTING, + NET_SHAPER_A_CAPS_SUPPORT_BW_MIN, + NET_SHAPER_A_CAPS_SUPPORT_BW_MAX, + NET_SHAPER_A_CAPS_SUPPORT_BURST, + NET_SHAPER_A_CAPS_SUPPORT_PRIORITY, + NET_SHAPER_A_CAPS_SUPPORT_WEIGHT, + + __NET_SHAPER_A_CAPS_MAX, + NET_SHAPER_A_CAPS_MAX = (__NET_SHAPER_A_CAPS_MAX - 1) +}; + enum { NET_SHAPER_CMD_GET = 1, NET_SHAPER_CMD_SET, NET_SHAPER_CMD_DELETE, NET_SHAPER_CMD_GROUP, + NET_SHAPER_CMD_CAP_GET, __NET_SHAPER_CMD_MAX, NET_SHAPER_CMD_MAX = (__NET_SHAPER_CMD_MAX - 1) diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 85ad172833fc..92c8da046391 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -598,6 +598,27 @@ int net_shaper_nl_post_dumpit(struct netlink_callback *cb) return 0; } +int net_shaper_nl_cap_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + +void net_shaper_nl_cap_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ +} + +int net_shaper_nl_cap_pre_dumpit(struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + +int net_shaper_nl_cap_post_dumpit(struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) { struct net_shaper_binding *binding; @@ -1126,6 +1147,17 @@ free_msg: goto free_leaves; } +int net_shaper_nl_cap_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + return 0; +} + +int net_shaper_nl_cap_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return 0; +} + static void net_shaper_flush(struct net_shaper_binding *binding) { struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); diff --git a/net/shaper/shaper_nl_gen.c b/net/shaper/shaper_nl_gen.c index 34185c5989e6..204c8ae8c7b1 100644 --- a/net/shaper/shaper_nl_gen.c +++ b/net/shaper/shaper_nl_gen.c @@ -65,6 +65,17 @@ static const struct nla_policy 
net_shaper_group_nl_policy[NET_SHAPER_A_LEAVES + [NET_SHAPER_A_LEAVES] = NLA_POLICY_NESTED(net_shaper_leaf_info_nl_policy), }; +/* NET_SHAPER_CMD_CAP_GET - do */ +static const struct nla_policy net_shaper_cap_get_do_nl_policy[NET_SHAPER_A_CAPS_SCOPE + 1] = { + [NET_SHAPER_A_CAPS_IFINDEX] = { .type = NLA_U32, }, + [NET_SHAPER_A_CAPS_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3), +}; + +/* NET_SHAPER_CMD_CAP_GET - dump */ +static const struct nla_policy net_shaper_cap_get_dump_nl_policy[NET_SHAPER_A_CAPS_IFINDEX + 1] = { + [NET_SHAPER_A_CAPS_IFINDEX] = { .type = NLA_U32, }, +}; + /* Ops table for net_shaper */ static const struct genl_split_ops net_shaper_nl_ops[] = { { @@ -112,6 +123,24 @@ static const struct genl_split_ops net_shaper_nl_ops[] = { .maxattr = NET_SHAPER_A_LEAVES, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, + { + .cmd = NET_SHAPER_CMD_CAP_GET, + .pre_doit = net_shaper_nl_cap_pre_doit, + .doit = net_shaper_nl_cap_get_doit, + .post_doit = net_shaper_nl_cap_post_doit, + .policy = net_shaper_cap_get_do_nl_policy, + .maxattr = NET_SHAPER_A_CAPS_SCOPE, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_CAP_GET, + .start = net_shaper_nl_cap_pre_dumpit, + .dumpit = net_shaper_nl_cap_get_dumpit, + .done = net_shaper_nl_cap_post_dumpit, + .policy = net_shaper_cap_get_dump_nl_policy, + .maxattr = NET_SHAPER_A_CAPS_IFINDEX, + .flags = GENL_CMD_CAP_DUMP, + }, }; struct genl_family net_shaper_nl_family __ro_after_init = { diff --git a/net/shaper/shaper_nl_gen.h b/net/shaper/shaper_nl_gen.h index 016cb6f3187b..cb7f9026fc23 100644 --- a/net/shaper/shaper_nl_gen.h +++ b/net/shaper/shaper_nl_gen.h @@ -17,17 +17,27 @@ extern const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGH int net_shaper_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_cap_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); void net_shaper_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +void +net_shaper_nl_cap_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); int net_shaper_nl_pre_dumpit(struct netlink_callback *cb); +int net_shaper_nl_cap_pre_dumpit(struct netlink_callback *cb); int net_shaper_nl_post_dumpit(struct netlink_callback *cb); +int net_shaper_nl_cap_post_dumpit(struct netlink_callback *cb); int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info); int net_shaper_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info); int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info); int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_cap_get_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_cap_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); extern struct genl_family net_shaper_nl_family; -- cgit v1.2.3 From 608a5c05c39b75fa2539ce9e521d289c5a5326f7 Mon Sep 17 00:00:00 2001 From: Wenjun Wu Date: Wed, 9 Oct 2024 10:09:58 +0200 Subject: virtchnl: support queue rate limit and quanta size configuration This patch adds new virtchnl opcodes and structures for rate limit and quanta size configuration, which include: 1. VIRTCHNL_OP_CONFIG_QUEUE_BW, to configure max bandwidth for each VF per queue. 2. VIRTCHNL_OP_CONFIG_QUANTA, to configure quanta size per queue. 3. 
VIRTCHNL_OP_GET_QOS_CAPS, VF queries current QoS configuration, such as enabled TCs, arbiter type, up2tc and bandwidth of VSI node. The configuration is previously set by DCB and PF, and now is the potential QoS capability of VF. VF can take it as reference to configure queue TC mapping. Reviewed-by: Jiri Pirko Signed-off-by: Wenjun Wu Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/839002f7bd6f63b985a060a51b079f6e6dbbe237.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/avf/virtchnl.h | 119 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index f41395264dca..223e433c39fe 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -89,6 +89,9 @@ enum virtchnl_rx_hsplit { VIRTCHNL_RX_HSPLIT_SPLIT_SCTP = 8, }; +enum virtchnl_bw_limit_type { + VIRTCHNL_BW_SHAPER = 0, +}; /* END GENERIC DEFINES */ /* Opcodes for VF-PF communication. These are placed in the v_opcode field @@ -151,6 +154,11 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 = 55, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 = 56, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57, + /* opcode 57 - 65 are reserved */ + VIRTCHNL_OP_GET_QOS_CAPS = 66, + /* opcode 68 through 111 are reserved */ + VIRTCHNL_OP_CONFIG_QUEUE_BW = 112, + VIRTCHNL_OP_CONFIG_QUANTA = 113, VIRTCHNL_OP_MAX, }; @@ -261,6 +269,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC BIT(26) #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27) #define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28) +#define VIRTCHNL_VF_OFFLOAD_QOS BIT(29) #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ VIRTCHNL_VF_OFFLOAD_VLAN | \ @@ -1416,6 +1425,85 @@ struct virtchnl_fdir_del { VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); +struct virtchnl_shaper_bw { + /* Unit is Kbps */ + u32 committed; + u32 peak; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_shaper_bw); + +/* VIRTCHNL_OP_GET_QOS_CAPS + * VF sends this message to get its QoS Caps, such as + * TC number, Arbiter and Bandwidth. 
+ */ +struct virtchnl_qos_cap_elem { + u8 tc_num; + u8 tc_prio; +#define VIRTCHNL_ABITER_STRICT 0 +#define VIRTCHNL_ABITER_ETS 2 + u8 arbiter; +#define VIRTCHNL_STRICT_WEIGHT 1 + u8 weight; + enum virtchnl_bw_limit_type type; + union { + struct virtchnl_shaper_bw shaper; + u8 pad2[32]; + }; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_qos_cap_elem); + +struct virtchnl_qos_cap_list { + u16 vsi_id; + u16 num_elem; + struct virtchnl_qos_cap_elem cap[]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_qos_cap_list); +#define virtchnl_qos_cap_list_LEGACY_SIZEOF 44 + +/* VIRTCHNL_OP_CONFIG_QUEUE_BW */ +struct virtchnl_queue_bw { + u16 queue_id; + u8 tc; + u8 pad; + struct virtchnl_shaper_bw shaper; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_bw); + +struct virtchnl_queues_bw_cfg { + u16 vsi_id; + u16 num_queues; + struct virtchnl_queue_bw cfg[]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_queues_bw_cfg); +#define virtchnl_queues_bw_cfg_LEGACY_SIZEOF 16 + +enum virtchnl_queue_type { + VIRTCHNL_QUEUE_TYPE_TX = 0, + VIRTCHNL_QUEUE_TYPE_RX = 1, +}; + +/* structure to specify a chunk of contiguous queues */ +struct virtchnl_queue_chunk { + /* see enum virtchnl_queue_type */ + s32 type; + u16 start_queue_id; + u16 num_queues; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_queue_chunk); + +struct virtchnl_quanta_cfg { + u16 quanta_size; + struct virtchnl_queue_chunk queue_select; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_quanta_cfg); + #define __vss_byone(p, member, count, old) \ (struct_size(p, member, count) + (old - 1 - struct_size(p, member, 0))) @@ -1438,6 +1526,8 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); __vss(virtchnl_vlan_filter_list_v2, __vss_byelem, p, m, c), \ __vss(virtchnl_tc_info, __vss_byelem, p, m, c), \ __vss(virtchnl_rdma_qvlist_info, __vss_byelem, p, m, c), \ + __vss(virtchnl_qos_cap_list, __vss_byelem, p, m, c), \ + __vss(virtchnl_queues_bw_cfg, __vss_byelem, p, m, c), \ __vss(virtchnl_rss_key, __vss_byone, p, m, c), \ __vss(virtchnl_rss_lut, __vss_byone, p, m, c)) @@ -1637,6 +1727,35 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2: valid_len = sizeof(struct virtchnl_vlan_setting); break; + case VIRTCHNL_OP_GET_QOS_CAPS: + break; + case VIRTCHNL_OP_CONFIG_QUEUE_BW: + valid_len = virtchnl_queues_bw_cfg_LEGACY_SIZEOF; + if (msglen >= valid_len) { + struct virtchnl_queues_bw_cfg *q_bw = + (struct virtchnl_queues_bw_cfg *)msg; + + valid_len = virtchnl_struct_size(q_bw, cfg, + q_bw->num_queues); + if (q_bw->num_queues == 0) { + err_msg_format = true; + break; + } + } + break; + case VIRTCHNL_OP_CONFIG_QUANTA: + valid_len = sizeof(struct virtchnl_quanta_cfg); + if (msglen >= valid_len) { + struct virtchnl_quanta_cfg *q_quanta = + (struct virtchnl_quanta_cfg *)msg; + + if (q_quanta->quanta_size == 0 || + q_quanta->queue_select.num_queues == 0) { + err_msg_format = true; + break; + } + } + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: -- cgit v1.2.3 From 5bd48a3a14df4b3ee1be0757efcc0f40d4f57b35 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 10 Oct 2024 04:56:52 +0100 Subject: bpf: fix argument type in bpf_loop documentation The `index` argument to bpf_loop() is threaded as an u64. This lead in a subtle verifier denial where clang cloned the argument in another register[1]. 
[1] https://github.com/systemd/systemd/pull/34650#issuecomment-2401092895 Signed-off-by: Matteo Croce Link: https://lore.kernel.org/r/20241010035652.17830-1-technoboy85@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- kernel/bpf/verifier.c | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8ab4d8184b9d..874af0186fe8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5371,7 +5371,7 @@ union bpf_attr { * Currently, the **flags** must be 0. Currently, nr_loops is * limited to 1 << 23 (~8 million) loops. * - * long (\*callback_fn)(u32 index, void \*ctx); + * long (\*callback_fn)(u64 index, void \*ctx); * * where **index** is the current index in the loop. The index * is zero-indexed. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7d9b38ffd220..cfc62e0776bf 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9917,7 +9917,7 @@ static int set_loop_callback_state(struct bpf_verifier_env *env, { /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, * u64 flags); - * callback_fn(u32 index, void *callback_ctx); + * callback_fn(u64 index, void *callback_ctx); */ callee->regs[BPF_REG_1].type = SCALAR_VALUE; callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7610883c8191..5937c39069ba 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5371,7 +5371,7 @@ union bpf_attr { * Currently, the **flags** must be 0. Currently, nr_loops is * limited to 1 << 23 (~8 million) loops. * - * long (\*callback_fn)(u32 index, void \*ctx); + * long (\*callback_fn)(u64 index, void \*ctx); * * where **index** is the current index in the loop. The index * is zero-indexed. -- cgit v1.2.3 From 0a6c61bc9c636e9a32d9f5a4d6d3b031d08763ab Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 10 Oct 2024 23:59:09 +0900 Subject: fgraph: Simplify return address printing in function graph tracer Simplify return address printing in the function graph tracer by removing fgraph_extras. Since this feature is only used by the function graph tracer and the feature flags are directly accessible from the function graph tracer, fgraph_extras can be removed from the fgraph callback.
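For illustration only (a sketch that is not taken from this patch; the handler name is hypothetical and <linux/ftrace.h> is assumed), an entry handler after this simplification receives just the entry record and its fgraph_ops, and a tracer that wants the call-site address now looks it up itself through the ftrace_graph_top_ret_addr() helper this patch adds:

	/* Simplified two-argument entry callback */
	static int sample_graph_entry(struct ftrace_graph_ent *trace,
				      struct fgraph_ops *gops)
	{
		/* Replaces the retaddr that used to arrive via fgraph_extras */
		unsigned long retaddr = ftrace_graph_top_ret_addr(current);

		pr_debug("enter %ps called from %ps\n",
			 (void *)trace->func, (void *)retaddr);

		return 1;	/* non-zero: trace this function */
	}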
Cc: Donglin Peng Link: https://lore.kernel.org/172857234900.270774.15378354017601069781.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 16 +++---------- kernel/trace/fgraph.c | 45 +++++++++++++++++++++++------------- kernel/trace/ftrace.c | 3 +-- kernel/trace/trace.h | 3 +-- kernel/trace/trace_functions_graph.c | 18 +++++++-------- kernel/trace/trace_irqsoff.c | 3 +-- kernel/trace/trace_sched_wakeup.c | 3 +-- kernel/trace/trace_selftest.c | 8 +++---- 8 files changed, 48 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2ac3b3b53cd0..4c7dd5e58c9f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1068,29 +1068,20 @@ struct ftrace_graph_ret { unsigned long long rettime; } __packed; -struct fgraph_extras; struct fgraph_ops; /* Type of the callback handlers for tracing function graph*/ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *, struct fgraph_ops *); /* return */ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, - struct fgraph_ops *, - struct fgraph_extras *); /* entry */ + struct fgraph_ops *); /* entry */ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras); + struct fgraph_ops *gops); bool ftrace_pids_enabled(struct ftrace_ops *ops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER -/* Used to convey some extra datas when creating a graph entry */ -struct fgraph_extras { - u32 flags; - unsigned long retaddr; -}; - struct fgraph_ops { trace_func_graph_ent_t entryfunc; trace_func_graph_ret_t retfunc; @@ -1131,13 +1122,12 @@ function_graph_enter(unsigned long ret, unsigned long func, struct ftrace_ret_stack * ftrace_graph_get_ret_stack(struct task_struct *task, int skip); +unsigned long ftrace_graph_top_ret_addr(struct task_struct *task); unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp); unsigned long *fgraph_get_task_var(struct fgraph_ops *gops); -u32 graph_tracer_flags_get(u32 flags); - /* * Sometimes we don't want to trace a function with the function * graph tracer but we want them to keep traced by the usual function diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 27e523f01ed2..ee829d65f301 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -290,8 +290,7 @@ static inline unsigned long make_data_type_val(int idx, int size, int offset) } /* ftrace_graph_entry set to this to tell some archs to run function graph */ -static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops, - struct fgraph_extras *extras) +static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops) { return 0; } @@ -519,8 +518,7 @@ int __weak ftrace_disable_ftrace_graph_caller(void) #endif int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { return 0; } @@ -648,20 +646,13 @@ int function_graph_enter(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp) { struct ftrace_graph_ent trace; - struct fgraph_extras extras; unsigned long bitmap = 0; int offset; int i; - int idx = 0; trace.func = func; trace.depth = ++current->curr_ret_depth; - extras.flags = graph_tracer_flags_get(TRACE_GRAPH_PRINT_RETADDR); - if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) - && extras.flags & TRACE_GRAPH_PRINT_RETADDR) - extras.retaddr = 
ftrace_graph_ret_addr(current, &idx, ret, retp); - offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0); if (offset < 0) goto out; @@ -670,7 +661,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, if (static_branch_likely(&fgraph_do_direct)) { int save_curr_ret_stack = current->curr_ret_stack; - if (static_call(fgraph_func)(&trace, fgraph_direct_gops, &extras)) + if (static_call(fgraph_func)(&trace, fgraph_direct_gops)) bitmap |= BIT(fgraph_direct_gops->idx); else /* Clear out any saved storage */ @@ -688,7 +679,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, save_curr_ret_stack = current->curr_ret_stack; if (ftrace_ops_test(&gops->ops, func, NULL) && - gops->entryfunc(&trace, gops, &extras)) + gops->entryfunc(&trace, gops)) bitmap |= BIT(i); else /* Clear out any saved storage */ @@ -905,6 +896,29 @@ ftrace_graph_get_ret_stack(struct task_struct *task, int idx) return ret_stack; } +/** + * ftrace_graph_top_ret_addr - return the top return address in the shadow stack + * @task: The task to read the shadow stack from. + * + * Return the first return address on the shadow stack of the @task, which is + * not the fgraph's return_to_handler. + */ +unsigned long ftrace_graph_top_ret_addr(struct task_struct *task) +{ + unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler); + struct ftrace_ret_stack *ret_stack = NULL; + int offset = task->curr_ret_stack; + + if (offset < 0) + return 0; + + do { + ret_stack = get_ret_stack(task, offset, &offset); + } while (ret_stack && ret_stack->ret == return_handler); + + return ret_stack ? ret_stack->ret : 0; +} + /** * ftrace_graph_ret_addr - return the original value of the return address * @task: The task the unwinder is being executed on @@ -1145,8 +1159,7 @@ void ftrace_graph_exit_task(struct task_struct *t) #ifdef CONFIG_DYNAMIC_FTRACE static int fgraph_pid_func(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { struct trace_array *tr = gops->ops.private; int pid; @@ -1160,7 +1173,7 @@ static int fgraph_pid_func(struct ftrace_graph_ent *trace, return 0; } - return gops->saved_func(trace, gops, NULL); + return gops->saved_func(trace, gops); } void fgraph_update_pid_func(void) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5d87dac83b80..cae388122ca8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -827,8 +827,7 @@ struct profile_fgraph_data { }; static int profile_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { struct profile_fgraph_data *profile_data; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 13f08f257c0b..6adf48ef4312 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -695,8 +695,7 @@ void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops); -int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops, - struct fgraph_extras *extras); +int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 3dd63ae2afe8..20d0c579d3b5 100644 --- a/kernel/trace/trace_functions_graph.c +++ 
b/kernel/trace/trace_functions_graph.c @@ -90,9 +90,9 @@ static struct tracer_flags tracer_flags = { .opts = trace_opts }; -u32 graph_tracer_flags_get(u32 flags) +static bool tracer_flags_is_set(u32 flags) { - return tracer_flags.val & flags; + return (tracer_flags.val & flags) == flags; } /* @@ -179,8 +179,7 @@ struct fgraph_times { }; int trace_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; @@ -246,11 +245,12 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { trace_ctx = tracing_gen_ctx_flags(flags); - if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) && extras - && (extras->flags & TRACE_GRAPH_PRINT_RETADDR))) - ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, - extras->retaddr); - else + if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) && + tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR))) { + unsigned long retaddr = ftrace_graph_top_ret_addr(current); + + ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr); + } else ret = __trace_graph_entry(tr, trace, trace_ctx); } else { ret = 0; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index eb3aa36cf10f..fce064e20570 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -176,8 +176,7 @@ static int irqsoff_display_graph(struct trace_array *tr, int set) } static int irqsoff_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 155de2551507..ae2ace5e515a 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -113,8 +113,7 @@ static int wakeup_display_graph(struct trace_array *tr, int set) } static int wakeup_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index fbb99f8c8062..d3a14ae47e26 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -774,8 +774,7 @@ struct fgraph_fixture { }; static __init int store_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { struct fgraph_fixture *fixture = container_of(gops, struct fgraph_fixture, gops); const char *type = fixture->store_type_name; @@ -1026,8 +1025,7 @@ static unsigned int graph_hang_thresh; /* Wrap the real function entry probe to avoid possible hanging */ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { /* This is harmlessly racy, we want to approximately detect a hang */ if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) { @@ -1041,7 +1039,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace, return 0; } - return trace_graph_entry(trace, gops, NULL); + return trace_graph_entry(trace, gops); } static struct fgraph_ops fgraph_ops __initdata = { -- cgit v1.2.3 From bafffd56c608106d11e7aec851f114dcd66b2091 Mon Sep 17 00:00:00 2001 
From: "Dr. David Alan Gilbert" Date: Thu, 10 Oct 2024 14:54:46 +0100 Subject: clocksource: Remove unused clocksource_change_rating clocksource_change_rating() has been unused since 2017's commit 63ed4e0c67df ("Drivers: hv: vmbus: Consolidate all Hyper-V specific clocksource code") Remove it. __clocksource_change_rating now only has one use which is ifdef'd. Move it into the ifdef'd section. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241010135446.213098-1-linux@treblig.org --- include/linux/clocksource.h | 1 - kernel/time/clocksource.c | 40 ++++++++++------------------------------ 2 files changed, 10 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index d35b677b08fe..ef1b16da6ad5 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -215,7 +215,6 @@ static inline s64 clocksource_cyc2ns(u64 cycles, u32 mult, u32 shift) extern int clocksource_unregister(struct clocksource*); extern void clocksource_touch_watchdog(void); -extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_suspend(void); extern void clocksource_resume(void); extern struct clocksource * __init clocksource_default_clock(void); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 23336eecb4f4..aab6472853fa 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -20,6 +20,8 @@ #include "tick-internal.h" #include "timekeeping_internal.h" +static void clocksource_enqueue(struct clocksource *cs); + static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end) { u64 delta = clocksource_delta(end, start, cs->mask); @@ -171,7 +173,6 @@ static inline void clocksource_watchdog_unlock(unsigned long *flags) } static int clocksource_watchdog_kthread(void *data); -static void __clocksource_change_rating(struct clocksource *cs, int rating); static void clocksource_watchdog_work(struct work_struct *work) { @@ -191,6 +192,13 @@ static void clocksource_watchdog_work(struct work_struct *work) kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog"); } +static void clocksource_change_rating(struct clocksource *cs, int rating) +{ + list_del(&cs->list); + cs->rating = rating; + clocksource_enqueue(cs); +} + static void __clocksource_unstable(struct clocksource *cs) { cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); @@ -697,7 +705,7 @@ static int __clocksource_watchdog_kthread(void) list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { if (cs->flags & CLOCK_SOURCE_UNSTABLE) { list_del_init(&cs->wd_list); - __clocksource_change_rating(cs, 0); + clocksource_change_rating(cs, 0); select = 1; } if (cs->flags & CLOCK_SOURCE_RESELECT) { @@ -1255,34 +1263,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) } EXPORT_SYMBOL_GPL(__clocksource_register_scale); -static void __clocksource_change_rating(struct clocksource *cs, int rating) -{ - list_del(&cs->list); - cs->rating = rating; - clocksource_enqueue(cs); -} - -/** - * clocksource_change_rating - Change the rating of a registered clocksource - * @cs: clocksource to be changed - * @rating: new rating - */ -void clocksource_change_rating(struct clocksource *cs, int rating) -{ - unsigned long flags; - - mutex_lock(&clocksource_mutex); - clocksource_watchdog_lock(&flags); - __clocksource_change_rating(cs, rating); - clocksource_watchdog_unlock(&flags); - - clocksource_select(); - 
clocksource_select_watchdog(false); - clocksource_suspend_select(false); - mutex_unlock(&clocksource_mutex); -} -EXPORT_SYMBOL(clocksource_change_rating); - /* * Unbind clocksource @cs. Called with clocksource_mutex held */ -- cgit v1.2.3 From c6ca31981b545ad3081007b6aa88b6aab1b0cece Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Thu, 10 Oct 2024 12:33:01 -0700 Subject: bpf: Update bpf_override_return() comment The documentation says CONFIG_FUNCTION_ERROR_INJECTION is supported only on x86. This was presumably true at the time of writing, but it's now supported on many other architectures too. Drop this statement, since it's not correct anymore and it fits better in other documentation anyway. Signed-off-by: Martin Kelly Link: https://lore.kernel.org/r/20241010193301.995909-1-martin.kelly@crowdstrike.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 4 ---- tools/include/uapi/linux/bpf.h | 4 ---- 2 files changed, 8 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 874af0186fe8..627c4195f04f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3103,10 +3103,6 @@ union bpf_attr { * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration * option, and in this case it only works on functions tagged with * **ALLOW_ERROR_INJECTION** in the kernel code. - * - * Also, the helper is only available for the architectures having - * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, - * x86 architecture is the only one to support this feature. * Return * 0 * diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 5937c39069ba..0e49ce2981a0 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3103,10 +3103,6 @@ union bpf_attr { * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration * option, and in this case it only works on functions tagged with * **ALLOW_ERROR_INJECTION** in the kernel code. - * - * Also, the helper is only available for the architectures having - * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, - * x86 architecture is the only one to support this feature. * Return * 0 * -- cgit v1.2.3 From 445936f9e258eca624c8239056bd8cd6e853b3fd Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 23 Sep 2024 11:59:57 +0200 Subject: thermal: core: Add user thresholds support The user thresholds mechanism is a way to have the userspace to tell the thermal framework to send a notification when a temperature limit is crossed. There is no id, no hysteresis, just the temperature and the direction of the limit crossing. That means we can be notified when a threshold is crossed the way up only, or the way down only or both ways. That allows to create hysteresis values if it is needed. A threshold can be added, deleted or flushed. The latter means all thresholds belonging to a thermal zone will be deleted. 
When a threshold is added: - if the same threshold (temperature and direction) exists, an error is returned - if a threshold is specified with the same temperature but a different direction, the specified direction is added - if there is no threshold with the same temperature then it is created When a threshold is deleted: - if the same threshold (temperature and direction) exists, it is deleted - if a threshold is specified with the same temperature but a different direction, the specified direction is removed - if there is no threshold with the same temperature, then an error is returned When the threshold are flushed: - All thresholds related to a thermal zone are deleted When a threshold is crossed: - the userspace does not need to know which threshold(s) have been crossed, it will be notified with the current temperature and the previous temperature - if multiple thresholds have been crossed between two updates only one notification will be send to the userspace, it is pointless to send a notification per thresholds crossed as the userspace can handle that easily when it has the temperature delta information Signed-off-by: Daniel Lezcano Link: https://patch.msgid.link/20240923100005.2532430-2-daniel.lezcano@linaro.org [ rjw: Subject edit, use BIT(0) and BIT(1) in symbol definitions ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/Makefile | 1 + drivers/thermal/thermal_core.h | 2 + drivers/thermal/thermal_thresholds.c | 229 +++++++++++++++++++++++++++++++++++ drivers/thermal/thermal_thresholds.h | 19 +++ include/linux/thermal.h | 3 + include/uapi/linux/thermal.h | 2 + 6 files changed, 256 insertions(+) create mode 100644 drivers/thermal/thermal_thresholds.c create mode 100644 drivers/thermal/thermal_thresholds.h (limited to 'include') diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 41c4d56beb40..1e1559bb971e 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -6,6 +6,7 @@ CFLAGS_thermal_core.o := -I$(src) obj-$(CONFIG_THERMAL) += thermal_sys.o thermal_sys-y += thermal_core.o thermal_sysfs.o thermal_sys-y += thermal_trip.o thermal_helpers.o +thermal_sys-y += thermal_thresholds.o # netlink interface to manage the thermal framework thermal_sys-$(CONFIG_THERMAL_NETLINK) += thermal_netlink.o diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index a64d39b1c86b..1ea91d59498b 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -13,6 +13,7 @@ #include #include "thermal_netlink.h" +#include "thermal_thresholds.h" #include "thermal_debugfs.h" struct thermal_attr { @@ -139,6 +140,7 @@ struct thermal_zone_device { #ifdef CONFIG_THERMAL_DEBUGFS struct thermal_debugfs *debugfs; #endif + struct list_head user_thresholds; struct thermal_trip_desc trips[] __counted_by(num_trips); }; diff --git a/drivers/thermal/thermal_thresholds.c b/drivers/thermal/thermal_thresholds.c new file mode 100644 index 000000000000..f33b6d5474d8 --- /dev/null +++ b/drivers/thermal/thermal_thresholds.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2024 Linaro Limited + * + * Author: Daniel Lezcano + * + * Thermal thresholds + */ +#include +#include +#include + +#include "thermal_core.h" +#include "thermal_thresholds.h" + +int thermal_thresholds_init(struct thermal_zone_device *tz) +{ + INIT_LIST_HEAD(&tz->user_thresholds); + + return 0; +} + +void thermal_thresholds_flush(struct thermal_zone_device *tz) +{ + struct list_head *thresholds = &tz->user_thresholds; + struct user_threshold *entry, 
*tmp; + + lockdep_assert_held(&tz->lock); + + list_for_each_entry_safe(entry, tmp, thresholds, list_node) { + list_del(&entry->list_node); + kfree(entry); + } + + __thermal_zone_device_update(tz, THERMAL_TZ_FLUSH_THRESHOLDS); +} + +void thermal_thresholds_exit(struct thermal_zone_device *tz) +{ + thermal_thresholds_flush(tz); +} + +static int __thermal_thresholds_cmp(void *data, + const struct list_head *l1, + const struct list_head *l2) +{ + struct user_threshold *t1 = container_of(l1, struct user_threshold, list_node); + struct user_threshold *t2 = container_of(l2, struct user_threshold, list_node); + + return t1->temperature - t2->temperature; +} + +static struct user_threshold *__thermal_thresholds_find(const struct list_head *thresholds, + int temperature) +{ + struct user_threshold *t; + + list_for_each_entry(t, thresholds, list_node) + if (t->temperature == temperature) + return t; + + return NULL; +} + +static bool __thermal_threshold_is_crossed(struct user_threshold *threshold, int temperature, + int last_temperature, int direction, + int *low, int *high) +{ + + if (temperature >= threshold->temperature) { + if (threshold->temperature > *low && + THERMAL_THRESHOLD_WAY_DOWN & threshold->direction) + *low = threshold->temperature; + + if (last_temperature < threshold->temperature && + threshold->direction & direction) + return true; + } else { + if (threshold->temperature < *high && THERMAL_THRESHOLD_WAY_UP + & threshold->direction) + *high = threshold->temperature; + + if (last_temperature >= threshold->temperature && + threshold->direction & direction) + return true; + } + + return false; +} + +static bool thermal_thresholds_handle_raising(struct list_head *thresholds, int temperature, + int last_temperature, int *low, int *high) +{ + struct user_threshold *t; + + list_for_each_entry(t, thresholds, list_node) { + if (__thermal_threshold_is_crossed(t, temperature, last_temperature, + THERMAL_THRESHOLD_WAY_UP, low, high)) + return true; + } + + return false; +} + +static bool thermal_thresholds_handle_dropping(struct list_head *thresholds, int temperature, + int last_temperature, int *low, int *high) +{ + struct user_threshold *t; + + list_for_each_entry_reverse(t, thresholds, list_node) { + if (__thermal_threshold_is_crossed(t, temperature, last_temperature, + THERMAL_THRESHOLD_WAY_DOWN, low, high)) + return true; + } + + return false; +} + +void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *high) +{ + struct list_head *thresholds = &tz->user_thresholds; + + int temperature = tz->temperature; + int last_temperature = tz->last_temperature; + bool notify; + + lockdep_assert_held(&tz->lock); + + /* + * We need a second update in order to detect a threshold being crossed + */ + if (last_temperature == THERMAL_TEMP_INVALID) + return; + + /* + * The temperature is stable, so obviously we can not have + * crossed a threshold. + */ + if (last_temperature == temperature) + return; + + /* + * Since last update the temperature: + * - increased : thresholds are crossed the way up + * - decreased : thresholds are crossed the way down + */ + if (temperature > last_temperature) + notify = thermal_thresholds_handle_raising(thresholds, temperature, + last_temperature, low, high); + else + notify = thermal_thresholds_handle_dropping(thresholds, temperature, + last_temperature, low, high); + + if (notify) + pr_debug("A threshold has been crossed the way %s, with a temperature=%d, last_temperature=%d\n", + temperature > last_temperature ? 
"up" : "down", temperature, last_temperature); +} + +int thermal_thresholds_add(struct thermal_zone_device *tz, int temperature, int direction) +{ + struct list_head *thresholds = &tz->user_thresholds; + struct user_threshold *t; + + lockdep_assert_held(&tz->lock); + + t = __thermal_thresholds_find(thresholds, temperature); + if (t) { + if (t->direction == direction) + return -EEXIST; + + t->direction |= direction; + } else { + + t = kmalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return -ENOMEM; + + INIT_LIST_HEAD(&t->list_node); + t->temperature = temperature; + t->direction = direction; + list_add(&t->list_node, thresholds); + list_sort(NULL, thresholds, __thermal_thresholds_cmp); + } + + __thermal_zone_device_update(tz, THERMAL_TZ_ADD_THRESHOLD); + + return 0; +} + +int thermal_thresholds_delete(struct thermal_zone_device *tz, int temperature, int direction) +{ + struct list_head *thresholds = &tz->user_thresholds; + struct user_threshold *t; + + lockdep_assert_held(&tz->lock); + + t = __thermal_thresholds_find(thresholds, temperature); + if (!t) + return -ENOENT; + + if (t->direction == direction) { + list_del(&t->list_node); + kfree(t); + } else { + t->direction &= ~direction; + } + + __thermal_zone_device_update(tz, THERMAL_TZ_DEL_THRESHOLD); + + return 0; +} + +int thermal_thresholds_for_each(struct thermal_zone_device *tz, + int (*cb)(struct user_threshold *, void *arg), void *arg) +{ + struct list_head *thresholds = &tz->user_thresholds; + struct user_threshold *entry; + int ret; + + lockdep_assert_held(&tz->lock); + + list_for_each_entry(entry, thresholds, list_node) { + ret = cb(entry, arg); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/thermal/thermal_thresholds.h b/drivers/thermal/thermal_thresholds.h new file mode 100644 index 000000000000..232f4e8089af --- /dev/null +++ b/drivers/thermal/thermal_thresholds.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __THERMAL_THRESHOLDS_H__ +#define __THERMAL_THRESHOLDS_H__ + +struct user_threshold { + struct list_head list_node; + int temperature; + int direction; +}; + +int thermal_thresholds_init(struct thermal_zone_device *tz); +void thermal_thresholds_exit(struct thermal_zone_device *tz); +void thermal_thresholds_flush(struct thermal_zone_device *tz); +void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *high); +int thermal_thresholds_add(struct thermal_zone_device *tz, int temperature, int direction); +int thermal_thresholds_delete(struct thermal_zone_device *tz, int temperature, int direction); +int thermal_thresholds_for_each(struct thermal_zone_device *tz, + int (*cb)(struct user_threshold *, void *arg), void *arg); +#endif diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 25ea8fe2313e..bcaa92732e14 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -56,6 +56,9 @@ enum thermal_notify_event { THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */ THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */ THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */ + THERMAL_TZ_ADD_THRESHOLD, /* Threshold added */ + THERMAL_TZ_DEL_THRESHOLD, /* Threshold deleted */ + THERMAL_TZ_FLUSH_THRESHOLDS, /* All thresholds deleted */ }; /** diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index fc78bf3aead7..2e6f60a36173 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -3,6 +3,8 @@ #define _UAPI_LINUX_THERMAL_H #define THERMAL_NAME_LENGTH 20 
+#define THERMAL_THRESHOLD_WAY_UP BIT(0) +#define THERMAL_THRESHOLD_WAY_DOWN BIT(1) enum thermal_device_mode { THERMAL_DEVICE_DISABLED = 0, -- cgit v1.2.3 From 9fb6fef0fb49124291837af1da5028f79d53f98e Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 14 Jun 2024 13:06:03 +0300 Subject: resource: Add resource set range and size helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting the end address for a resource with a given size lacks a helper and is therefore coded manually unlike the getter side which has a helper for resource size calculation. Also, almost all callsites that calculate the end address for a resource also set the start address right before it like this: res->start = start_addr; res->end = res->start + size - 1; Add resource_set_range(res, start_addr, size) that sets the start address and calculates the end address to simplify this often repeated fragment. Also add resource_set_size() for the cases where setting the start address of the resource is not necessary but mention in its kerneldoc that resource_set_range() is preferred when setting both addresses. Link: https://lore.kernel.org/r/20240614100606.15830-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- include/linux/ioport.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6e9fb667a1c5..5385349f0b8a 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -249,6 +249,38 @@ struct resource *lookup_resource(struct resource *root, resource_size_t start); int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size); resource_size_t resource_alignment(struct resource *res); + +/** + * resource_set_size - Calculate resource end address from size and start + * @res: Resource descriptor + * @size: Size of the resource + * + * Calculate the end address for @res based on @size. + * + * Note: The start address of @res must be set when calling this function. + * Prefer resource_set_range() if setting both the start address and @size. + */ +static inline void resource_set_size(struct resource *res, resource_size_t size) +{ + res->end = res->start + size - 1; +} + +/** + * resource_set_range - Set resource start and end addresses + * @res: Resource descriptor + * @start: Start address for the resource + * @size: Size of the resource + * + * Set @res start address and calculate the end address based on @size. + */ +static inline void resource_set_range(struct resource *res, + resource_size_t start, + resource_size_t size) +{ + res->start = start; + resource_set_size(res, size); +} + static inline resource_size_t resource_size(const struct resource *res) { return res->end - res->start + 1; -- cgit v1.2.3 From 7888af4166d4ab07ba51234be6ba332b7807e901 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 8 Oct 2024 19:05:28 -0400 Subject: ftrace: Make ftrace_regs abstract from direct use ftrace_regs was created to hold registers that store information to save function parameters, return value and stack. Since it is a subset of pt_regs, it should only be used by its accessor functions. But because pt_regs can easily be taken from ftrace_regs (on most archs), it is tempting to use it directly. But when running on other architectures, it may fail to build or worse, build but crash the kernel! 
Instead, make struct ftrace_regs an empty structure and have the architectures define __arch_ftrace_regs and all the accessor functions will typecast to it to get to the actual fields. This will help avoid usage of ftrace_regs directly. Link: https://lore.kernel.org/all/20241007171027.629bdafd@gandalf.local.home/ Cc: "linux-arch@vger.kernel.org" Cc: "x86@kernel.org" Cc: Mathieu Desnoyers Cc: Mark Rutland Cc: Will Deacon Cc: Huacai Chen Cc: WANG Xuerui Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Naveen N Rao Cc: Madhavan Srinivasan Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Link: https://lore.kernel.org/20241008230628.958778821@goodmis.org Acked-by: Catalin Marinas Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Acked-by: Heiko Carstens # s390 Signed-off-by: Steven Rostedt (Google) --- arch/arm64/include/asm/ftrace.h | 20 +++++++++++--------- arch/arm64/kernel/asm-offsets.c | 22 +++++++++++----------- arch/arm64/kernel/ftrace.c | 10 +++++----- arch/loongarch/include/asm/ftrace.h | 22 ++++++++++++---------- arch/loongarch/kernel/ftrace_dyn.c | 2 +- arch/powerpc/include/asm/ftrace.h | 21 ++++++++++++--------- arch/powerpc/kernel/trace/ftrace.c | 4 ++-- arch/powerpc/kernel/trace/ftrace_64_pg.c | 2 +- arch/riscv/include/asm/ftrace.h | 21 ++++++++++++--------- arch/riscv/kernel/asm-offsets.c | 28 ++++++++++++++-------------- arch/riscv/kernel/ftrace.c | 2 +- arch/s390/include/asm/ftrace.h | 23 +++++++++++++---------- arch/s390/kernel/asm-offsets.c | 4 ++-- arch/s390/kernel/ftrace.c | 2 +- arch/s390/lib/test_unwind.c | 4 ++-- arch/x86/include/asm/ftrace.h | 25 ++++++++++++++----------- arch/x86/kernel/ftrace.c | 2 +- include/linux/ftrace.h | 21 ++++++++++++++++++--- kernel/trace/ftrace.c | 2 +- 19 files changed, 134 insertions(+), 103 deletions(-) (limited to 'include') diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index dc9cf0bd2a4c..bbb69c7751b9 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -56,6 +56,8 @@ unsigned long ftrace_call_adjust(unsigned long addr); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS struct dyn_ftrace; struct ftrace_ops; +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) #define arch_ftrace_get_regs(regs) NULL @@ -63,7 +65,7 @@ struct ftrace_ops; * Note: sizeof(struct ftrace_regs) must be a multiple of 16 to ensure correct * stack alignment */ -struct ftrace_regs { +struct __arch_ftrace_regs { /* x0 - x8 */ unsigned long regs[9]; @@ -83,47 +85,47 @@ struct ftrace_regs { static __always_inline unsigned long ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) { - return fregs->pc; + return arch_ftrace_regs(fregs)->pc; } static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long pc) { - fregs->pc = pc; + arch_ftrace_regs(fregs)->pc = pc; } static __always_inline unsigned long ftrace_regs_get_stack_pointer(const struct ftrace_regs *fregs) { - return fregs->sp; + return arch_ftrace_regs(fregs)->sp; } static __always_inline unsigned long ftrace_regs_get_argument(struct ftrace_regs *fregs, unsigned int n) { if (n < 8) - return fregs->regs[n]; + return arch_ftrace_regs(fregs)->regs[n]; return 0; } static __always_inline unsigned long ftrace_regs_get_return_value(const struct ftrace_regs 
*fregs) { - return fregs->regs[0]; + return arch_ftrace_regs(fregs)->regs[0]; } static __always_inline void ftrace_regs_set_return_value(struct ftrace_regs *fregs, unsigned long ret) { - fregs->regs[0] = ret; + arch_ftrace_regs(fregs)->regs[0] = ret; } static __always_inline void ftrace_override_function_with_return(struct ftrace_regs *fregs) { - fregs->pc = fregs->lr; + arch_ftrace_regs(fregs)->pc = arch_ftrace_regs(fregs)->lr; } int ftrace_regs_query_register_offset(const char *name); @@ -143,7 +145,7 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, * The ftrace trampoline will return to this address instead of the * instrumented function. */ - fregs->direct_tramp = addr; + arch_ftrace_regs(fregs)->direct_tramp = addr; } #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 27de1dddb0ab..a5de57f68219 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -84,19 +84,19 @@ int main(void) DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); BLANK(); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS - DEFINE(FREGS_X0, offsetof(struct ftrace_regs, regs[0])); - DEFINE(FREGS_X2, offsetof(struct ftrace_regs, regs[2])); - DEFINE(FREGS_X4, offsetof(struct ftrace_regs, regs[4])); - DEFINE(FREGS_X6, offsetof(struct ftrace_regs, regs[6])); - DEFINE(FREGS_X8, offsetof(struct ftrace_regs, regs[8])); - DEFINE(FREGS_FP, offsetof(struct ftrace_regs, fp)); - DEFINE(FREGS_LR, offsetof(struct ftrace_regs, lr)); - DEFINE(FREGS_SP, offsetof(struct ftrace_regs, sp)); - DEFINE(FREGS_PC, offsetof(struct ftrace_regs, pc)); + DEFINE(FREGS_X0, offsetof(struct __arch_ftrace_regs, regs[0])); + DEFINE(FREGS_X2, offsetof(struct __arch_ftrace_regs, regs[2])); + DEFINE(FREGS_X4, offsetof(struct __arch_ftrace_regs, regs[4])); + DEFINE(FREGS_X6, offsetof(struct __arch_ftrace_regs, regs[6])); + DEFINE(FREGS_X8, offsetof(struct __arch_ftrace_regs, regs[8])); + DEFINE(FREGS_FP, offsetof(struct __arch_ftrace_regs, fp)); + DEFINE(FREGS_LR, offsetof(struct __arch_ftrace_regs, lr)); + DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp)); + DEFINE(FREGS_PC, offsetof(struct __arch_ftrace_regs, pc)); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS - DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct ftrace_regs, direct_tramp)); + DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct __arch_ftrace_regs, direct_tramp)); #endif - DEFINE(FREGS_SIZE, sizeof(struct ftrace_regs)); + DEFINE(FREGS_SIZE, sizeof(struct __arch_ftrace_regs)); BLANK(); #endif #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index a650f5e11fc5..b2d947175cbe 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -23,10 +23,10 @@ struct fregs_offset { int offset; }; -#define FREGS_OFFSET(n, field) \ -{ \ - .name = n, \ - .offset = offsetof(struct ftrace_regs, field), \ +#define FREGS_OFFSET(n, field) \ +{ \ + .name = n, \ + .offset = offsetof(struct __arch_ftrace_regs, field), \ } static const struct fregs_offset fregs_offsets[] = { @@ -481,7 +481,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - prepare_ftrace_return(ip, &fregs->lr, fregs->fp); + prepare_ftrace_return(ip, &arch_ftrace_regs(fregs)->lr, arch_ftrace_regs(fregs)->fp); } #else /* diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index 
c0a682808e07..0e15d36ce251 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -43,38 +43,40 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent); #ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS struct ftrace_ops; +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) -struct ftrace_regs { +struct __arch_ftrace_regs { struct pt_regs regs; }; static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { - return &fregs->regs; + return &arch_ftrace_regs(fregs)->regs; } static __always_inline unsigned long ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs) { - return instruction_pointer(&fregs->regs); + return instruction_pointer(&arch_ftrace_regs(fregs)->regs); } static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) { - instruction_pointer_set(&fregs->regs, ip); + instruction_pointer_set(&arch_ftrace_regs(fregs)->regs, ip); } #define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&(fregs)->regs, n) + regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&(fregs)->regs) + kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_return_value(fregs) \ - regs_return_value(&(fregs)->regs) + regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&(fregs)->regs, ret) + regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) #define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&(fregs)->regs) + override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) @@ -90,7 +92,7 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) } #define arch_ftrace_set_direct_caller(fregs, addr) \ - __arch_ftrace_set_direct_caller(&(fregs)->regs, addr) + __arch_ftrace_set_direct_caller(&arch_ftrace_regs(fregs)->regs, addr) #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #endif diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index bff058317062..18056229e22e 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -241,7 +241,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent) void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - struct pt_regs *regs = &fregs->regs; + struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs; unsigned long *parent = (unsigned long *)®s->regs[1]; prepare_ftrace_return(ip, (unsigned long *)parent); diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 559560286e6d..e299fd47d201 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -32,39 +32,42 @@ struct dyn_arch_ftrace { int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop -struct ftrace_regs { +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +struct __arch_ftrace_regs { struct pt_regs regs; }; static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { /* We clear regs.msr in ftrace_call */ - return fregs->regs.msr ? 
&fregs->regs : NULL; + return arch_ftrace_regs(fregs)->regs.msr ? &arch_ftrace_regs(fregs)->regs : NULL; } static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) { - regs_set_return_ip(&fregs->regs, ip); + regs_set_return_ip(&arch_ftrace_regs(fregs)->regs, ip); } static __always_inline unsigned long ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs) { - return instruction_pointer(&fregs->regs); + return instruction_pointer(&arch_ftrace_regs(fregs)->regs); } #define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&(fregs)->regs, n) + regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&(fregs)->regs) + kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_return_value(fregs) \ - regs_return_value(&(fregs)->regs) + regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&(fregs)->regs, ret) + regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) #define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&(fregs)->regs) + override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index d8d6b4fd9a14..df41f4a7c738 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -421,7 +421,7 @@ int __init ftrace_dyn_arch_init(void) void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - unsigned long sp = fregs->regs.gpr[1]; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gpr[1]; int bit; if (unlikely(ftrace_graph_is_dead())) @@ -439,6 +439,6 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, ftrace_test_recursion_unlock(bit); out: - fregs->regs.link = parent_ip; + arch_ftrace_regs(fregs)->regs.link = parent_ip; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c index 12fab1803bcf..d3c5552e4984 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.c +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -829,7 +829,7 @@ out: void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]); + arch_ftrace_regs(fregs)->regs.link = __prepare_ftrace_return(parent_ip, ip, arch_ftrace_regs(fregs)->regs.gpr[1]); } #else unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 2cddd79ff21b..c6bcdff105b5 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -126,7 +126,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS #define arch_ftrace_get_regs(regs) NULL struct ftrace_ops; -struct ftrace_regs { +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +struct __arch_ftrace_regs { unsigned long epc; unsigned long ra; unsigned long sp; @@ -150,42 +153,42 @@ struct ftrace_regs { static __always_inline unsigned long ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) { - return fregs->epc; + 
return arch_ftrace_regs(fregs)->epc; } static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long pc) { - fregs->epc = pc; + arch_ftrace_regs(fregs)->epc = pc; } static __always_inline unsigned long ftrace_regs_get_stack_pointer(const struct ftrace_regs *fregs) { - return fregs->sp; + return arch_ftrace_regs(fregs)->sp; } static __always_inline unsigned long ftrace_regs_get_argument(struct ftrace_regs *fregs, unsigned int n) { if (n < 8) - return fregs->args[n]; + return arch_ftrace_regs(fregs)->args[n]; return 0; } static __always_inline unsigned long ftrace_regs_get_return_value(const struct ftrace_regs *fregs) { - return fregs->a0; + return arch_ftrace_regs(fregs)->a0; } static __always_inline void ftrace_regs_set_return_value(struct ftrace_regs *fregs, unsigned long ret) { - fregs->a0 = ret; + arch_ftrace_regs(fregs)->a0 = ret; } static __always_inline void ftrace_override_function_with_return(struct ftrace_regs *fregs) { - fregs->epc = fregs->ra; + arch_ftrace_regs(fregs)->epc = arch_ftrace_regs(fregs)->ra; } int ftrace_regs_query_register_offset(const char *name); @@ -196,7 +199,7 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) { - fregs->t1 = addr; + arch_ftrace_regs(fregs)->t1 = addr; } #endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index e94180ba432f..f6f5a277ba9d 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -498,19 +498,19 @@ void asm_offsets(void) OFFSET(STACKFRAME_RA, stackframe, ra); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS - DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct ftrace_regs), STACK_ALIGN)); - DEFINE(FREGS_EPC, offsetof(struct ftrace_regs, epc)); - DEFINE(FREGS_RA, offsetof(struct ftrace_regs, ra)); - DEFINE(FREGS_SP, offsetof(struct ftrace_regs, sp)); - DEFINE(FREGS_S0, offsetof(struct ftrace_regs, s0)); - DEFINE(FREGS_T1, offsetof(struct ftrace_regs, t1)); - DEFINE(FREGS_A0, offsetof(struct ftrace_regs, a0)); - DEFINE(FREGS_A1, offsetof(struct ftrace_regs, a1)); - DEFINE(FREGS_A2, offsetof(struct ftrace_regs, a2)); - DEFINE(FREGS_A3, offsetof(struct ftrace_regs, a3)); - DEFINE(FREGS_A4, offsetof(struct ftrace_regs, a4)); - DEFINE(FREGS_A5, offsetof(struct ftrace_regs, a5)); - DEFINE(FREGS_A6, offsetof(struct ftrace_regs, a6)); - DEFINE(FREGS_A7, offsetof(struct ftrace_regs, a7)); + DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct __arch_ftrace_regs), STACK_ALIGN)); + DEFINE(FREGS_EPC, offsetof(struct __arch_ftrace_regs, epc)); + DEFINE(FREGS_RA, offsetof(struct __arch_ftrace_regs, ra)); + DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp)); + DEFINE(FREGS_S0, offsetof(struct __arch_ftrace_regs, s0)); + DEFINE(FREGS_T1, offsetof(struct __arch_ftrace_regs, t1)); + DEFINE(FREGS_A0, offsetof(struct __arch_ftrace_regs, a0)); + DEFINE(FREGS_A1, offsetof(struct __arch_ftrace_regs, a1)); + DEFINE(FREGS_A2, offsetof(struct __arch_ftrace_regs, a2)); + DEFINE(FREGS_A3, offsetof(struct __arch_ftrace_regs, a3)); + DEFINE(FREGS_A4, offsetof(struct __arch_ftrace_regs, a4)); + DEFINE(FREGS_A5, offsetof(struct __arch_ftrace_regs, a5)); + DEFINE(FREGS_A6, offsetof(struct __arch_ftrace_regs, a6)); + DEFINE(FREGS_A7, offsetof(struct __arch_ftrace_regs, a7)); #endif } diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 4b95c574fd04..5081ad886841 100644 --- a/arch/riscv/kernel/ftrace.c 
+++ b/arch/riscv/kernel/ftrace.c @@ -214,7 +214,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - prepare_ftrace_return(&fregs->ra, ip, fregs->s0); + prepare_ftrace_return(&arch_ftrace_regs(fregs)->ra, ip, arch_ftrace_regs(fregs)->s0); } #else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ extern void ftrace_graph_call(void); diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 406746666eb7..1498d0a9c762 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -51,13 +51,16 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } -struct ftrace_regs { +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +struct __arch_ftrace_regs { struct pt_regs regs; }; static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { - struct pt_regs *regs = &fregs->regs; + struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs; if (test_pt_regs_flag(regs, PIF_FTRACE_FULL_REGS)) return regs; @@ -84,26 +87,26 @@ static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph static __always_inline unsigned long ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) { - return fregs->regs.psw.addr; + return arch_ftrace_regs(fregs)->regs.psw.addr; } static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) { - fregs->regs.psw.addr = ip; + arch_ftrace_regs(fregs)->regs.psw.addr = ip; } #define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&(fregs)->regs, n) + regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&(fregs)->regs) + kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_return_value(fregs) \ - regs_return_value(&(fregs)->regs) + regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&(fregs)->regs, ret) + regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) #define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&(fregs)->regs) + override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) @@ -117,7 +120,7 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, */ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) { - struct pt_regs *regs = &fregs->regs; + struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs; regs->orig_gpr2 = addr; } #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 5529248d84fb..db9659980175 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -184,8 +184,8 @@ int main(void) OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp); DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs)); #endif - OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs); - DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs)); + OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs); + DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs)); OFFSET(__PCPU_FLAGS, pcpu, flags); return 0; diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 
0b6e62d1d8b8..51439a71e392 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -318,7 +318,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - kmsan_unpoison_memory(fregs, sizeof(*fregs)); + kmsan_unpoison_memory(fregs, ftrace_regs_size()); regs = ftrace_get_regs(fregs); p = get_kprobe((kprobe_opcode_t *)ip); if (!regs || unlikely(!p) || kprobe_disabled(p)) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 8b7f981e6f34..6e42100875e7 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -270,9 +270,9 @@ static void notrace __used test_unwind_ftrace_handler(unsigned long ip, struct ftrace_ops *fops, struct ftrace_regs *fregs) { - struct unwindme *u = (struct unwindme *)fregs->regs.gprs[2]; + struct unwindme *u = (struct unwindme *)arch_ftrace_regs(fregs)->regs.gprs[2]; - u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &fregs->regs : NULL, + u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &arch_ftrace_regs(fregs)->regs : NULL, (u->flags & UWM_SP) ? u->sp : 0); } diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 0152a81d9b4a..87943f7a299b 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -33,7 +33,10 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) } #ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS -struct ftrace_regs { +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +struct __arch_ftrace_regs { struct pt_regs regs; }; @@ -41,27 +44,27 @@ static __always_inline struct pt_regs * arch_ftrace_get_regs(struct ftrace_regs *fregs) { /* Only when FL_SAVE_REGS is set, cs will be non zero */ - if (!fregs->regs.cs) + if (!arch_ftrace_regs(fregs)->regs.cs) return NULL; - return &fregs->regs; + return &arch_ftrace_regs(fregs)->regs; } #define ftrace_regs_set_instruction_pointer(fregs, _ip) \ - do { (fregs)->regs.ip = (_ip); } while (0) + do { arch_ftrace_regs(fregs)->regs.ip = (_ip); } while (0) #define ftrace_regs_get_instruction_pointer(fregs) \ - ((fregs)->regs.ip) + arch_ftrace_regs(fregs)->regs.ip) #define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&(fregs)->regs, n) + regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&(fregs)->regs) + kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_return_value(fregs) \ - regs_return_value(&(fregs)->regs) + regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&(fregs)->regs, ret) + regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) #define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&(fregs)->regs) + override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) @@ -88,7 +91,7 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) regs->orig_ax = addr; } #define arch_ftrace_set_direct_caller(fregs, addr) \ - __arch_ftrace_set_direct_caller(&(fregs)->regs, addr) + __arch_ftrace_set_direct_caller(&arch_ftrace_regs(fregs)->regs, addr) #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8da0e66ca22d..adb09f78edb2 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ 
-647,7 +647,7 @@ void prepare_ftrace_return(unsigned long ip, unsigned long *parent, void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - struct pt_regs *regs = &fregs->regs; + struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs; unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs); prepare_ftrace_return(ip, (unsigned long *)stack, 0); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4c7dd5e58c9f..66f10291a0b2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -115,8 +115,6 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val extern int ftrace_enabled; -#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS - /** * ftrace_regs - ftrace partial/optimal register set * @@ -142,11 +140,28 @@ extern int ftrace_enabled; * * NOTE: user *must not* access regs directly, only do it via APIs, because * the member can be changed according to the architecture. + * This is why the structure is empty here, so that nothing accesses + * the ftrace_regs directly. */ struct ftrace_regs { + /* Nothing to see here, use the accessor functions! */ +}; + +#define ftrace_regs_size() sizeof(struct __arch_ftrace_regs) + +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + +struct __arch_ftrace_regs { struct pt_regs regs; }; -#define arch_ftrace_get_regs(fregs) (&(fregs)->regs) + +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + return &arch_ftrace_regs(fregs)->regs; +} /* * ftrace_regs_set_instruction_pointer() is to be defined by the architecture diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cae388122ca8..e9fd4fb2769e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7943,7 +7943,7 @@ out: void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - kmsan_unpoison_memory(fregs, sizeof(*fregs)); + kmsan_unpoison_memory(fregs, ftrace_regs_size()); __ftrace_ops_list_func(ip, parent_ip, NULL, fregs); } #else -- cgit v1.2.3 From 483c5c2bc6b1c0d17214e2672032def605ff2ba9 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Fri, 20 Sep 2024 13:34:23 +0800 Subject: serial: clean up uart_info Since commit ebd2c8f6d2ec ("serial: kill off uart_info") has removed uart_info, the uart_info declaration looks lonely, let it go. Signed-off-by: Yanteng Si Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240920053423.1373354-1-siyanteng@cqsoftware.com.cn Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/sa11x0-serial.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/sa11x0-serial.h b/include/linux/platform_data/sa11x0-serial.h index 8b79ab08af45..a88096bc74e4 100644 --- a/include/linux/platform_data/sa11x0-serial.h +++ b/include/linux/platform_data/sa11x0-serial.h @@ -10,7 +10,6 @@ #define SA11X0_SERIAL_H struct uart_port; -struct uart_info; /* * This is a temporary structure for registering these -- cgit v1.2.3 From 7738a7ab9d12c5371ed97114ee2132d4512e9fd5 Mon Sep 17 00:00:00 2001 From: Parker Newman Date: Wed, 2 Oct 2024 11:12:33 -0400 Subject: misc: eeprom: eeprom_93cx6: Add quirk for extra read clock cycle Add a quirk similar to eeprom_93xx46 to add an extra clock cycle before reading data from the EEPROM. 
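As an illustration, here is a minimal sketch of how a driver whose wiring needs the extra cycle would opt in. The function below is made up and the register_read()/register_write() setup is omitted; the quirk flag is the one added by this patch, while eeprom_93cx6_read() and PCI_EEPROM_WIDTH_93C46 already exist in linux/eeprom_93cx6.h:

    #include <linux/eeprom_93cx6.h>

    static void example_read_word(struct eeprom_93cx6 *eeprom, u16 *word)
    {
            /* register_read()/register_write() callbacks are assumed to
             * be wired up elsewhere; only the quirk handling is shown
             */
            eeprom->width = PCI_EEPROM_WIDTH_93C46;
            eeprom->quirks |= PCI_EEPROM_QUIRK_EXTRA_READ_CYCLE;

            /* the extra clock pulse is generated inside the helper
             * whenever the quirk bit is set
             */
            eeprom_93cx6_read(eeprom, 0, word);
    }

The protocol background on why some hosts need the extra cycle is as follows.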
The 93Cx6 family of EEPROMs output a "dummy 0 bit" between the writing of the op-code/address from the host to the EEPROM and the reading of the actual data from the EEPROM. More info can be found on page 6 of the AT93C46 datasheet (linked below). Similar notes are found in other 93xx6 datasheets. In summary the read operation for a 93Cx6 EEPROM is: Write to EEPROM: 110[A5-A0] (9 bits) Read from EEPROM: 0[D15-D0] (17 bits) Where: 110 is the start bit and READ OpCode [A5-A0] is the address to read from 0 is a "dummy bit" preceding the actual data [D15-D0] is the actual data. Looking at the READ timing diagrams in the 93Cx6 datasheets the dummy bit should be clocked out on the last address bit clock cycle meaning it should be discarded naturally. However, depending on the hardware configuration sometimes this dummy bit is not discarded. This is the case with Exar PCI UARTs which require an extra clock cycle between sending the address and reading the data. Datasheet: https://ww1.microchip.com/downloads/en/DeviceDoc/Atmel-5193-SEEPROM-AT93C46D-Datasheet.pdf Reviewed-by: Andy Shevchenko Signed-off-by: Parker Newman Link: https://lore.kernel.org/r/0f23973efefccd2544705a0480b4ad4c2353e407.1727880931.git.pnewman@connecttech.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/eeprom_93cx6.c | 10 ++++++++++ include/linux/eeprom_93cx6.h | 11 +++++++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/drivers/misc/eeprom/eeprom_93cx6.c b/drivers/misc/eeprom/eeprom_93cx6.c index 9627294fe3e9..4c9827fe9217 100644 --- a/drivers/misc/eeprom/eeprom_93cx6.c +++ b/drivers/misc/eeprom/eeprom_93cx6.c @@ -186,6 +186,11 @@ void eeprom_93cx6_read(struct eeprom_93cx6 *eeprom, const u8 word, eeprom_93cx6_write_bits(eeprom, command, PCI_EEPROM_WIDTH_OPCODE + eeprom->width); + if (has_quirk_extra_read_cycle(eeprom)) { + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); + } + /* * Read the requested 16 bits. */ @@ -252,6 +257,11 @@ void eeprom_93cx6_readb(struct eeprom_93cx6 *eeprom, const u8 byte, eeprom_93cx6_write_bits(eeprom, command, PCI_EEPROM_WIDTH_OPCODE + eeprom->width + 1); + if (has_quirk_extra_read_cycle(eeprom)) { + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); + } + /* * Read the requested 8 bits. */ diff --git a/include/linux/eeprom_93cx6.h b/include/linux/eeprom_93cx6.h index c860c72a921d..3a485cc0e0fa 100644 --- a/include/linux/eeprom_93cx6.h +++ b/include/linux/eeprom_93cx6.h @@ -11,6 +11,8 @@ Supported chipsets: 93c46, 93c56 and 93c66. */ +#include + /* * EEPROM operation defines. */ @@ -34,6 +36,7 @@ * @register_write(struct eeprom_93cx6 *eeprom): handler to * write to the eeprom register by using all reg_* fields. * @width: eeprom width, should be one of the PCI_EEPROM_WIDTH_* defines + * @quirks: eeprom or controller quirks * @drive_data: Set if we're driving the data line. 
* @reg_data_in: register field to indicate data input * @reg_data_out: register field to indicate data output @@ -50,6 +53,9 @@ struct eeprom_93cx6 { void (*register_write)(struct eeprom_93cx6 *eeprom); int width; + unsigned int quirks; +/* Some EEPROMs require an extra clock cycle before reading */ +#define PCI_EEPROM_QUIRK_EXTRA_READ_CYCLE BIT(0) char drive_data; char reg_data_in; @@ -71,3 +77,8 @@ extern void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable); extern void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom, u8 addr, u16 data); + +static inline bool has_quirk_extra_read_cycle(struct eeprom_93cx6 *eeprom) +{ + return eeprom->quirks & PCI_EEPROM_QUIRK_EXTRA_READ_CYCLE; +} -- cgit v1.2.3 From 11dad94b50263bbe87d015041c77be61c8c44161 Mon Sep 17 00:00:00 2001 From: Andrew Kreimer Date: Thu, 10 Oct 2024 12:13:55 +0300 Subject: phy: sun4i-usb: Fix a typo Fix a typo in comments: wether -> whether. Signed-off-by: Andrew Kreimer Acked-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20241010091355.8271-1-algonell@gmail.com Signed-off-by: Vinod Koul --- include/linux/phy/phy-sun4i-usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy/phy-sun4i-usb.h b/include/linux/phy/phy-sun4i-usb.h index 91eb755ee73b..f3e7b13608e4 100644 --- a/include/linux/phy/phy-sun4i-usb.h +++ b/include/linux/phy/phy-sun4i-usb.h @@ -11,7 +11,7 @@ /** * sun4i_usb_phy_set_squelch_detect() - Enable/disable squelch detect * @phy: reference to a sun4i usb phy - * @enabled: wether to enable or disable squelch detect + * @enabled: whether to enable or disable squelch detect */ void sun4i_usb_phy_set_squelch_detect(struct phy *phy, bool enabled); -- cgit v1.2.3 From eac79786c7397925149a1bfc4bb704777cd42a99 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 7 Oct 2024 20:12:40 +0800 Subject: ASoC: SOF: Intel: hda-mlink: expose unlocked interrupt enable routine When the eml_lock is already taken, we need an unlocked version. 
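A rough sketch of the intended call pattern: the caller below is hypothetical and would live in hda-mlink.c, where struct hdac_ext2_link and its eml_lock are visible; only hdac_bus_eml_enable_interrupt_unlocked() is the interface actually added by this patch.

    static void example_reconfigure_sublink(struct hdac_bus *bus,
                                            struct hdac_ext2_link *h2link,
                                            int elid)
    {
            mutex_lock(&h2link->eml_lock);

            /* ... reprogram the sublink while eml_lock is held ... */

            /* the locked variant would try to take eml_lock again here,
             * so the new _unlocked helper is used instead
             */
            hdac_bus_eml_enable_interrupt_unlocked(bus, true, elid, true);

            mutex_unlock(&h2link->eml_lock);
    }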
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Liam Girdwood Signed-off-by: Bard Liao Link: https://patch.msgid.link/20241007121241.30914-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/hda-mlink.h | 4 ++++ sound/soc/sof/intel/hda-mlink.c | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/sound/hda-mlink.h b/include/sound/hda-mlink.h index 9ced94686ce3..6774f4b9e5fc 100644 --- a/include/sound/hda-mlink.h +++ b/include/sound/hda-mlink.h @@ -15,6 +15,7 @@ int hda_bus_ml_init(struct hdac_bus *bus); void hda_bus_ml_free(struct hdac_bus *bus); int hdac_bus_eml_get_count(struct hdac_bus *bus, bool alt, int elid); +void hdac_bus_eml_enable_interrupt_unlocked(struct hdac_bus *bus, bool alt, int elid, bool enable); void hdac_bus_eml_enable_interrupt(struct hdac_bus *bus, bool alt, int elid, bool enable); bool hdac_bus_eml_check_interrupt(struct hdac_bus *bus, bool alt, int elid); @@ -71,6 +72,9 @@ static inline void hda_bus_ml_free(struct hdac_bus *bus) { } static inline int hdac_bus_eml_get_count(struct hdac_bus *bus, bool alt, int elid) { return 0; } +static inline void +hdac_bus_eml_enable_interrupt_unlocked(struct hdac_bus *bus, bool alt, int elid, bool enable) { } + static inline void hdac_bus_eml_enable_interrupt(struct hdac_bus *bus, bool alt, int elid, bool enable) { } diff --git a/sound/soc/sof/intel/hda-mlink.c b/sound/soc/sof/intel/hda-mlink.c index 9a3559c78b62..46f89d6d06f8 100644 --- a/sound/soc/sof/intel/hda-mlink.c +++ b/sound/soc/sof/intel/hda-mlink.c @@ -481,6 +481,24 @@ int hdac_bus_eml_get_count(struct hdac_bus *bus, bool alt, int elid) } EXPORT_SYMBOL_NS(hdac_bus_eml_get_count, SND_SOC_SOF_HDA_MLINK); +void hdac_bus_eml_enable_interrupt_unlocked(struct hdac_bus *bus, bool alt, int elid, bool enable) +{ + struct hdac_ext2_link *h2link; + struct hdac_ext_link *hlink; + + h2link = find_ext2_link(bus, alt, elid); + if (!h2link) + return; + + if (!h2link->intc) + return; + + hlink = &h2link->hext_link; + + hdaml_link_enable_interrupt(hlink->ml_addr + AZX_REG_ML_LCTL, enable); +} +EXPORT_SYMBOL_NS(hdac_bus_eml_enable_interrupt_unlocked, SND_SOC_SOF_HDA_MLINK); + void hdac_bus_eml_enable_interrupt(struct hdac_bus *bus, bool alt, int elid, bool enable) { struct hdac_ext2_link *h2link; -- cgit v1.2.3 From d677aebd663ddc287f2b2bda098474694a0ca875 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2024 03:41:00 +0000 Subject: tcp: move sysctl_tcp_l3mdev_accept to netns_ipv4_read_rx sysctl_tcp_l3mdev_accept is read from TCP receive fast path from tcp_v6_early_demux(), __inet6_lookup_established, inet_request_bound_dev_if(). Move it to netns_ipv4_read_rx. Remove the '#ifdef CONFIG_NET_L3_MASTER_DEV' that was guarding its definition. Note this adds a hole of three bytes that could be filled later. 
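For context, a rough paraphrase (not a verbatim copy of the kernel source) of the kind of receive-path reader the move is optimizing for, modelled on inet_request_bound_dev_if():

    static inline int example_request_bound_dev_if(const struct sock *sk,
                                                   int skb_iif)
    {
    #ifdef CONFIG_NET_L3_MASTER_DEV
            struct net *net = sock_net(sk);

            /* read for every incoming packet that reaches the early
             * demux/lookup path, hence the wish to share a cacheline
             * with the other netns_ipv4_read_rx fields
             */
            if (!sk->sk_bound_dev_if &&
                READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
                    return l3mdev_master_ifindex_by_index(net, skb_iif);
    #endif
            return sk->sk_bound_dev_if;
    }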
Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Cc: Wei Wang Cc: Coco Li Link: https://patch.msgid.link/20241010034100.320832-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst | 2 +- include/net/netns/ipv4.h | 5 ++--- net/core/net_namespace.c | 4 +++- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst index 392e08a6ec04..629da6dc6d74 100644 --- a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst +++ b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst @@ -59,7 +59,7 @@ u8 sysctl_udp_early_demux u8 sysctl_nexthop_compat_mode u8 sysctl_fwmark_reflect u8 sysctl_tcp_fwmark_accept -u8 sysctl_tcp_l3mdev_accept +u8 sysctl_tcp_l3mdev_accept read_mostly __inet6_lookup_established/inet_request_bound_dev_if u8 sysctl_tcp_mtu_probing int sysctl_tcp_mtu_probe_floor int sysctl_tcp_base_mss diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 66a4cffc44ee..3b1de80b5c25 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -76,6 +76,8 @@ struct netns_ipv4 { __cacheline_group_begin(netns_ipv4_read_rx); u8 sysctl_ip_early_demux; u8 sysctl_tcp_early_demux; + u8 sysctl_tcp_l3mdev_accept; + /* 3 bytes hole, try to pack */ int sysctl_tcp_reordering; int sysctl_tcp_rmem[3]; __cacheline_group_end(netns_ipv4_read_rx); @@ -151,9 +153,6 @@ struct netns_ipv4 { u8 sysctl_fwmark_reflect; u8 sysctl_tcp_fwmark_accept; -#ifdef CONFIG_NET_L3_MASTER_DEV - u8 sysctl_tcp_l3mdev_accept; -#endif u8 sysctl_tcp_mtu_probing; int sysctl_tcp_mtu_probe_floor; int sysctl_tcp_base_mss; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index a5bc1fd8b034..0a86aff17f51 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -1159,11 +1159,13 @@ static void __init netns_ipv4_struct_check(void) sysctl_ip_early_demux); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_tcp_early_demux); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, + sysctl_tcp_l3mdev_accept); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_tcp_reordering); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_tcp_rmem); - CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18); + CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 22); } #endif -- cgit v1.2.3 From ed870e35db660724ff0d815d9a3ef9a6247ffbab Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:09 -0700 Subject: lsm: add the lsm_prop data structure When more than one security module is exporting data to audit and networking sub-systems a single 32 bit integer is no longer sufficient to represent the data. Add a structure to be used instead. The lsm_prop structure definition is intended to keep the LSM specific information private to the individual security modules. The module specific information is included in a new set of header files under include/lsm. Each security module is allowed to define the information included for its use in the lsm_prop. SELinux includes a u32 secid. Smack includes a pointer into its global label list. The conditional compilation based on feature inclusion is contained in the include/lsm files. 
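The practical effect for generic code is sketched below. This is a condensed paraphrase of the audit_filter() conversion in the next patch of this series, with a made-up function name; the caller needs no per-LSM #ifdefs because disabled modules contribute empty structs to lsm_prop:

    static int example_filter(u32 field, u32 op, void *lsmrule)
    {
            struct lsm_prop prop = { };     /* every module slot starts unset */

            /* scaffolding: the subject secid still comes from the old
             * API and is parked in the scaffold slot until the getter
             * hooks are converted later in the series
             */
            security_current_getsecid_subj(&prop.scaffold.secid);

            /* each LSM only ever looks at its own member, e.g.
             * prop->selinux.secid or prop->smack.skp
             */
            return security_audit_rule_match(&prop, field, op, lsmrule);
    }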
Cc: apparmor@lists.ubuntu.com Cc: bpf@vger.kernel.org Cc: selinux@vger.kernel.org Cc: linux-security-module@vger.kernel.org Suggested-by: Paul Moore Signed-off-by: Casey Schaufler Acked-by: John Johansen [PM: added include/linux/lsm/ to MAINTAINERS, subj tweak] Signed-off-by: Paul Moore --- MAINTAINERS | 1 + include/linux/lsm/apparmor.h | 17 +++++++++++++++++ include/linux/lsm/bpf.h | 16 ++++++++++++++++ include/linux/lsm/selinux.h | 16 ++++++++++++++++ include/linux/lsm/smack.h | 17 +++++++++++++++++ include/linux/security.h | 20 ++++++++++++++++++++ 6 files changed, 87 insertions(+) create mode 100644 include/linux/lsm/apparmor.h create mode 100644 include/linux/lsm/bpf.h create mode 100644 include/linux/lsm/selinux.h create mode 100644 include/linux/lsm/smack.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index c27f3190737f..f870842fad9c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20846,6 +20846,7 @@ Q: https://patchwork.kernel.org/project/linux-security-module/list B: mailto:linux-security-module@vger.kernel.org P: https://github.com/LinuxSecurityModule/kernel/blob/main/README.md T: git https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/lsm.git +F: include/linux/lsm/ F: include/linux/lsm_audit.h F: include/linux/lsm_hook_defs.h F: include/linux/lsm_hooks.h diff --git a/include/linux/lsm/apparmor.h b/include/linux/lsm/apparmor.h new file mode 100644 index 000000000000..612cbfacb072 --- /dev/null +++ b/include/linux/lsm/apparmor.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module interface to other subsystems. + * AppArmor presents single pointer to an aa_label structure. + */ +#ifndef __LINUX_LSM_APPARMOR_H +#define __LINUX_LSM_APPARMOR_H + +struct aa_label; + +struct lsm_prop_apparmor { +#ifdef CONFIG_SECURITY_APPARMOR + struct aa_label *label; +#endif +}; + +#endif /* ! __LINUX_LSM_APPARMOR_H */ diff --git a/include/linux/lsm/bpf.h b/include/linux/lsm/bpf.h new file mode 100644 index 000000000000..8106e206fcef --- /dev/null +++ b/include/linux/lsm/bpf.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module interface to other subsystems. + * BPF may present a single u32 value. + */ +#ifndef __LINUX_LSM_BPF_H +#define __LINUX_LSM_BPF_H +#include + +struct lsm_prop_bpf { +#ifdef CONFIG_BPF_LSM + u32 secid; +#endif +}; + +#endif /* ! __LINUX_LSM_BPF_H */ diff --git a/include/linux/lsm/selinux.h b/include/linux/lsm/selinux.h new file mode 100644 index 000000000000..9455a6b5b910 --- /dev/null +++ b/include/linux/lsm/selinux.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module interface to other subsystems. + * SELinux presents a single u32 value which is known as a secid. + */ +#ifndef __LINUX_LSM_SELINUX_H +#define __LINUX_LSM_SELINUX_H +#include + +struct lsm_prop_selinux { +#ifdef CONFIG_SECURITY_SELINUX + u32 secid; +#endif +}; + +#endif /* ! __LINUX_LSM_SELINUX_H */ diff --git a/include/linux/lsm/smack.h b/include/linux/lsm/smack.h new file mode 100644 index 000000000000..ff730dd7a734 --- /dev/null +++ b/include/linux/lsm/smack.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module interface to other subsystems. + * Smack presents a pointer into the global Smack label list. + */ +#ifndef __LINUX_LSM_SMACK_H +#define __LINUX_LSM_SMACK_H + +struct smack_known; + +struct lsm_prop_smack { +#ifdef CONFIG_SECURITY_SMACK + struct smack_known *skp; +#endif +}; + +#endif /* ! 
__LINUX_LSM_SMACK_H */ diff --git a/include/linux/security.h b/include/linux/security.h index b86ec2afc691..555249a8d121 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -34,6 +34,10 @@ #include #include #include +#include +#include +#include +#include struct linux_binprm; struct cred; @@ -152,6 +156,22 @@ enum lockdown_reason { LOCKDOWN_CONFIDENTIALITY_MAX, }; +/* scaffolding */ +struct lsm_prop_scaffold { + u32 secid; +}; + +/* + * Data exported by the security modules + */ +struct lsm_prop { + struct lsm_prop_selinux selinux; + struct lsm_prop_smack smack; + struct lsm_prop_apparmor apparmor; + struct lsm_prop_bpf bpf; + struct lsm_prop_scaffold scaffold; +}; + extern const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1]; extern u32 lsm_active_cnt; extern const struct lsm_id *lsm_idlist[]; -- cgit v1.2.3 From 870b7fdc660b38c4e1bd8bf48e62aa352ddf8f42 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:10 -0700 Subject: lsm: use lsm_prop in security_audit_rule_match Change the secid parameter of security_audit_rule_match to a lsm_prop structure pointer. Pass the entry from the lsm_prop structure for the approprite slot to the LSM hook. Change the users of security_audit_rule_match to use the lsm_prop instead of a u32. The scaffolding function lsmprop_init() fills the structure with the value of the old secid, ensuring that it is available to the appropriate module hook. The sources of the secid, security_task_getsecid() and security_inode_getsecid(), will be converted to use the lsm_prop structure later in the series. At that point the use of lsmprop_init() is dropped. Signed-off-by: Casey Schaufler [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 3 ++- include/linux/security.h | 7 ++++--- kernel/auditfilter.c | 11 +++++++---- kernel/auditsc.c | 18 ++++++++++++++---- security/apparmor/audit.c | 8 ++++++-- security/apparmor/include/audit.h | 2 +- security/integrity/ima/ima.h | 2 +- security/integrity/ima/ima_policy.c | 11 +++++++---- security/security.c | 7 ++++--- security/selinux/include/audit.h | 4 ++-- security/selinux/ss/services.c | 10 +++++++--- security/smack/smack_lsm.c | 11 ++++++++--- 12 files changed, 63 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 9eca013aa5e1..ea7f17e37756 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -416,7 +416,8 @@ LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring, LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr, void **lsmrule, gfp_t gfp) LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule) -LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule) +LSM_HOOK(int, 0, audit_rule_match, struct lsm_prop *prop, u32 field, u32 op, + void *lsmrule) LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) #endif /* CONFIG_AUDIT */ diff --git a/include/linux/security.h b/include/linux/security.h index 555249a8d121..a4f020491e7c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2115,7 +2115,8 @@ static inline void security_key_post_create_or_update(struct key *keyring, int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule, gfp_t gfp); int security_audit_rule_known(struct audit_krule *krule); -int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule); +int security_audit_rule_match(struct lsm_prop *prop, 
u32 field, u32 op, + void *lsmrule); void security_audit_rule_free(void *lsmrule); #else @@ -2131,8 +2132,8 @@ static inline int security_audit_rule_known(struct audit_krule *krule) return 0; } -static inline int security_audit_rule_match(u32 secid, u32 field, u32 op, - void *lsmrule) +static inline int security_audit_rule_match(struct lsm_prop *prop, u32 field, + u32 op, void *lsmrule) { return 0; } diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 470041c49a44..288a2092fd0d 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1339,8 +1339,8 @@ int audit_filter(int msgtype, unsigned int listtype) for (i = 0; i < e->rule.field_count; i++) { struct audit_field *f = &e->rule.fields[i]; + struct lsm_prop prop = { }; pid_t pid; - u32 sid; switch (f->type) { case AUDIT_PID: @@ -1370,9 +1370,12 @@ int audit_filter(int msgtype, unsigned int listtype) case AUDIT_SUBJ_SEN: case AUDIT_SUBJ_CLR: if (f->lsm_rule) { - security_current_getsecid_subj(&sid); - result = security_audit_rule_match(sid, - f->type, f->op, f->lsm_rule); + /* scaffolding */ + security_current_getsecid_subj( + &prop.scaffold.secid); + result = security_audit_rule_match( + &prop, f->type, f->op, + f->lsm_rule); } break; case AUDIT_EXE: diff --git a/kernel/auditsc.c b/kernel/auditsc.c index cd57053b4a69..aaf672a962d6 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -471,6 +471,7 @@ static int audit_filter_rules(struct task_struct *tsk, const struct cred *cred; int i, need_sid = 1; u32 sid; + struct lsm_prop prop = { }; unsigned int sessionid; if (ctx && rule->prio <= ctx->prio) @@ -681,7 +682,10 @@ static int audit_filter_rules(struct task_struct *tsk, security_current_getsecid_subj(&sid); need_sid = 0; } - result = security_audit_rule_match(sid, f->type, + /* scaffolding */ + prop.scaffold.secid = sid; + result = security_audit_rule_match(&prop, + f->type, f->op, f->lsm_rule); } @@ -696,15 +700,19 @@ static int audit_filter_rules(struct task_struct *tsk, if (f->lsm_rule) { /* Find files that match */ if (name) { + /* scaffolding */ + prop.scaffold.secid = name->osid; result = security_audit_rule_match( - name->osid, + &prop, f->type, f->op, f->lsm_rule); } else if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { + /* scaffolding */ + prop.scaffold.secid = n->osid; if (security_audit_rule_match( - n->osid, + &prop, f->type, f->op, f->lsm_rule)) { @@ -716,7 +724,9 @@ static int audit_filter_rules(struct task_struct *tsk, /* Find ipc objects that match */ if (!ctx || ctx->type != AUDIT_IPC) break; - if (security_audit_rule_match(ctx->ipc.osid, + /* scaffolding */ + prop.scaffold.secid = ctx->ipc.osid; + if (security_audit_rule_match(&prop, f->type, f->op, f->lsm_rule)) ++result; diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index 6b5181c668b5..87df6fa2a48d 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -264,13 +264,17 @@ int aa_audit_rule_known(struct audit_krule *rule) return 0; } -int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule) +int aa_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, void *vrule) { struct aa_audit_rule *rule = vrule; struct aa_label *label; int found = 0; - label = aa_secid_to_label(sid); + /* scaffolding */ + if (!prop->apparmor.label && prop->scaffold.secid) + label = aa_secid_to_label(prop->scaffold.secid); + else + label = prop->apparmor.label; if (!label) return -ENOENT; diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index 0c8cc86b417b..e27229349abb 100644 --- 
a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -202,6 +202,6 @@ static inline int complain_error(int error) void aa_audit_rule_free(void *vrule); int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, gfp_t gfp); int aa_audit_rule_known(struct audit_krule *rule); -int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule); +int aa_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, void *vrule); #endif /* __AA_AUDIT_H */ diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 3c323ca213d4..cdfe8c8c7bac 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -555,7 +555,7 @@ static inline void ima_filter_rule_free(void *lsmrule) { } -static inline int ima_filter_rule_match(u32 secid, u32 field, u32 op, +static inline int ima_filter_rule_match(struct lsm_prop *prop, u32 field, u32 op, void *lsmrule) { return -EINVAL; diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 09da8e639239..22a62e675ebc 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -635,7 +635,7 @@ static bool ima_match_rules(struct ima_rule_entry *rule, return false; for (i = 0; i < MAX_LSM_RULES; i++) { int rc = 0; - u32 osid; + struct lsm_prop prop = { }; if (!lsm_rule->lsm[i].rule) { if (!lsm_rule->lsm[i].args_p) @@ -649,15 +649,18 @@ retry: case LSM_OBJ_USER: case LSM_OBJ_ROLE: case LSM_OBJ_TYPE: - security_inode_getsecid(inode, &osid); - rc = ima_filter_rule_match(osid, lsm_rule->lsm[i].type, + /* scaffolding */ + security_inode_getsecid(inode, &prop.scaffold.secid); + rc = ima_filter_rule_match(&prop, lsm_rule->lsm[i].type, Audit_equal, lsm_rule->lsm[i].rule); break; case LSM_SUBJ_USER: case LSM_SUBJ_ROLE: case LSM_SUBJ_TYPE: - rc = ima_filter_rule_match(secid, lsm_rule->lsm[i].type, + /* scaffolding */ + prop.scaffold.secid = secid; + rc = ima_filter_rule_match(&prop, lsm_rule->lsm[i].type, Audit_equal, lsm_rule->lsm[i].rule); break; diff --git a/security/security.c b/security/security.c index 6875eb4a59fc..deab7f912e12 100644 --- a/security/security.c +++ b/security/security.c @@ -5570,7 +5570,7 @@ void security_audit_rule_free(void *lsmrule) /** * security_audit_rule_match() - Check if a label matches an audit rule - * @secid: security label + * @prop: security label * @field: LSM audit field * @op: matching operator * @lsmrule: audit rule @@ -5581,9 +5581,10 @@ void security_audit_rule_free(void *lsmrule) * Return: Returns 1 if secid matches the rule, 0 if it does not, -ERRNO on * failure. */ -int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule) +int security_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, + void *lsmrule) { - return call_int_hook(audit_rule_match, secid, field, op, lsmrule); + return call_int_hook(audit_rule_match, prop, field, op, lsmrule); } #endif /* CONFIG_AUDIT */ diff --git a/security/selinux/include/audit.h b/security/selinux/include/audit.h index 168d17be7df3..c745ea2a993d 100644 --- a/security/selinux/include/audit.h +++ b/security/selinux/include/audit.h @@ -41,7 +41,7 @@ void selinux_audit_rule_free(void *rule); /** * selinux_audit_rule_match - determine if a context ID matches a rule. 
- * @sid: the context ID to check + * @prop: includes the context ID to check * @field: the field this rule refers to * @op: the operator the rule uses * @rule: pointer to the audit rule to check against @@ -49,7 +49,7 @@ void selinux_audit_rule_free(void *rule); * Returns 1 if the context id matches the rule, 0 if it does not, and * -errno on failure. */ -int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *rule); +int selinux_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, void *rule); /** * selinux_audit_rule_known - check to see if rule contains selinux fields. diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index a9830fbfc5c6..e0c14773a7b7 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -3635,7 +3635,7 @@ int selinux_audit_rule_known(struct audit_krule *rule) return 0; } -int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule) +int selinux_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, void *vrule) { struct selinux_state *state = &selinux_state; struct selinux_policy *policy; @@ -3661,10 +3661,14 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule) goto out; } - ctxt = sidtab_search(policy->sidtab, sid); + /* scaffolding */ + if (!prop->selinux.secid && prop->scaffold.secid) + prop->selinux.secid = prop->scaffold.secid; + + ctxt = sidtab_search(policy->sidtab, prop->selinux.secid); if (unlikely(!ctxt)) { WARN_ONCE(1, "selinux_audit_rule_match: unrecognized SID %d\n", - sid); + prop->selinux.secid); match = -ENOENT; goto out; } diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 370fd594da12..535233ad7203 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4757,7 +4757,7 @@ static int smack_audit_rule_known(struct audit_krule *krule) /** * smack_audit_rule_match - Audit given object ? - * @secid: security id for identifying the object to test + * @prop: security id for identifying the object to test * @field: audit rule flags given from user-space * @op: required testing operator * @vrule: smack internal rule presentation @@ -4765,7 +4765,8 @@ static int smack_audit_rule_known(struct audit_krule *krule) * The core Audit hook. It's used to take the decision of * whether to audit or not to audit a given object. */ -static int smack_audit_rule_match(u32 secid, u32 field, u32 op, void *vrule) +static int smack_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, + void *vrule) { struct smack_known *skp; char *rule = vrule; @@ -4778,7 +4779,11 @@ static int smack_audit_rule_match(u32 secid, u32 field, u32 op, void *vrule) if (field != AUDIT_SUBJ_USER && field != AUDIT_OBJ_USER) return 0; - skp = smack_from_secid(secid); + /* scaffolding */ + if (!prop->smack.skp && prop->scaffold.secid) + skp = smack_from_secid(prop->scaffold.secid); + else + skp = prop->smack.skp; /* * No need to do string comparisons. If a match occurs, -- cgit v1.2.3 From 6f2f724f0e116d9ea960ff3dd645add12e60e176 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:11 -0700 Subject: lsm: add lsmprop_to_secctx hook Add a new hook security_lsmprop_to_secctx() and its LSM specific implementations. The LSM specific code will use the lsm_prop element allocated for that module. This allows for the possibility that more than one module may be called upon to translate a secid to a string, as can occur in the audit code. 
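A condensed sketch of the consumer pattern the new hook enables; the audit-side conversion lands later in this series, the helper name below is made up, and error handling is abbreviated:

    static void example_log_obj_ctx(struct audit_buffer *ab,
                                    struct lsm_prop *prop)
    {
            char *ctx = NULL;
            u32 len;

            if (security_lsmprop_to_secctx(prop, &ctx, &len))
                    return;         /* real callers record the failure */

            audit_log_format(ab, " obj=%s", ctx);
            security_release_secctx(ctx, len);
    }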
Signed-off-by: Casey Schaufler [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 ++ include/linux/security.h | 10 +++++++++- security/apparmor/include/secid.h | 2 ++ security/apparmor/lsm.c | 1 + security/apparmor/secid.c | 25 +++++++++++++++++++++++-- security/security.c | 21 +++++++++++++++++++++ security/selinux/hooks.c | 16 ++++++++++++++-- security/selinux/include/audit.h | 3 ++- security/smack/smack_lsm.c | 31 ++++++++++++++++++++++++++----- 9 files changed, 100 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index ea7f17e37756..ed6ea0b1ec57 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -294,6 +294,8 @@ LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) LSM_HOOK(int, 0, ismaclabel, const char *name) LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata, u32 *seclen) +LSM_HOOK(int, -EOPNOTSUPP, lsmprop_to_secctx, struct lsm_prop *prop, + char **secdata, u32 *seclen) LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) diff --git a/include/linux/security.h b/include/linux/security.h index a4f020491e7c..f1c68e38b15d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -535,6 +535,7 @@ int security_setprocattr(int lsmid, const char *name, void *value, size_t size); int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); +int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen); int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); void security_release_secctx(char *secdata, u32 seclen); void security_inode_invalidate_secctx(struct inode *inode); @@ -1488,7 +1489,14 @@ static inline int security_ismaclabel(const char *name) return 0; } -static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +static inline int security_secid_to_secctx(u32 secid, char **secdata, + u32 *seclen) +{ + return -EOPNOTSUPP; +} + +static inline int security_lsmprop_to_secctx(struct lsm_prop *prop, + char **secdata, u32 *seclen) { return -EOPNOTSUPP; } diff --git a/security/apparmor/include/secid.h b/security/apparmor/include/secid.h index a912a5d5d04f..cc6d1c9f4a47 100644 --- a/security/apparmor/include/secid.h +++ b/security/apparmor/include/secid.h @@ -26,6 +26,8 @@ extern int apparmor_display_secid_mode; struct aa_label *aa_secid_to_label(u32 secid); int apparmor_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); +int apparmor_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, + u32 *seclen); int apparmor_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); void apparmor_release_secctx(char *secdata, u32 seclen); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index f5d05297d59e..a58b72ed246c 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1517,6 +1517,7 @@ static struct security_hook_list apparmor_hooks[] __ro_after_init = { #endif LSM_HOOK_INIT(secid_to_secctx, apparmor_secid_to_secctx), + LSM_HOOK_INIT(lsmprop_to_secctx, apparmor_lsmprop_to_secctx), LSM_HOOK_INIT(secctx_to_secid, apparmor_secctx_to_secid), LSM_HOOK_INIT(release_secctx, apparmor_release_secctx), diff --git 
a/security/apparmor/secid.c b/security/apparmor/secid.c index 83d3d1e6d9dc..34610888559f 100644 --- a/security/apparmor/secid.c +++ b/security/apparmor/secid.c @@ -61,10 +61,10 @@ struct aa_label *aa_secid_to_label(u32 secid) return xa_load(&aa_secids, secid); } -int apparmor_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +static int apparmor_label_to_secctx(struct aa_label *label, char **secdata, + u32 *seclen) { /* TODO: cache secctx and ref count so we don't have to recreate */ - struct aa_label *label = aa_secid_to_label(secid); int flags = FLAG_VIEW_SUBNS | FLAG_HIDDEN_UNCONFINED | FLAG_ABS_ROOT; int len; @@ -90,6 +90,27 @@ int apparmor_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) return 0; } +int apparmor_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +{ + struct aa_label *label = aa_secid_to_label(secid); + + return apparmor_label_to_secctx(label, secdata, seclen); +} + +int apparmor_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, + u32 *seclen) +{ + struct aa_label *label; + + /* scaffolding */ + if (!prop->apparmor.label && prop->scaffold.secid) + label = aa_secid_to_label(prop->scaffold.secid); + else + label = prop->apparmor.label; + + return apparmor_label_to_secctx(label, secdata, seclen); +} + int apparmor_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) { struct aa_label *label; diff --git a/security/security.c b/security/security.c index deab7f912e12..1842f1325e77 100644 --- a/security/security.c +++ b/security/security.c @@ -4311,6 +4311,27 @@ int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) } EXPORT_SYMBOL(security_secid_to_secctx); +/** + * security_lsmprop_to_secctx() - Convert a lsm_prop to a secctx + * @prop: lsm specific information + * @secdata: secctx + * @seclen: secctx length + * + * Convert a @prop entry to security context. If @secdata is NULL the + * length of the result will be returned in @seclen, but no @secdata + * will be returned. This does mean that the length could change between + * calls to check the length and the next call which actually allocates + * and returns the @secdata. + * + * Return: Return 0 on success, error on failure. 
+ */ +int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, + u32 *seclen) +{ + return call_int_hook(lsmprop_to_secctx, prop, secdata, seclen); +} +EXPORT_SYMBOL(security_lsmprop_to_secctx); + /** * security_secctx_to_secid() - Convert a secctx to a secid * @secdata: secctx diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index fc926d3cac6e..6e88faf3c6e5 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6601,8 +6601,19 @@ static int selinux_ismaclabel(const char *name) static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) { - return security_sid_to_context(secid, - secdata, seclen); + return security_sid_to_context(secid, secdata, seclen); +} + +static int selinux_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, + u32 *seclen) +{ + u32 secid = prop->selinux.secid; + + /* scaffolding */ + if (!secid) + secid = prop->scaffold.secid; + + return selinux_secid_to_secctx(secid, secdata, seclen); } static int selinux_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) @@ -7347,6 +7358,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(inode_alloc_security, selinux_inode_alloc_security), LSM_HOOK_INIT(sem_alloc_security, selinux_sem_alloc_security), LSM_HOOK_INIT(secid_to_secctx, selinux_secid_to_secctx), + LSM_HOOK_INIT(lsmprop_to_secctx, selinux_lsmprop_to_secctx), LSM_HOOK_INIT(inode_getsecctx, selinux_inode_getsecctx), LSM_HOOK_INIT(sk_alloc_security, selinux_sk_alloc_security), LSM_HOOK_INIT(tun_dev_alloc_security, selinux_tun_dev_alloc_security), diff --git a/security/selinux/include/audit.h b/security/selinux/include/audit.h index c745ea2a993d..d5b0425055e4 100644 --- a/security/selinux/include/audit.h +++ b/security/selinux/include/audit.h @@ -49,7 +49,8 @@ void selinux_audit_rule_free(void *rule); * Returns 1 if the context id matches the rule, 0 if it does not, and * -errno on failure. */ -int selinux_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, void *rule); +int selinux_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, + void *rule); /** * selinux_audit_rule_known - check to see if rule contains selinux fields. diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 535233ad7203..66da7cbcc0b7 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4768,7 +4768,7 @@ static int smack_audit_rule_known(struct audit_krule *krule) static int smack_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, void *vrule) { - struct smack_known *skp; + struct smack_known *skp = prop->smack.skp; char *rule = vrule; if (unlikely(!rule)) { @@ -4780,10 +4780,8 @@ static int smack_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, return 0; /* scaffolding */ - if (!prop->smack.skp && prop->scaffold.secid) + if (!skp && prop->scaffold.secid) skp = smack_from_secid(prop->scaffold.secid); - else - skp = prop->smack.skp; /* * No need to do string comparisons. If a match occurs, @@ -4814,7 +4812,6 @@ static int smack_ismaclabel(const char *name) return (strcmp(name, XATTR_SMACK_SUFFIX) == 0); } - /** * smack_secid_to_secctx - return the smack label for a secid * @secid: incoming integer @@ -4833,6 +4830,29 @@ static int smack_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) return 0; } +/** + * smack_lsmprop_to_secctx - return the smack label + * @prop: includes incoming Smack data + * @secdata: destination + * @seclen: how long it is + * + * Exists for audit code. 
+ */ +static int smack_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, + u32 *seclen) +{ + struct smack_known *skp = prop->smack.skp; + + /* scaffolding */ + if (!skp && prop->scaffold.secid) + skp = smack_from_secid(prop->scaffold.secid); + + if (secdata) + *secdata = skp->smk_known; + *seclen = strlen(skp->smk_known); + return 0; +} + /** * smack_secctx_to_secid - return the secid for a smack label * @secdata: smack label @@ -5192,6 +5212,7 @@ static struct security_hook_list smack_hooks[] __ro_after_init = { LSM_HOOK_INIT(ismaclabel, smack_ismaclabel), LSM_HOOK_INIT(secid_to_secctx, smack_secid_to_secctx), + LSM_HOOK_INIT(lsmprop_to_secctx, smack_lsmprop_to_secctx), LSM_HOOK_INIT(secctx_to_secid, smack_secctx_to_secid), LSM_HOOK_INIT(inode_notifysecctx, smack_inode_notifysecctx), LSM_HOOK_INIT(inode_setsecctx, smack_inode_setsecctx), -- cgit v1.2.3 From 7183abccd8ac2c486363e267b5d84032818eb725 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:12 -0700 Subject: audit: maintain an lsm_prop in audit_context Replace the secid value stored in struct audit_context with a struct lsm_prop. Change the code that uses this value to accommodate the change. security_audit_rule_match() expects a lsm_prop, so existing scaffolding can be removed. A call to security_secid_to_secctx() is changed to security_lsmprop_to_secctx(). The call to security_ipc_getsecid() is scaffolded. A new function lsmprop_is_set() is introduced to identify whether an lsm_prop contains a non-zero value. Signed-off-by: Casey Schaufler [PM: subject line tweak, fix lsmprop_is_set() typo] Signed-off-by: Paul Moore --- include/linux/security.h | 24 ++++++++++++++++++++++++ kernel/audit.h | 3 ++- kernel/auditsc.c | 19 ++++++++----------- 3 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index f1c68e38b15d..c029bfe2c5bb 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -291,6 +291,19 @@ static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id) #ifdef CONFIG_SECURITY +/** + * lsmprop_is_set - report if there is a value in the lsm_prop + * @prop: Pointer to the exported LSM data + * + * Returns true if there is a value set, false otherwise + */ +static inline bool lsmprop_is_set(struct lsm_prop *prop) +{ + const struct lsm_prop empty = {}; + + return !!memcmp(prop, &empty, sizeof(*prop)); +} + int call_blocking_lsm_notifier(enum lsm_event event, void *data); int register_blocking_lsm_notifier(struct notifier_block *nb); int unregister_blocking_lsm_notifier(struct notifier_block *nb); @@ -552,6 +565,17 @@ int security_bdev_setintegrity(struct block_device *bdev, size_t size); #else /* CONFIG_SECURITY */ +/** + * lsmprop_is_set - report if there is a value in the lsm_prop + * @prop: Pointer to the exported LSM data + * + * Returns true if there is a value set, false otherwise + */ +static inline bool lsmprop_is_set(struct lsm_prop *prop) +{ + return false; +} + static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) { return 0; diff --git a/kernel/audit.h b/kernel/audit.h index a60d2840559e..d14924a887c9 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -160,7 +161,7 @@ struct audit_context { kuid_t uid; kgid_t gid; umode_t mode; - u32 osid; + struct lsm_prop oprop; int has_perm; uid_t perm_uid; gid_t perm_gid; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 
aaf672a962d6..d92326215f65 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -724,9 +724,7 @@ static int audit_filter_rules(struct task_struct *tsk, /* Find ipc objects that match */ if (!ctx || ctx->type != AUDIT_IPC) break; - /* scaffolding */ - prop.scaffold.secid = ctx->ipc.osid; - if (security_audit_rule_match(&prop, + if (security_audit_rule_match(&ctx->ipc.oprop, f->type, f->op, f->lsm_rule)) ++result; @@ -1394,19 +1392,17 @@ static void show_special(struct audit_context *context, int *call_panic) audit_log_format(ab, " a%d=%lx", i, context->socketcall.args[i]); break; } - case AUDIT_IPC: { - u32 osid = context->ipc.osid; - + case AUDIT_IPC: audit_log_format(ab, "ouid=%u ogid=%u mode=%#ho", from_kuid(&init_user_ns, context->ipc.uid), from_kgid(&init_user_ns, context->ipc.gid), context->ipc.mode); - if (osid) { + if (lsmprop_is_set(&context->ipc.oprop)) { char *ctx = NULL; u32 len; - if (security_secid_to_secctx(osid, &ctx, &len)) { - audit_log_format(ab, " osid=%u", osid); + if (security_lsmprop_to_secctx(&context->ipc.oprop, + &ctx, &len)) { *call_panic = 1; } else { audit_log_format(ab, " obj=%s", ctx); @@ -1426,7 +1422,7 @@ static void show_special(struct audit_context *context, int *call_panic) context->ipc.perm_gid, context->ipc.perm_mode); } - break; } + break; case AUDIT_MQ_OPEN: audit_log_format(ab, "oflag=0x%x mode=%#ho mq_flags=0x%lx mq_maxmsg=%ld " @@ -2642,7 +2638,8 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp) context->ipc.gid = ipcp->gid; context->ipc.mode = ipcp->mode; context->ipc.has_perm = 0; - security_ipc_getsecid(ipcp, &context->ipc.osid); + /* scaffolding */ + security_ipc_getsecid(ipcp, &context->ipc.oprop.scaffold.secid); context->type = AUDIT_IPC; } -- cgit v1.2.3 From f4602f163c98bc93c118e196466c1c98186adb67 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:13 -0700 Subject: lsm: use lsm_prop in security_ipc_getsecid There may be more than one LSM that provides IPC data for auditing. Change security_ipc_getsecid() to fill in a lsm_prop structure instead of the u32 secid. Change the name to security_ipc_getlsmprop() to reflect the change. 
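Caller-side the contract stays simple, as in this made-up example: security_ipc_getlsmprop() zeroes the whole structure before invoking the LSM hooks (see the security.c hunk below), so a stack-allocated lsm_prop needs no explicit initialization and lsmprop_is_set() reports whether any module attached a label:

    static bool example_ipc_is_labeled(struct kern_ipc_perm *ipcp)
    {
            struct lsm_prop prop;   /* zeroed by the helper via lsmprop_init() */

            security_ipc_getlsmprop(ipcp, &prop);
            return lsmprop_is_set(&prop);
    }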
Cc: audit@vger.kernel.org Cc: linux-security-module@vger.kernel.org Cc: selinux@vger.kernel.org Signed-off-by: Casey Schaufler [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 4 ++-- include/linux/security.h | 18 +++++++++++++++--- kernel/auditsc.c | 3 +-- security/security.c | 14 +++++++------- security/selinux/hooks.c | 9 ++++++--- security/smack/smack_lsm.c | 16 +++++++++------- 6 files changed, 40 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index ed6ea0b1ec57..6ef2a345ea03 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -256,8 +256,8 @@ LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p, struct inode *inode) LSM_HOOK(int, 0, userns_create, const struct cred *cred) LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag) -LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp, - u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, ipc_getlsmprop, struct kern_ipc_perm *ipcp, + struct lsm_prop *prop) LSM_HOOK(int, 0, msg_msg_alloc_security, struct msg_msg *msg) LSM_HOOK(void, LSM_RET_VOID, msg_msg_free_security, struct msg_msg *msg) LSM_HOOK(int, 0, msg_queue_alloc_security, struct kern_ipc_perm *perm) diff --git a/include/linux/security.h b/include/linux/security.h index c029bfe2c5bb..15aef5f68e77 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -289,6 +289,17 @@ static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id) return kernel_load_data_str[id]; } +/** + * lsmprop_init - initialize a lsm_prop structure + * @prop: Pointer to the data to initialize + * + * Set all secid for all modules to the specified value. + */ +static inline void lsmprop_init(struct lsm_prop *prop) +{ + memset(prop, 0, sizeof(*prop)); +} + #ifdef CONFIG_SECURITY /** @@ -515,7 +526,7 @@ int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, void security_task_to_inode(struct task_struct *p, struct inode *inode); int security_create_user_ns(const struct cred *cred); int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); -void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid); +void security_ipc_getlsmprop(struct kern_ipc_perm *ipcp, struct lsm_prop *prop); int security_msg_msg_alloc(struct msg_msg *msg); void security_msg_msg_free(struct msg_msg *msg); int security_msg_queue_alloc(struct kern_ipc_perm *msq); @@ -1377,9 +1388,10 @@ static inline int security_ipc_permission(struct kern_ipc_perm *ipcp, return 0; } -static inline void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) +static inline void security_ipc_getlsmprop(struct kern_ipc_perm *ipcp, + struct lsm_prop *prop) { - *secid = 0; + lsmprop_init(prop); } static inline int security_msg_msg_alloc(struct msg_msg *msg) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index d92326215f65..5019eb32a97f 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2638,8 +2638,7 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp) context->ipc.gid = ipcp->gid; context->ipc.mode = ipcp->mode; context->ipc.has_perm = 0; - /* scaffolding */ - security_ipc_getsecid(ipcp, &context->ipc.oprop.scaffold.secid); + security_ipc_getlsmprop(ipcp, &context->ipc.oprop); context->type = AUDIT_IPC; } diff --git a/security/security.c b/security/security.c index 1842f1325e77..f269421c2d72 100644 --- a/security/security.c +++ b/security/security.c @@ -3730,17 +3730,17 @@ int 
security_ipc_permission(struct kern_ipc_perm *ipcp, short flag) } /** - * security_ipc_getsecid() - Get the sysv ipc object's secid + * security_ipc_getlsmprop() - Get the sysv ipc object LSM data * @ipcp: ipc permission structure - * @secid: secid pointer + * @prop: pointer to lsm information * - * Get the secid associated with the ipc object. In case of failure, @secid - * will be set to zero. + * Get the lsm information associated with the ipc object. */ -void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) + +void security_ipc_getlsmprop(struct kern_ipc_perm *ipcp, struct lsm_prop *prop) { - *secid = 0; - call_void_hook(ipc_getsecid, ipcp, secid); + lsmprop_init(prop); + call_void_hook(ipc_getlsmprop, ipcp, prop); } /** diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6e88faf3c6e5..1d43367009ed 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6319,10 +6319,13 @@ static int selinux_ipc_permission(struct kern_ipc_perm *ipcp, short flag) return ipc_has_perm(ipcp, av); } -static void selinux_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) +static void selinux_ipc_getlsmprop(struct kern_ipc_perm *ipcp, + struct lsm_prop *prop) { struct ipc_security_struct *isec = selinux_ipc(ipcp); - *secid = isec->sid; + prop->selinux.secid = isec->sid; + /* scaffolding */ + prop->scaffold.secid = isec->sid; } static void selinux_d_instantiate(struct dentry *dentry, struct inode *inode) @@ -7215,7 +7218,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(userns_create, selinux_userns_create), LSM_HOOK_INIT(ipc_permission, selinux_ipc_permission), - LSM_HOOK_INIT(ipc_getsecid, selinux_ipc_getsecid), + LSM_HOOK_INIT(ipc_getlsmprop, selinux_ipc_getlsmprop), LSM_HOOK_INIT(msg_queue_associate, selinux_msg_queue_associate), LSM_HOOK_INIT(msg_queue_msgctl, selinux_msg_queue_msgctl), diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 66da7cbcc0b7..fed44b4fc73d 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -3435,16 +3435,18 @@ static int smack_ipc_permission(struct kern_ipc_perm *ipp, short flag) } /** - * smack_ipc_getsecid - Extract smack security id + * smack_ipc_getlsmprop - Extract smack security data * @ipp: the object permissions - * @secid: where result will be saved + * @prop: where result will be saved */ -static void smack_ipc_getsecid(struct kern_ipc_perm *ipp, u32 *secid) +static void smack_ipc_getlsmprop(struct kern_ipc_perm *ipp, struct lsm_prop *prop) { - struct smack_known **blob = smack_ipc(ipp); - struct smack_known *iskp = *blob; + struct smack_known **iskpp = smack_ipc(ipp); + struct smack_known *iskp = *iskpp; - *secid = iskp->smk_secid; + prop->smack.skp = iskp; + /* scaffolding */ + prop->scaffold.secid = iskp->smk_secid; } /** @@ -5140,7 +5142,7 @@ static struct security_hook_list smack_hooks[] __ro_after_init = { LSM_HOOK_INIT(task_to_inode, smack_task_to_inode), LSM_HOOK_INIT(ipc_permission, smack_ipc_permission), - LSM_HOOK_INIT(ipc_getsecid, smack_ipc_getsecid), + LSM_HOOK_INIT(ipc_getlsmprop, smack_ipc_getlsmprop), LSM_HOOK_INIT(msg_msg_alloc_security, smack_msg_msg_alloc_security), -- cgit v1.2.3 From 37f670aacd481128ad9a940ac2d3372aecd92824 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:15 -0700 Subject: lsm: use lsm_prop in security_current_getsecid Change the security_current_getsecid_subj() and security_task_getsecid_obj() interfaces to fill in a lsm_prop structure instead of a u32 secid. 
Audit interfaces will need to collect all possible security data for possible reporting. Cc: linux-integrity@vger.kernel.org Cc: audit@vger.kernel.org Cc: selinux@vger.kernel.org Signed-off-by: Casey Schaufler [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 6 ++-- include/linux/security.h | 13 ++++---- kernel/audit.c | 11 +++---- kernel/auditfilter.c | 3 +- kernel/auditsc.c | 22 ++++++++----- net/netlabel/netlabel_unlabeled.c | 5 ++- net/netlabel/netlabel_user.h | 6 +++- security/apparmor/lsm.c | 20 ++++++++---- security/integrity/ima/ima.h | 6 ++-- security/integrity/ima/ima_api.c | 6 ++-- security/integrity/ima/ima_appraise.c | 6 ++-- security/integrity/ima/ima_main.c | 59 ++++++++++++++++++----------------- security/integrity/ima/ima_policy.c | 14 ++++----- security/security.c | 28 ++++++++--------- security/selinux/hooks.c | 17 ++++++---- security/smack/smack_lsm.c | 25 +++++++++------ 16 files changed, 139 insertions(+), 108 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 6ef2a345ea03..8a90fd9ff3c8 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -235,9 +235,9 @@ LSM_HOOK(int, 0, task_fix_setgroups, struct cred *new, const struct cred * old) LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid) LSM_HOOK(int, 0, task_getpgid, struct task_struct *p) LSM_HOOK(int, 0, task_getsid, struct task_struct *p) -LSM_HOOK(void, LSM_RET_VOID, current_getsecid_subj, u32 *secid) -LSM_HOOK(void, LSM_RET_VOID, task_getsecid_obj, - struct task_struct *p, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, current_getlsmprop_subj, struct lsm_prop *prop) +LSM_HOOK(void, LSM_RET_VOID, task_getlsmprop_obj, + struct task_struct *p, struct lsm_prop *prop) LSM_HOOK(int, 0, task_setnice, struct task_struct *p, int nice) LSM_HOOK(int, 0, task_setioprio, struct task_struct *p, int ioprio) LSM_HOOK(int, 0, task_getioprio, struct task_struct *p) diff --git a/include/linux/security.h b/include/linux/security.h index 15aef5f68e77..9bc8153f4e8b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -507,8 +507,8 @@ int security_task_fix_setgroups(struct cred *new, const struct cred *old); int security_task_setpgid(struct task_struct *p, pid_t pgid); int security_task_getpgid(struct task_struct *p); int security_task_getsid(struct task_struct *p); -void security_current_getsecid_subj(u32 *secid); -void security_task_getsecid_obj(struct task_struct *p, u32 *secid); +void security_current_getlsmprop_subj(struct lsm_prop *prop); +void security_task_getlsmprop_obj(struct task_struct *p, struct lsm_prop *prop); int security_task_setnice(struct task_struct *p, int nice); int security_task_setioprio(struct task_struct *p, int ioprio); int security_task_getioprio(struct task_struct *p); @@ -1305,14 +1305,15 @@ static inline int security_task_getsid(struct task_struct *p) return 0; } -static inline void security_current_getsecid_subj(u32 *secid) +static inline void security_current_getlsmprop_subj(struct lsm_prop *prop) { - *secid = 0; + lsmprop_init(prop); } -static inline void security_task_getsecid_obj(struct task_struct *p, u32 *secid) +static inline void security_task_getlsmprop_obj(struct task_struct *p, + struct lsm_prop *prop) { - *secid = 0; + lsmprop_init(prop); } static inline int security_task_setnice(struct task_struct *p, int nice) diff --git a/kernel/audit.c b/kernel/audit.c index 47c41e6f9ea9..d2797e8fe182 100644 --- a/kernel/audit.c +++ b/kernel/audit.c 
@@ -2179,16 +2179,16 @@ void audit_log_key(struct audit_buffer *ab, char *key) int audit_log_task_context(struct audit_buffer *ab) { + struct lsm_prop prop; char *ctx = NULL; unsigned len; int error; - u32 sid; - security_current_getsecid_subj(&sid); - if (!sid) + security_current_getlsmprop_subj(&prop); + if (!lsmprop_is_set(&prop)) return 0; - error = security_secid_to_secctx(sid, &ctx, &len); + error = security_lsmprop_to_secctx(&prop, &ctx, &len); if (error) { if (error != -EINVAL) goto error_path; @@ -2405,8 +2405,7 @@ int audit_signal_info(int sig, struct task_struct *t) audit_sig_uid = auid; else audit_sig_uid = uid; - /* scaffolding */ - security_current_getsecid_subj(&audit_sig_lsm.scaffold.secid); + security_current_getlsmprop_subj(&audit_sig_lsm); } return audit_signal_info_syscall(t); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 288a2092fd0d..a7de3dabe6e1 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1371,8 +1371,7 @@ int audit_filter(int msgtype, unsigned int listtype) case AUDIT_SUBJ_CLR: if (f->lsm_rule) { /* scaffolding */ - security_current_getsecid_subj( - &prop.scaffold.secid); + security_current_getlsmprop_subj(&prop); result = security_audit_rule_match( &prop, f->type, f->op, f->lsm_rule); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 5019eb32a97f..6b2b2a8d5647 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -470,7 +470,6 @@ static int audit_filter_rules(struct task_struct *tsk, { const struct cred *cred; int i, need_sid = 1; - u32 sid; struct lsm_prop prop = { }; unsigned int sessionid; @@ -675,15 +674,14 @@ static int audit_filter_rules(struct task_struct *tsk, * fork()/copy_process() in which case * the new @tsk creds are still a dup * of @current's creds so we can still - * use security_current_getsecid_subj() + * use + * security_current_getlsmprop_subj() * here even though it always refs * @current's creds */ - security_current_getsecid_subj(&sid); + security_current_getlsmprop_subj(&prop); need_sid = 0; } - /* scaffolding */ - prop.scaffold.secid = sid; result = security_audit_rule_match(&prop, f->type, f->op, @@ -2730,12 +2728,15 @@ int __audit_sockaddr(int len, void *a) void __audit_ptrace(struct task_struct *t) { struct audit_context *context = audit_context(); + struct lsm_prop prop; context->target_pid = task_tgid_nr(t); context->target_auid = audit_get_loginuid(t); context->target_uid = task_uid(t); context->target_sessionid = audit_get_sessionid(t); - security_task_getsecid_obj(t, &context->target_sid); + security_task_getlsmprop_obj(t, &prop); + /* scaffolding */ + context->target_sid = prop.scaffold.secid; memcpy(context->target_comm, t->comm, TASK_COMM_LEN); } @@ -2751,6 +2752,7 @@ int audit_signal_info_syscall(struct task_struct *t) struct audit_aux_data_pids *axp; struct audit_context *ctx = audit_context(); kuid_t t_uid = task_uid(t); + struct lsm_prop prop; if (!audit_signals || audit_dummy_context()) return 0; @@ -2762,7 +2764,9 @@ int audit_signal_info_syscall(struct task_struct *t) ctx->target_auid = audit_get_loginuid(t); ctx->target_uid = t_uid; ctx->target_sessionid = audit_get_sessionid(t); - security_task_getsecid_obj(t, &ctx->target_sid); + security_task_getlsmprop_obj(t, &prop); + /* scaffolding */ + ctx->target_sid = prop.scaffold.secid; memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN); return 0; } @@ -2783,7 +2787,9 @@ int audit_signal_info_syscall(struct task_struct *t) axp->target_auid[axp->pid_count] = audit_get_loginuid(t); axp->target_uid[axp->pid_count] = t_uid; 
axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t); - security_task_getsecid_obj(t, &axp->target_sid[axp->pid_count]); + security_task_getlsmprop_obj(t, &prop); + /* scaffolding */ + axp->target_sid[axp->pid_count] = prop.scaffold.secid; memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN); axp->pid_count++; diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 9996883bf2b7..5925f48a3ade 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1534,11 +1534,14 @@ int __init netlbl_unlabel_defconf(void) int ret_val; struct netlbl_dom_map *entry; struct netlbl_audit audit_info; + struct lsm_prop prop; /* Only the kernel is allowed to call this function and the only time * it is called is at bootup before the audit subsystem is reporting * messages so don't worry to much about these values. */ - security_current_getsecid_subj(&audit_info.secid); + security_current_getlsmprop_subj(&prop); + /* scaffolding */ + audit_info.secid = prop.scaffold.secid; audit_info.loginuid = GLOBAL_ROOT_UID; audit_info.sessionid = 0; diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index d6c5b31eb4eb..39f4f6df5f51 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -32,7 +32,11 @@ */ static inline void netlbl_netlink_auditinfo(struct netlbl_audit *audit_info) { - security_current_getsecid_subj(&audit_info->secid); + struct lsm_prop prop; + + security_current_getlsmprop_subj(&prop); + /* scaffolding */ + audit_info->secid = prop.scaffold.secid; audit_info->loginuid = audit_get_loginuid(current); audit_info->sessionid = audit_get_sessionid(current); } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index a58b72ed246c..6331bcb35ec0 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -982,17 +982,24 @@ static void apparmor_bprm_committed_creds(const struct linux_binprm *bprm) return; } -static void apparmor_current_getsecid_subj(u32 *secid) +static void apparmor_current_getlsmprop_subj(struct lsm_prop *prop) { struct aa_label *label = __begin_current_label_crit_section(); - *secid = label->secid; + + prop->apparmor.label = label; + /* scaffolding */ + prop->scaffold.secid = label->secid; __end_current_label_crit_section(label); } -static void apparmor_task_getsecid_obj(struct task_struct *p, u32 *secid) +static void apparmor_task_getlsmprop_obj(struct task_struct *p, + struct lsm_prop *prop) { struct aa_label *label = aa_get_task_label(p); - *secid = label->secid; + + prop->apparmor.label = label; + /* scaffolding */ + prop->scaffold.secid = label->secid; aa_put_label(label); } @@ -1503,8 +1510,9 @@ static struct security_hook_list apparmor_hooks[] __ro_after_init = { LSM_HOOK_INIT(task_free, apparmor_task_free), LSM_HOOK_INIT(task_alloc, apparmor_task_alloc), - LSM_HOOK_INIT(current_getsecid_subj, apparmor_current_getsecid_subj), - LSM_HOOK_INIT(task_getsecid_obj, apparmor_task_getsecid_obj), + LSM_HOOK_INIT(current_getlsmprop_subj, + apparmor_current_getlsmprop_subj), + LSM_HOOK_INIT(task_getlsmprop_obj, apparmor_task_getlsmprop_obj), LSM_HOOK_INIT(task_setrlimit, apparmor_task_setrlimit), LSM_HOOK_INIT(task_kill, apparmor_task_kill), LSM_HOOK_INIT(userns_create, apparmor_userns_create), diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index cdfe8c8c7bac..c0d3b716d11f 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -369,7 +369,7 @@ static inline void ima_process_queued_keys(void) {} /* 
LIM API function definitions */ int ima_get_action(struct mnt_idmap *idmap, struct inode *inode, - const struct cred *cred, u32 secid, int mask, + const struct cred *cred, struct lsm_prop *prop, int mask, enum ima_hooks func, int *pcr, struct ima_template_desc **template_desc, const char *func_data, unsigned int *allowed_algos); @@ -400,8 +400,8 @@ const char *ima_d_path(const struct path *path, char **pathbuf, char *filename); /* IMA policy related functions */ int ima_match_policy(struct mnt_idmap *idmap, struct inode *inode, - const struct cred *cred, u32 secid, enum ima_hooks func, - int mask, int flags, int *pcr, + const struct cred *cred, struct lsm_prop *prop, + enum ima_hooks func, int mask, int flags, int *pcr, struct ima_template_desc **template_desc, const char *func_data, unsigned int *allowed_algos); void ima_init_policy(void); diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 984e861f6e33..c35ea613c9f8 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -165,7 +165,7 @@ err_out: * @idmap: idmap of the mount the inode was found from * @inode: pointer to the inode associated with the object being validated * @cred: pointer to credentials structure to validate - * @secid: secid of the task being validated + * @prop: properties of the task being validated * @mask: contains the permission mask (MAY_READ, MAY_WRITE, MAY_EXEC, * MAY_APPEND) * @func: caller identifier @@ -187,7 +187,7 @@ err_out: * */ int ima_get_action(struct mnt_idmap *idmap, struct inode *inode, - const struct cred *cred, u32 secid, int mask, + const struct cred *cred, struct lsm_prop *prop, int mask, enum ima_hooks func, int *pcr, struct ima_template_desc **template_desc, const char *func_data, unsigned int *allowed_algos) @@ -196,7 +196,7 @@ int ima_get_action(struct mnt_idmap *idmap, struct inode *inode, flags &= ima_policy_flag; - return ima_match_policy(idmap, inode, cred, secid, func, mask, + return ima_match_policy(idmap, inode, cred, prop, func, mask, flags, pcr, template_desc, func_data, allowed_algos); } diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 656c709b974f..884a3533f7af 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -73,13 +73,13 @@ bool is_ima_appraise_enabled(void) int ima_must_appraise(struct mnt_idmap *idmap, struct inode *inode, int mask, enum ima_hooks func) { - u32 secid; + struct lsm_prop prop; if (!ima_appraise) return 0; - security_current_getsecid_subj(&secid); - return ima_match_policy(idmap, inode, current_cred(), secid, + security_current_getlsmprop_subj(&prop); + return ima_match_policy(idmap, inode, current_cred(), &prop, func, mask, IMA_APPRAISE | IMA_HASH, NULL, NULL, NULL, NULL); } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 06132cf47016..553a6f54a1e2 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -206,8 +206,8 @@ static void ima_file_free(struct file *file) } static int process_measurement(struct file *file, const struct cred *cred, - u32 secid, char *buf, loff_t size, int mask, - enum ima_hooks func) + struct lsm_prop *prop, char *buf, loff_t size, + int mask, enum ima_hooks func) { struct inode *real_inode, *inode = file_inode(file); struct ima_iint_cache *iint = NULL; @@ -232,7 +232,7 @@ static int process_measurement(struct file *file, const struct cred *cred, * bitmask based on the appraise/audit/measurement policy. 
* Included is the appraise submask. */ - action = ima_get_action(file_mnt_idmap(file), inode, cred, secid, + action = ima_get_action(file_mnt_idmap(file), inode, cred, prop, mask, func, &pcr, &template_desc, NULL, &allowed_algos); violation_check = ((func == FILE_CHECK || func == MMAP_CHECK || @@ -443,23 +443,23 @@ out: static int ima_file_mmap(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) { - u32 secid; + struct lsm_prop prop; int ret; if (!file) return 0; - security_current_getsecid_subj(&secid); + security_current_getlsmprop_subj(&prop); if (reqprot & PROT_EXEC) { - ret = process_measurement(file, current_cred(), secid, NULL, + ret = process_measurement(file, current_cred(), &prop, NULL, 0, MAY_EXEC, MMAP_CHECK_REQPROT); if (ret) return ret; } if (prot & PROT_EXEC) - return process_measurement(file, current_cred(), secid, NULL, + return process_measurement(file, current_cred(), &prop, NULL, 0, MAY_EXEC, MMAP_CHECK); return 0; @@ -488,9 +488,9 @@ static int ima_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, char *pathbuf = NULL; const char *pathname = NULL; struct inode *inode; + struct lsm_prop prop; int result = 0; int action; - u32 secid; int pcr; /* Is mprotect making an mmap'ed file executable? */ @@ -498,13 +498,13 @@ static int ima_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, !(prot & PROT_EXEC) || (vma->vm_flags & VM_EXEC)) return 0; - security_current_getsecid_subj(&secid); + security_current_getlsmprop_subj(&prop); inode = file_inode(vma->vm_file); action = ima_get_action(file_mnt_idmap(vma->vm_file), inode, - current_cred(), secid, MAY_EXEC, MMAP_CHECK, + current_cred(), &prop, MAY_EXEC, MMAP_CHECK, &pcr, &template, NULL, NULL); action |= ima_get_action(file_mnt_idmap(vma->vm_file), inode, - current_cred(), secid, MAY_EXEC, + current_cred(), &prop, MAY_EXEC, MMAP_CHECK_REQPROT, &pcr, &template, NULL, NULL); @@ -542,15 +542,18 @@ static int ima_bprm_check(struct linux_binprm *bprm) { int ret; u32 secid; + struct lsm_prop prop = { }; - security_current_getsecid_subj(&secid); - ret = process_measurement(bprm->file, current_cred(), secid, NULL, 0, - MAY_EXEC, BPRM_CHECK); + security_current_getlsmprop_subj(&prop); + ret = process_measurement(bprm->file, current_cred(), + &prop, NULL, 0, MAY_EXEC, BPRM_CHECK); if (ret) return ret; security_cred_getsecid(bprm->cred, &secid); - return process_measurement(bprm->file, bprm->cred, secid, NULL, 0, + /* scaffolding */ + prop.scaffold.secid = secid; + return process_measurement(bprm->file, bprm->cred, &prop, NULL, 0, MAY_EXEC, CREDS_CHECK); } @@ -566,10 +569,10 @@ static int ima_bprm_check(struct linux_binprm *bprm) */ static int ima_file_check(struct file *file, int mask) { - u32 secid; + struct lsm_prop prop; - security_current_getsecid_subj(&secid); - return process_measurement(file, current_cred(), secid, NULL, 0, + security_current_getlsmprop_subj(&prop); + return process_measurement(file, current_cred(), &prop, NULL, 0, mask & (MAY_READ | MAY_WRITE | MAY_EXEC | MAY_APPEND), FILE_CHECK); } @@ -768,7 +771,7 @@ static int ima_read_file(struct file *file, enum kernel_read_file_id read_id, bool contents) { enum ima_hooks func; - u32 secid; + struct lsm_prop prop; /* * Do devices using pre-allocated memory run the risk of the @@ -788,9 +791,9 @@ static int ima_read_file(struct file *file, enum kernel_read_file_id read_id, /* Read entire file for all partial reads. 
*/ func = read_idmap[read_id] ?: FILE_CHECK; - security_current_getsecid_subj(&secid); - return process_measurement(file, current_cred(), secid, NULL, - 0, MAY_READ, func); + security_current_getlsmprop_subj(&prop); + return process_measurement(file, current_cred(), &prop, NULL, 0, + MAY_READ, func); } const int read_idmap[READING_MAX_ID] = { @@ -818,7 +821,7 @@ static int ima_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id read_id) { enum ima_hooks func; - u32 secid; + struct lsm_prop prop; /* permit signed certs */ if (!file && read_id == READING_X509_CERTIFICATE) @@ -831,8 +834,8 @@ static int ima_post_read_file(struct file *file, char *buf, loff_t size, } func = read_idmap[read_id] ?: FILE_CHECK; - security_current_getsecid_subj(&secid); - return process_measurement(file, current_cred(), secid, buf, size, + security_current_getlsmprop_subj(&prop); + return process_measurement(file, current_cred(), &prop, buf, size, MAY_READ, func); } @@ -967,7 +970,7 @@ int process_buffer_measurement(struct mnt_idmap *idmap, int digest_hash_len = hash_digest_size[ima_hash_algo]; int violation = 0; int action = 0; - u32 secid; + struct lsm_prop prop; if (digest && digest_len < digest_hash_len) return -EINVAL; @@ -990,9 +993,9 @@ int process_buffer_measurement(struct mnt_idmap *idmap, * buffer measurements. */ if (func) { - security_current_getsecid_subj(&secid); + security_current_getlsmprop_subj(&prop); action = ima_get_action(idmap, inode, current_cred(), - secid, 0, func, &pcr, &template, + &prop, 0, func, &pcr, &template, func_data, NULL); if (!(action & IMA_MEASURE) && !digest) return -ENOENT; diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 22a62e675ebc..a96dc3ff6aa0 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -557,7 +557,7 @@ static bool ima_match_rule_data(struct ima_rule_entry *rule, * @idmap: idmap of the mount the inode was found from * @inode: a pointer to an inode * @cred: a pointer to a credentials structure for user validation - * @secid: the secid of the task to be validated + * @prop: LSM properties of the task to be validated * @func: LIM hook identifier * @mask: requested action (MAY_READ | MAY_WRITE | MAY_APPEND | MAY_EXEC) * @func_data: func specific data, may be NULL @@ -567,7 +567,7 @@ static bool ima_match_rule_data(struct ima_rule_entry *rule, static bool ima_match_rules(struct ima_rule_entry *rule, struct mnt_idmap *idmap, struct inode *inode, const struct cred *cred, - u32 secid, enum ima_hooks func, int mask, + struct lsm_prop *prop, enum ima_hooks func, int mask, const char *func_data) { int i; @@ -658,8 +658,6 @@ retry: case LSM_SUBJ_USER: case LSM_SUBJ_ROLE: case LSM_SUBJ_TYPE: - /* scaffolding */ - prop.scaffold.secid = secid; rc = ima_filter_rule_match(&prop, lsm_rule->lsm[i].type, Audit_equal, lsm_rule->lsm[i].rule); @@ -723,7 +721,7 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func) * @inode: pointer to an inode for which the policy decision is being made * @cred: pointer to a credentials structure for which the policy decision is * being made - * @secid: LSM secid of the task to be validated + * @prop: LSM properties of the task to be validated * @func: IMA hook identifier * @mask: requested action (MAY_READ | MAY_WRITE | MAY_APPEND | MAY_EXEC) * @flags: IMA actions to consider (e.g. 
IMA_MEASURE | IMA_APPRAISE) @@ -740,8 +738,8 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func) * than writes so ima_match_policy() is classical RCU candidate. */ int ima_match_policy(struct mnt_idmap *idmap, struct inode *inode, - const struct cred *cred, u32 secid, enum ima_hooks func, - int mask, int flags, int *pcr, + const struct cred *cred, struct lsm_prop *prop, + enum ima_hooks func, int mask, int flags, int *pcr, struct ima_template_desc **template_desc, const char *func_data, unsigned int *allowed_algos) { @@ -759,7 +757,7 @@ int ima_match_policy(struct mnt_idmap *idmap, struct inode *inode, if (!(entry->action & actmask)) continue; - if (!ima_match_rules(entry, idmap, inode, cred, secid, + if (!ima_match_rules(entry, idmap, inode, cred, prop, func, mask, func_data)) continue; diff --git a/security/security.c b/security/security.c index f269421c2d72..5cdb5b171ff2 100644 --- a/security/security.c +++ b/security/security.c @@ -3492,33 +3492,33 @@ int security_task_getsid(struct task_struct *p) } /** - * security_current_getsecid_subj() - Get the current task's subjective secid - * @secid: secid value + * security_current_getlsmprop_subj() - Current task's subjective LSM data + * @prop: lsm specific information * * Retrieve the subjective security identifier of the current task and return - * it in @secid. In case of failure, @secid will be set to zero. + * it in @prop. */ -void security_current_getsecid_subj(u32 *secid) +void security_current_getlsmprop_subj(struct lsm_prop *prop) { - *secid = 0; - call_void_hook(current_getsecid_subj, secid); + lsmprop_init(prop); + call_void_hook(current_getlsmprop_subj, prop); } -EXPORT_SYMBOL(security_current_getsecid_subj); +EXPORT_SYMBOL(security_current_getlsmprop_subj); /** - * security_task_getsecid_obj() - Get a task's objective secid + * security_task_getlsmprop_obj() - Get a task's objective LSM data * @p: target task - * @secid: secid value + * @prop: lsm specific information * * Retrieve the objective security identifier of the task_struct in @p and - * return it in @secid. In case of failure, @secid will be set to zero. + * return it in @prop. 
*/ -void security_task_getsecid_obj(struct task_struct *p, u32 *secid) +void security_task_getlsmprop_obj(struct task_struct *p, struct lsm_prop *prop) { - *secid = 0; - call_void_hook(task_getsecid_obj, p, secid); + lsmprop_init(prop); + call_void_hook(task_getlsmprop_obj, p, prop); } -EXPORT_SYMBOL(security_task_getsecid_obj); +EXPORT_SYMBOL(security_task_getlsmprop_obj); /** * security_task_setnice() - Check if setting a task's nice value is allowed diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 1d43367009ed..7d6ffd3483a8 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4169,14 +4169,19 @@ static int selinux_task_getsid(struct task_struct *p) PROCESS__GETSESSION, NULL); } -static void selinux_current_getsecid_subj(u32 *secid) +static void selinux_current_getlsmprop_subj(struct lsm_prop *prop) { - *secid = current_sid(); + prop->selinux.secid = current_sid(); + /* scaffolding */ + prop->scaffold.secid = prop->selinux.secid; } -static void selinux_task_getsecid_obj(struct task_struct *p, u32 *secid) +static void selinux_task_getlsmprop_obj(struct task_struct *p, + struct lsm_prop *prop) { - *secid = task_sid_obj(p); + prop->selinux.secid = task_sid_obj(p); + /* scaffolding */ + prop->scaffold.secid = prop->selinux.secid; } static int selinux_task_setnice(struct task_struct *p, int nice) @@ -7203,8 +7208,8 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(task_setpgid, selinux_task_setpgid), LSM_HOOK_INIT(task_getpgid, selinux_task_getpgid), LSM_HOOK_INIT(task_getsid, selinux_task_getsid), - LSM_HOOK_INIT(current_getsecid_subj, selinux_current_getsecid_subj), - LSM_HOOK_INIT(task_getsecid_obj, selinux_task_getsecid_obj), + LSM_HOOK_INIT(current_getlsmprop_subj, selinux_current_getlsmprop_subj), + LSM_HOOK_INIT(task_getlsmprop_obj, selinux_task_getlsmprop_obj), LSM_HOOK_INIT(task_setnice, selinux_task_setnice), LSM_HOOK_INIT(task_setioprio, selinux_task_setioprio), LSM_HOOK_INIT(task_getioprio, selinux_task_getioprio), diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index fed44b4fc73d..e74e22c4232f 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2239,30 +2239,35 @@ static int smack_task_getsid(struct task_struct *p) } /** - * smack_current_getsecid_subj - get the subjective secid of the current task - * @secid: where to put the result + * smack_current_getlsmprop_subj - get the subjective secid of the current task + * @prop: where to put the result * * Sets the secid to contain a u32 version of the task's subjective smack label. */ -static void smack_current_getsecid_subj(u32 *secid) +static void smack_current_getlsmprop_subj(struct lsm_prop *prop) { struct smack_known *skp = smk_of_current(); - *secid = skp->smk_secid; + prop->smack.skp = skp; + /* scaffolding */ + prop->scaffold.secid = skp->smk_secid; } /** - * smack_task_getsecid_obj - get the objective secid of the task + * smack_task_getlsmprop_obj - get the objective data of the task * @p: the task - * @secid: where to put the result + * @prop: where to put the result * * Sets the secid to contain a u32 version of the task's objective smack label. 
*/ -static void smack_task_getsecid_obj(struct task_struct *p, u32 *secid) +static void smack_task_getlsmprop_obj(struct task_struct *p, + struct lsm_prop *prop) { struct smack_known *skp = smk_of_task_struct_obj(p); - *secid = skp->smk_secid; + prop->smack.skp = skp; + /* scaffolding */ + prop->scaffold.secid = skp->smk_secid; } /** @@ -5130,8 +5135,8 @@ static struct security_hook_list smack_hooks[] __ro_after_init = { LSM_HOOK_INIT(task_setpgid, smack_task_setpgid), LSM_HOOK_INIT(task_getpgid, smack_task_getpgid), LSM_HOOK_INIT(task_getsid, smack_task_getsid), - LSM_HOOK_INIT(current_getsecid_subj, smack_current_getsecid_subj), - LSM_HOOK_INIT(task_getsecid_obj, smack_task_getsecid_obj), + LSM_HOOK_INIT(current_getlsmprop_subj, smack_current_getlsmprop_subj), + LSM_HOOK_INIT(task_getlsmprop_obj, smack_task_getlsmprop_obj), LSM_HOOK_INIT(task_setnice, smack_task_setnice), LSM_HOOK_INIT(task_setioprio, smack_task_setioprio), LSM_HOOK_INIT(task_getioprio, smack_task_getioprio), -- cgit v1.2.3 From 07f9d2c1132c9b838538b606dfcdab2506cd2ae4 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:16 -0700 Subject: lsm: use lsm_prop in security_inode_getsecid Change the security_inode_getsecid() interface to fill in a lsm_prop structure instead of a u32 secid. This allows for its callers to gather data from all registered LSMs. Data is provided for IMA and audit. Change the name to security_inode_getlsmprop(). Cc: linux-integrity@vger.kernel.org Cc: selinux@vger.kernel.org Signed-off-by: Casey Schaufler [PM: subj line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 3 ++- include/linux/security.h | 7 ++++--- kernel/auditsc.c | 6 +++++- security/integrity/ima/ima_policy.c | 3 +-- security/security.c | 11 +++++------ security/selinux/hooks.c | 15 +++++++++------ security/smack/smack_lsm.c | 12 +++++++----- 7 files changed, 33 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 8a90fd9ff3c8..23ad7e4f8c67 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -176,7 +176,8 @@ LSM_HOOK(int, -EOPNOTSUPP, inode_setsecurity, struct inode *inode, const char *name, const void *value, size_t size, int flags) LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer, size_t buffer_size) -LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, inode_getlsmprop, struct inode *inode, + struct lsm_prop *prop) LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src, const char *name) diff --git a/include/linux/security.h b/include/linux/security.h index 9bc8153f4e8b..2b19ef5d799c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -452,7 +452,7 @@ int security_inode_getsecurity(struct mnt_idmap *idmap, void **buffer, bool alloc); int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags); int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size); -void security_inode_getsecid(struct inode *inode, u32 *secid); +void security_inode_getlsmprop(struct inode *inode, struct lsm_prop *prop); int security_inode_copy_up(struct dentry *src, struct cred **new); int security_inode_copy_up_xattr(struct dentry *src, const char *name); int security_inode_setintegrity(const struct inode *inode, @@ -1076,9 +1076,10 @@ static inline int 
security_inode_listsecurity(struct inode *inode, char *buffer, return 0; } -static inline void security_inode_getsecid(struct inode *inode, u32 *secid) +static inline void security_inode_getlsmprop(struct inode *inode, + struct lsm_prop *prop) { - *secid = 0; + lsmprop_init(prop); } static inline int security_inode_copy_up(struct dentry *src, struct cred **new) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 6b2b2a8d5647..372302f0528b 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2276,13 +2276,17 @@ static void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, struct inode *inode, unsigned int flags) { + struct lsm_prop prop; + name->ino = inode->i_ino; name->dev = inode->i_sb->s_dev; name->mode = inode->i_mode; name->uid = inode->i_uid; name->gid = inode->i_gid; name->rdev = inode->i_rdev; - security_inode_getsecid(inode, &name->osid); + security_inode_getlsmprop(inode, &prop); + /* scaffolding */ + name->osid = prop.scaffold.secid; if (flags & AUDIT_INODE_NOEVAL) { name->fcap_ver = -1; return; diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index a96dc3ff6aa0..dbfd554b4624 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -649,8 +649,7 @@ retry: case LSM_OBJ_USER: case LSM_OBJ_ROLE: case LSM_OBJ_TYPE: - /* scaffolding */ - security_inode_getsecid(inode, &prop.scaffold.secid); + security_inode_getlsmprop(inode, &prop); rc = ima_filter_rule_match(&prop, lsm_rule->lsm[i].type, Audit_equal, lsm_rule->lsm[i].rule); diff --git a/security/security.c b/security/security.c index 5cdb5b171ff2..5e76e35dda09 100644 --- a/security/security.c +++ b/security/security.c @@ -2724,16 +2724,15 @@ int security_inode_listsecurity(struct inode *inode, EXPORT_SYMBOL(security_inode_listsecurity); /** - * security_inode_getsecid() - Get an inode's secid + * security_inode_getlsmprop() - Get an inode's LSM data * @inode: inode - * @secid: secid to return + * @prop: lsm specific information to return * - * Get the secid associated with the node. In case of failure, @secid will be - * set to zero. + * Get the lsm specific information associated with the node. 
*/ -void security_inode_getsecid(struct inode *inode, u32 *secid) +void security_inode_getlsmprop(struct inode *inode, struct lsm_prop *prop) { - call_void_hook(inode_getsecid, inode, secid); + call_void_hook(inode_getlsmprop, inode, prop); } /** diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7d6ffd3483a8..a9f8d10062b5 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3503,15 +3503,18 @@ static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t return len; } -static void selinux_inode_getsecid(struct inode *inode, u32 *secid) +static void selinux_inode_getlsmprop(struct inode *inode, struct lsm_prop *prop) { struct inode_security_struct *isec = inode_security_novalidate(inode); - *secid = isec->sid; + + prop->selinux.secid = isec->sid; + /* scaffolding */ + prop->scaffold.secid = isec->sid; } static int selinux_inode_copy_up(struct dentry *src, struct cred **new) { - u32 sid; + struct lsm_prop prop; struct task_security_struct *tsec; struct cred *new_creds = *new; @@ -3523,8 +3526,8 @@ static int selinux_inode_copy_up(struct dentry *src, struct cred **new) tsec = selinux_cred(new_creds); /* Get label from overlay inode and set it in create_sid */ - selinux_inode_getsecid(d_inode(src), &sid); - tsec->create_sid = sid; + selinux_inode_getlsmprop(d_inode(src), &prop); + tsec->create_sid = prop.selinux.secid; *new = new_creds; return 0; } @@ -7174,7 +7177,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(inode_getsecurity, selinux_inode_getsecurity), LSM_HOOK_INIT(inode_setsecurity, selinux_inode_setsecurity), LSM_HOOK_INIT(inode_listsecurity, selinux_inode_listsecurity), - LSM_HOOK_INIT(inode_getsecid, selinux_inode_getsecid), + LSM_HOOK_INIT(inode_getlsmprop, selinux_inode_getlsmprop), LSM_HOOK_INIT(inode_copy_up, selinux_inode_copy_up), LSM_HOOK_INIT(inode_copy_up_xattr, selinux_inode_copy_up_xattr), LSM_HOOK_INIT(path_notify, selinux_path_notify), diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index e74e22c4232f..5c3cb2837105 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1649,15 +1649,17 @@ static int smack_inode_listsecurity(struct inode *inode, char *buffer, } /** - * smack_inode_getsecid - Extract inode's security id + * smack_inode_getlsmprop - Extract inode's security id * @inode: inode to extract the info from - * @secid: where result will be saved + * @prop: where result will be saved */ -static void smack_inode_getsecid(struct inode *inode, u32 *secid) +static void smack_inode_getlsmprop(struct inode *inode, struct lsm_prop *prop) { struct smack_known *skp = smk_of_inode(inode); - *secid = skp->smk_secid; + prop->smack.skp = skp; + /* scaffolding */ + prop->scaffold.secid = skp->smk_secid; } /* @@ -5110,7 +5112,7 @@ static struct security_hook_list smack_hooks[] __ro_after_init = { LSM_HOOK_INIT(inode_getsecurity, smack_inode_getsecurity), LSM_HOOK_INIT(inode_setsecurity, smack_inode_setsecurity), LSM_HOOK_INIT(inode_listsecurity, smack_inode_listsecurity), - LSM_HOOK_INIT(inode_getsecid, smack_inode_getsecid), + LSM_HOOK_INIT(inode_getlsmprop, smack_inode_getlsmprop), LSM_HOOK_INIT(file_alloc_security, smack_file_alloc_security), LSM_HOOK_INIT(file_ioctl, smack_file_ioctl), -- cgit v1.2.3 From b0654ca42998440df42ba2ccc3b7dbe3bf5b7bb5 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:18 -0700 Subject: lsm: create new security_cred_getlsmprop LSM hook Create a new LSM hook security_cred_getlsmprop() 
which, like security_cred_getsecid(), fetches LSM specific attributes from the cred structure. The associated data elements in the audit sub-system are changed from a secid to a lsm_prop to accommodate multiple possible LSM audit users. Cc: linux-integrity@vger.kernel.org Cc: audit@vger.kernel.org Cc: selinux@vger.kernel.org Signed-off-by: Casey Schaufler [PM: subj line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 ++ include/linux/security.h | 5 +++++ security/integrity/ima/ima_main.c | 7 ++----- security/security.c | 15 +++++++++++++++ security/selinux/hooks.c | 8 ++++++++ security/smack/smack_lsm.c | 18 ++++++++++++++++++ 6 files changed, 50 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 23ad7e4f8c67..eb2937599cb0 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -218,6 +218,8 @@ LSM_HOOK(int, 0, cred_prepare, struct cred *new, const struct cred *old, LSM_HOOK(void, LSM_RET_VOID, cred_transfer, struct cred *new, const struct cred *old) LSM_HOOK(void, LSM_RET_VOID, cred_getsecid, const struct cred *c, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, cred_getlsmprop, const struct cred *c, + struct lsm_prop *prop) LSM_HOOK(int, 0, kernel_act_as, struct cred *new, u32 secid) LSM_HOOK(int, 0, kernel_create_files_as, struct cred *new, struct inode *inode) LSM_HOOK(int, 0, kernel_module_request, char *kmod_name) diff --git a/include/linux/security.h b/include/linux/security.h index 2b19ef5d799c..acd2e5d1b0ff 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -488,6 +488,7 @@ void security_cred_free(struct cred *cred); int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); void security_transfer_creds(struct cred *new, const struct cred *old); void security_cred_getsecid(const struct cred *c, u32 *secid); +void security_cred_getlsmprop(const struct cred *c, struct lsm_prop *prop); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_kernel_module_request(char *kmod_name); @@ -1229,6 +1230,10 @@ static inline void security_cred_getsecid(const struct cred *c, u32 *secid) *secid = 0; } +static inline void security_cred_getlsmprop(const struct cred *c, + struct lsm_prop *prop) +{ } + static inline int security_kernel_act_as(struct cred *cred, u32 secid) { return 0; diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 553a6f54a1e2..91154bb05900 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -541,8 +541,7 @@ static int ima_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, static int ima_bprm_check(struct linux_binprm *bprm) { int ret; - u32 secid; - struct lsm_prop prop = { }; + struct lsm_prop prop; security_current_getlsmprop_subj(&prop); ret = process_measurement(bprm->file, current_cred(), @@ -550,9 +549,7 @@ static int ima_bprm_check(struct linux_binprm *bprm) if (ret) return ret; - security_cred_getsecid(bprm->cred, &secid); - /* scaffolding */ - prop.scaffold.secid = secid; + security_cred_getlsmprop(bprm->cred, &prop); return process_measurement(bprm->file, bprm->cred, &prop, NULL, 0, MAY_EXEC, CREDS_CHECK); } diff --git a/security/security.c b/security/security.c index 5e76e35dda09..0003d5ace5cc 100644 --- a/security/security.c +++ b/security/security.c @@ -3272,6 +3272,21 @@ void security_cred_getsecid(const struct cred *c, 
u32 *secid) } EXPORT_SYMBOL(security_cred_getsecid); +/** + * security_cred_getlsmprop() - Get the LSM data from a set of credentials + * @c: credentials + * @prop: destination for the LSM data + * + * Retrieve the security data of the cred structure @c. In case of + * failure, @prop will be cleared. + */ +void security_cred_getlsmprop(const struct cred *c, struct lsm_prop *prop) +{ + lsmprop_init(prop); + call_void_hook(cred_getlsmprop, c, prop); +} +EXPORT_SYMBOL(security_cred_getlsmprop); + /** * security_kernel_act_as() - Set the kernel credentials to act as secid * @new: credentials diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a9f8d10062b5..438dfa17faae 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4037,6 +4037,13 @@ static void selinux_cred_getsecid(const struct cred *c, u32 *secid) *secid = cred_sid(c); } +static void selinux_cred_getlsmprop(const struct cred *c, struct lsm_prop *prop) +{ + prop->selinux.secid = cred_sid(c); + /* scaffolding */ + prop->scaffold.secid = prop->selinux.secid; +} + /* * set the security data for a kernel service * - all the creation contexts are set to unlabelled @@ -7203,6 +7210,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(cred_prepare, selinux_cred_prepare), LSM_HOOK_INIT(cred_transfer, selinux_cred_transfer), LSM_HOOK_INIT(cred_getsecid, selinux_cred_getsecid), + LSM_HOOK_INIT(cred_getlsmprop, selinux_cred_getlsmprop), LSM_HOOK_INIT(kernel_act_as, selinux_kernel_act_as), LSM_HOOK_INIT(kernel_create_files_as, selinux_kernel_create_files_as), LSM_HOOK_INIT(kernel_module_request, selinux_kernel_module_request), diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 5c3cb2837105..52cc6a65d674 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2150,6 +2150,23 @@ static void smack_cred_getsecid(const struct cred *cred, u32 *secid) rcu_read_unlock(); } +/** + * smack_cred_getlsmprop - get the Smack label for a creds structure + * @cred: the object creds + * @prop: where to put the data + * + * Sets the Smack part of the ref + */ +static void smack_cred_getlsmprop(const struct cred *cred, + struct lsm_prop *prop) +{ + rcu_read_lock(); + prop->smack.skp = smk_of_task(smack_cred(cred)); + /* scaffolding */ + prop->scaffold.secid = prop->smack.skp->smk_secid; + rcu_read_unlock(); +} + /** * smack_kernel_act_as - Set the subjective context in a set of credentials * @new: points to the set of credentials to be modified. @@ -5132,6 +5149,7 @@ static struct security_hook_list smack_hooks[] __ro_after_init = { LSM_HOOK_INIT(cred_prepare, smack_cred_prepare), LSM_HOOK_INIT(cred_transfer, smack_cred_transfer), LSM_HOOK_INIT(cred_getsecid, smack_cred_getsecid), + LSM_HOOK_INIT(cred_getlsmprop, smack_cred_getlsmprop), LSM_HOOK_INIT(kernel_act_as, smack_kernel_act_as), LSM_HOOK_INIT(kernel_create_files_as, smack_kernel_create_files_as), LSM_HOOK_INIT(task_setpgid, smack_task_setpgid), -- cgit v1.2.3 From 05a344e54d0b4892736526e4a309851da8ee9c89 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:20 -0700 Subject: netlabel,smack: use lsm_prop for audit data Replace the secid in the netlbl_audit structure with an lsm_prop. Remove scaffolding that was required when the value was a secid. 
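For illustration only, a minimal sketch of how a NetLabel caller fills and then consumes the converted netlbl_audit once it carries an lsm_prop instead of a u32 secid. It reuses the helpers visible in the surrounding hunks (security_current_getlsmprop_subj(), lsmprop_is_set(), security_lsmprop_to_secctx(), security_release_secctx()); the example_* function names are hypothetical and not part of the applied diff.

#include <linux/audit.h>
#include <linux/security.h>
#include <net/netlabel.h>

/* Fill the audit info for the current task: the subjective LSM data
 * now lands in audit_info->prop rather than in a single u32 secid. */
static void example_netlbl_fill_auditinfo(struct netlbl_audit *audit_info)
{
	security_current_getlsmprop_subj(&audit_info->prop);
	audit_info->loginuid = audit_get_loginuid(current);
	audit_info->sessionid = audit_get_sessionid(current);
}

/* Emit "subj=" only when at least one LSM provided data and it can be
 * rendered as a secctx string, mirroring netlbl_audit_start_common(). */
static void example_netlbl_log_subject(struct audit_buffer *audit_buf,
				       struct netlbl_audit *audit_info)
{
	char *secctx;
	u32 secctx_len;

	if (lsmprop_is_set(&audit_info->prop) &&
	    security_lsmprop_to_secctx(&audit_info->prop, &secctx,
				       &secctx_len) == 0) {
		audit_log_format(audit_buf, " subj=%s", secctx);
		security_release_secctx(secctx, secctx_len);
	}
}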
Signed-off-by: Casey Schaufler [PM: fix the subject line] Signed-off-by: Paul Moore --- include/net/netlabel.h | 2 +- net/netlabel/netlabel_unlabeled.c | 5 +---- net/netlabel/netlabel_user.c | 7 +++---- net/netlabel/netlabel_user.h | 6 +----- security/smack/smackfs.c | 4 +--- 5 files changed, 7 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 529160f76cac..8de8344ee93c 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -97,7 +97,7 @@ struct calipso_doi; /* NetLabel audit information */ struct netlbl_audit { - u32 secid; + struct lsm_prop prop; kuid_t loginuid; unsigned int sessionid; }; diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 5925f48a3ade..1bc2d0890a9f 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1534,14 +1534,11 @@ int __init netlbl_unlabel_defconf(void) int ret_val; struct netlbl_dom_map *entry; struct netlbl_audit audit_info; - struct lsm_prop prop; /* Only the kernel is allowed to call this function and the only time * it is called is at bootup before the audit subsystem is reporting * messages so don't worry to much about these values. */ - security_current_getlsmprop_subj(&prop); - /* scaffolding */ - audit_info.secid = prop.scaffold.secid; + security_current_getlsmprop_subj(&audit_info.prop); audit_info.loginuid = GLOBAL_ROOT_UID; audit_info.sessionid = 0; diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 3ed4fea2a2de..81635a13987b 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -98,10 +98,9 @@ struct audit_buffer *netlbl_audit_start_common(int type, from_kuid(&init_user_ns, audit_info->loginuid), audit_info->sessionid); - if (audit_info->secid != 0 && - security_secid_to_secctx(audit_info->secid, - &secctx, - &secctx_len) == 0) { + if (lsmprop_is_set(&audit_info->prop) && + security_lsmprop_to_secctx(&audit_info->prop, &secctx, + &secctx_len) == 0) { audit_log_format(audit_buf, " subj=%s", secctx); security_release_secctx(secctx, secctx_len); } diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 39f4f6df5f51..d4c434956212 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -32,11 +32,7 @@ */ static inline void netlbl_netlink_auditinfo(struct netlbl_audit *audit_info) { - struct lsm_prop prop; - - security_current_getlsmprop_subj(&prop); - /* scaffolding */ - audit_info->secid = prop.scaffold.secid; + security_current_getlsmprop_subj(&audit_info->prop); audit_info->loginuid = audit_get_loginuid(current); audit_info->sessionid = audit_get_sessionid(current); } diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 5dd1e164f9b1..1401412fd794 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -182,11 +182,9 @@ static inline void smack_catset_bit(unsigned int cat, char *catsetp) */ static void smk_netlabel_audit_set(struct netlbl_audit *nap) { - struct smack_known *skp = smk_of_current(); - nap->loginuid = audit_get_loginuid(current); nap->sessionid = audit_get_sessionid(current); - nap->secid = skp->smk_secid; + nap->prop.smack.skp = smk_of_current(); } /* -- cgit v1.2.3 From 8afd8c8faa24249e48f5007aee46209299377588 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 9 Oct 2024 10:32:21 -0700 Subject: lsm: remove lsm_prop scaffolding Remove the scaffold member from the lsm_prop. Remove the remaining places it is being set. 
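A rough sketch of where this lands (assumed layout; member types are inferred from the usage in the hunks below, and the real definition guards each slot with its LSM's config option): the shared structure ends up carrying only per-LSM slots, and each hook fills nothing but its own. The example_* name is hypothetical.

struct lsm_prop {
	struct lsm_prop_selinux  selinux;	/* used as a u32 secid */
	struct lsm_prop_smack    smack;		/* used as a struct smack_known * */
	struct lsm_prop_apparmor apparmor;	/* used as a struct aa_label * */
	struct lsm_prop_bpf      bpf;
};

/* e.g. the SELinux subjective hook reduces to a single assignment, as in
 * the hunk below; no scaffold.secid mirror is kept any more. */
static void example_current_getlsmprop_subj(struct lsm_prop *prop)
{
	prop->selinux.secid = current_sid();
}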
Signed-off-by: Casey Schaufler [PM: subj line tweak] Signed-off-by: Paul Moore --- include/linux/security.h | 6 ------ security/apparmor/audit.c | 6 +----- security/apparmor/lsm.c | 4 ---- security/apparmor/secid.c | 6 +----- security/selinux/hooks.c | 18 +----------------- security/selinux/ss/services.c | 4 ---- security/smack/smack_lsm.c | 33 ++++----------------------------- 7 files changed, 7 insertions(+), 70 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index acd2e5d1b0ff..fd690fa73162 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -156,11 +156,6 @@ enum lockdown_reason { LOCKDOWN_CONFIDENTIALITY_MAX, }; -/* scaffolding */ -struct lsm_prop_scaffold { - u32 secid; -}; - /* * Data exported by the security modules */ @@ -169,7 +164,6 @@ struct lsm_prop { struct lsm_prop_smack smack; struct lsm_prop_apparmor apparmor; struct lsm_prop_bpf bpf; - struct lsm_prop_scaffold scaffold; }; extern const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1]; diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index 87df6fa2a48d..73087d76f649 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -270,11 +270,7 @@ int aa_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, void *vrule) struct aa_label *label; int found = 0; - /* scaffolding */ - if (!prop->apparmor.label && prop->scaffold.secid) - label = aa_secid_to_label(prop->scaffold.secid); - else - label = prop->apparmor.label; + label = prop->apparmor.label; if (!label) return -ENOENT; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 6331bcb35ec0..1edc12862a7d 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -987,8 +987,6 @@ static void apparmor_current_getlsmprop_subj(struct lsm_prop *prop) struct aa_label *label = __begin_current_label_crit_section(); prop->apparmor.label = label; - /* scaffolding */ - prop->scaffold.secid = label->secid; __end_current_label_crit_section(label); } @@ -998,8 +996,6 @@ static void apparmor_task_getlsmprop_obj(struct task_struct *p, struct aa_label *label = aa_get_task_label(p); prop->apparmor.label = label; - /* scaffolding */ - prop->scaffold.secid = label->secid; aa_put_label(label); } diff --git a/security/apparmor/secid.c b/security/apparmor/secid.c index 34610888559f..6350d107013a 100644 --- a/security/apparmor/secid.c +++ b/security/apparmor/secid.c @@ -102,11 +102,7 @@ int apparmor_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, { struct aa_label *label; - /* scaffolding */ - if (!prop->apparmor.label && prop->scaffold.secid) - label = aa_secid_to_label(prop->scaffold.secid); - else - label = prop->apparmor.label; + label = prop->apparmor.label; return apparmor_label_to_secctx(label, secdata, seclen); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 438dfa17faae..025b60c5b605 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3508,8 +3508,6 @@ static void selinux_inode_getlsmprop(struct inode *inode, struct lsm_prop *prop) struct inode_security_struct *isec = inode_security_novalidate(inode); prop->selinux.secid = isec->sid; - /* scaffolding */ - prop->scaffold.secid = isec->sid; } static int selinux_inode_copy_up(struct dentry *src, struct cred **new) @@ -4040,8 +4038,6 @@ static void selinux_cred_getsecid(const struct cred *c, u32 *secid) static void selinux_cred_getlsmprop(const struct cred *c, struct lsm_prop *prop) { prop->selinux.secid = cred_sid(c); - /* scaffolding */ - 
prop->scaffold.secid = prop->selinux.secid; } /* @@ -4182,16 +4178,12 @@ static int selinux_task_getsid(struct task_struct *p) static void selinux_current_getlsmprop_subj(struct lsm_prop *prop) { prop->selinux.secid = current_sid(); - /* scaffolding */ - prop->scaffold.secid = prop->selinux.secid; } static void selinux_task_getlsmprop_obj(struct task_struct *p, struct lsm_prop *prop) { prop->selinux.secid = task_sid_obj(p); - /* scaffolding */ - prop->scaffold.secid = prop->selinux.secid; } static int selinux_task_setnice(struct task_struct *p, int nice) @@ -6339,8 +6331,6 @@ static void selinux_ipc_getlsmprop(struct kern_ipc_perm *ipcp, { struct ipc_security_struct *isec = selinux_ipc(ipcp); prop->selinux.secid = isec->sid; - /* scaffolding */ - prop->scaffold.secid = isec->sid; } static void selinux_d_instantiate(struct dentry *dentry, struct inode *inode) @@ -6625,13 +6615,7 @@ static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) static int selinux_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen) { - u32 secid = prop->selinux.secid; - - /* scaffolding */ - if (!secid) - secid = prop->scaffold.secid; - - return selinux_secid_to_secctx(secid, secdata, seclen); + return selinux_secid_to_secctx(prop->selinux.secid, secdata, seclen); } static int selinux_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index e0c14773a7b7..07ba5b3643dd 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -3661,10 +3661,6 @@ int selinux_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, void *vru goto out; } - /* scaffolding */ - if (!prop->selinux.secid && prop->scaffold.secid) - prop->selinux.secid = prop->scaffold.secid; - ctxt = sidtab_search(policy->sidtab, prop->selinux.secid); if (unlikely(!ctxt)) { WARN_ONCE(1, "selinux_audit_rule_match: unrecognized SID %d\n", diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 52cc6a65d674..0c476282e279 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1655,11 +1655,7 @@ static int smack_inode_listsecurity(struct inode *inode, char *buffer, */ static void smack_inode_getlsmprop(struct inode *inode, struct lsm_prop *prop) { - struct smack_known *skp = smk_of_inode(inode); - - prop->smack.skp = skp; - /* scaffolding */ - prop->scaffold.secid = skp->smk_secid; + prop->smack.skp = smk_of_inode(inode); } /* @@ -2162,8 +2158,6 @@ static void smack_cred_getlsmprop(const struct cred *cred, { rcu_read_lock(); prop->smack.skp = smk_of_task(smack_cred(cred)); - /* scaffolding */ - prop->scaffold.secid = prop->smack.skp->smk_secid; rcu_read_unlock(); } @@ -2265,11 +2259,7 @@ static int smack_task_getsid(struct task_struct *p) */ static void smack_current_getlsmprop_subj(struct lsm_prop *prop) { - struct smack_known *skp = smk_of_current(); - - prop->smack.skp = skp; - /* scaffolding */ - prop->scaffold.secid = skp->smk_secid; + prop->smack.skp = smk_of_current(); } /** @@ -2282,11 +2272,7 @@ static void smack_current_getlsmprop_subj(struct lsm_prop *prop) static void smack_task_getlsmprop_obj(struct task_struct *p, struct lsm_prop *prop) { - struct smack_known *skp = smk_of_task_struct_obj(p); - - prop->smack.skp = skp; - /* scaffolding */ - prop->scaffold.secid = skp->smk_secid; + prop->smack.skp = smk_of_task_struct_obj(p); } /** @@ -3466,11 +3452,8 @@ static int smack_ipc_permission(struct kern_ipc_perm *ipp, short flag) static void 
smack_ipc_getlsmprop(struct kern_ipc_perm *ipp, struct lsm_prop *prop) { struct smack_known **iskpp = smack_ipc(ipp); - struct smack_known *iskp = *iskpp; - prop->smack.skp = iskp; - /* scaffolding */ - prop->scaffold.secid = iskp->smk_secid; + prop->smack.skp = *iskpp; } /** @@ -4805,10 +4788,6 @@ static int smack_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, if (field != AUDIT_SUBJ_USER && field != AUDIT_OBJ_USER) return 0; - /* scaffolding */ - if (!skp && prop->scaffold.secid) - skp = smack_from_secid(prop->scaffold.secid); - /* * No need to do string comparisons. If a match occurs, * both pointers will point to the same smack_known @@ -4869,10 +4848,6 @@ static int smack_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, { struct smack_known *skp = prop->smack.skp; - /* scaffolding */ - if (!skp && prop->scaffold.secid) - skp = smack_from_secid(prop->scaffold.secid); - if (secdata) *secdata = skp->smk_known; *seclen = strlen(skp->smk_known); -- cgit v1.2.3 From 7e6c0cb33f7c2aa78b20724239bd7bda3a882652 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Thu, 10 Oct 2024 15:43:02 -0700 Subject: drm/i915/xe3lpd: reuse xe2lpd definition xe3_lpd display is functionally identical to xe2_lpd for now so reuse the device description. A separate xe3 definition will be added in the future if/when new feature flags are required. Signed-off-by: Clint Taylor Signed-off-by: Matt Atwood Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20241010224311.50133-2-matthew.s.atwood@intel.com --- drivers/gpu/drm/i915/display/intel_display_device.c | 6 ++++++ drivers/gpu/drm/i915/display/intel_display_device.h | 2 ++ include/drm/intel/i915_pciids.h | 12 ++++++++++++ 3 files changed, 20 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index f33062322c66..aa22189e3853 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1252,6 +1252,10 @@ static const struct platform_desc bmg_desc = { PLATFORM(BATTLEMAGE), }; +static const struct platform_desc ptl_desc = { + PLATFORM(PANTHERLAKE), +}; + __diag_pop(); /* @@ -1322,6 +1326,7 @@ static const struct { INTEL_MTL_IDS(INTEL_DISPLAY_DEVICE, &mtl_desc), INTEL_LNL_IDS(INTEL_DISPLAY_DEVICE, &lnl_desc), INTEL_BMG_IDS(INTEL_DISPLAY_DEVICE, &bmg_desc), + INTEL_PTL_IDS(INTEL_DISPLAY_DEVICE, &ptl_desc), }; static const struct { @@ -1332,6 +1337,7 @@ static const struct { { 14, 0, &xe_lpdp_display }, { 14, 1, &xe2_hpd_display }, { 20, 0, &xe2_lpd_display }, + { 30, 0, &xe2_lpd_display }, }; static const struct intel_display_device_info * diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 3ef537fa551a..071a36b51f79 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -70,6 +70,8 @@ enum intel_display_platform { INTEL_DISPLAY_LUNARLAKE, /* Display ver 14.1 (based on GMD ID) */ INTEL_DISPLAY_BATTLEMAGE, + /* Display ver 30 (based on GMD ID) */ + INTEL_DISPLAY_PANTHERLAKE, }; enum intel_display_subplatform { diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index 02156c6f79b6..6b92f8c3731b 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -794,4 +794,16 @@ MACRO__(0xE20D, ## __VA_ARGS__), \ MACRO__(0xE212, ## __VA_ARGS__) +/* PTL 
*/ +#define INTEL_PTL_IDS(MACRO__, ...) \ + MACRO__(0xB080, ## __VA_ARGS__), \ + MACRO__(0xB081, ## __VA_ARGS__), \ + MACRO__(0xB082, ## __VA_ARGS__), \ + MACRO__(0xB090, ## __VA_ARGS__), \ + MACRO__(0xB091, ## __VA_ARGS__), \ + MACRO__(0xB092, ## __VA_ARGS__), \ + MACRO__(0xB0A0, ## __VA_ARGS__), \ + MACRO__(0xB0A1, ## __VA_ARGS__), \ + MACRO__(0xB0A2, ## __VA_ARGS__) + #endif /* _I915_PCIIDS_H */ -- cgit v1.2.3 From a716ff52bebfe806fbcf5227cee4c7bda4e6724b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2024 18:44:01 +0000 Subject: fib: rules: use READ_ONCE()/WRITE_ONCE() on ops->fib_rules_seq Using RTNL to protect ops->fib_rules_seq reads seems a big hammer. Writes are protected by RTNL. We can use READ_ONCE() on readers. Constify 'struct net' argument of fib_rules_seq_read() and lookup_rules_ops(). Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: David Ahern Link: https://patch.msgid.link/20241009184405.3752829-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/fib_rules.h | 2 +- net/core/fib_rules.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index d17855c52ef9..04383d90a1e3 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -176,7 +176,7 @@ int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table); bool fib_rule_matchall(const struct fib_rule *rule); int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, struct netlink_ext_ack *extack); -unsigned int fib_rules_seq_read(struct net *net, int family); +unsigned int fib_rules_seq_read(const struct net *net, int family); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 154a2681f55c..82ef090c0037 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -101,7 +101,8 @@ static void notify_rule_change(int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid); -static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family) +static struct fib_rules_ops *lookup_rules_ops(const struct net *net, + int family) { struct fib_rules_ops *ops; @@ -370,7 +371,9 @@ static int call_fib_rule_notifiers(struct net *net, .rule = rule, }; - ops->fib_rules_seq++; + ASSERT_RTNL(); + /* Paired with READ_ONCE() in fib_rules_seq() */ + WRITE_ONCE(ops->fib_rules_seq, ops->fib_rules_seq + 1); return call_fib_notifiers(net, event_type, &info.info); } @@ -397,17 +400,16 @@ int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, } EXPORT_SYMBOL_GPL(fib_rules_dump); -unsigned int fib_rules_seq_read(struct net *net, int family) +unsigned int fib_rules_seq_read(const struct net *net, int family) { unsigned int fib_rules_seq; struct fib_rules_ops *ops; - ASSERT_RTNL(); - ops = lookup_rules_ops(net, family); if (!ops) return 0; - fib_rules_seq = ops->fib_rules_seq; + /* Paired with WRITE_ONCE() in call_fib_rule_notifiers() */ + fib_rules_seq = READ_ONCE(ops->fib_rules_seq); rules_ops_put(ops); return fib_rules_seq; -- cgit v1.2.3 From 16207384d29287a19f81436e1953b41946aa8258 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2024 18:44:02 +0000 Subject: ipv4: use READ_ONCE()/WRITE_ONCE() on net->ipv4.fib_seq Using RTNL to protect ops->fib_rules_seq reads seems a big hammer. Writes are protected by RTNL. We can use READ_ONCE() when reading it. 
Constify 'struct net' argument of fib4_rules_seq_read() Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: David Ahern Link: https://patch.msgid.link/20241009184405.3752829-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 4 ++-- include/net/netns/ipv4.h | 2 +- net/ipv4/fib_notifier.c | 8 ++++---- net/ipv4/fib_rules.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 06130933542d..b6e44f4eaa4c 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -347,7 +347,7 @@ static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb, return 0; } -static inline unsigned int fib4_rules_seq_read(struct net *net) +static inline unsigned int fib4_rules_seq_read(const struct net *net) { return 0; } @@ -411,7 +411,7 @@ static inline bool fib4_has_custom_rules(const struct net *net) bool fib4_rule_default(const struct fib_rule *rule); int fib4_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); -unsigned int fib4_rules_seq_read(struct net *net); +unsigned int fib4_rules_seq_read(const struct net *net); static inline bool fib4_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3b1de80b5c25..3c014170e001 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -262,7 +262,7 @@ struct netns_ipv4 { #endif struct fib_notifier_ops *notifier_ops; - unsigned int fib_seq; /* protected by rtnl_mutex */ + unsigned int fib_seq; /* writes protected by rtnl_mutex */ struct fib_notifier_ops *ipmr_notifier_ops; unsigned int ipmr_seq; /* protected by rtnl_mutex */ diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c index 0e23ade74493..21c85c80de64 100644 --- a/net/ipv4/fib_notifier.c +++ b/net/ipv4/fib_notifier.c @@ -22,15 +22,15 @@ int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, ASSERT_RTNL(); info->family = AF_INET; - net->ipv4.fib_seq++; + /* Paired with READ_ONCE() in fib4_seq_read() */ + WRITE_ONCE(net->ipv4.fib_seq, net->ipv4.fib_seq + 1); return call_fib_notifiers(net, event_type, info); } static unsigned int fib4_seq_read(struct net *net) { - ASSERT_RTNL(); - - return net->ipv4.fib_seq + fib4_rules_seq_read(net); + /* Paired with WRITE_ONCE() in call_fib4_notifiers() */ + return READ_ONCE(net->ipv4.fib_seq) + fib4_rules_seq_read(net); } static int fib4_dump(struct net *net, struct notifier_block *nb, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index b07292d50ee7..8325224ef072 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -74,7 +74,7 @@ int fib4_rules_dump(struct net *net, struct notifier_block *nb, return fib_rules_dump(net, nb, AF_INET, extack); } -unsigned int fib4_rules_seq_read(struct net *net) +unsigned int fib4_rules_seq_read(const struct net *net) { return fib_rules_seq_read(net, AF_INET); } -- cgit v1.2.3 From e60ea45447768c48309b944596a8a34f6bae50e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2024 18:44:03 +0000 Subject: ipv6: use READ_ONCE()/WRITE_ONCE() on fib6_table->fib_seq Using RTNL to protect ops->fib_rules_seq reads seems a big hammer. Writes are protected by RTNL. We can use READ_ONCE() when reading it. Constify 'struct net' argument of fib6_tables_seq_read() and fib6_rules_seq_read(). 
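The fib_rules, ipv4 and ipv6 conversions all follow the same publish/consume pattern; a generic sketch, with example_ctx standing in for fib_rules_ops, net->ipv4 or fib6_table (all names here are hypothetical, not taken from the patches):

struct example_ctx {
	unsigned int fib_seq;	/* writes still serialized by RTNL */
};

/* Writer side: still runs under RTNL, but publishes the new value with
 * WRITE_ONCE() so that lockless readers see a single, consistent store. */
static void example_bump_fib_seq(struct example_ctx *ctx)
{
	ASSERT_RTNL();
	WRITE_ONCE(ctx->fib_seq, ctx->fib_seq + 1);
}

/* Reader side: no RTNL needed any more; READ_ONCE() pairs with the writer
 * above. The value may already be stale by the time it is used, which the
 * dump/seq-sum callers tolerate, as noted in the fib_seq_sum() change. */
static unsigned int example_read_fib_seq(const struct example_ctx *ctx)
{
	return READ_ONCE(ctx->fib_seq);
}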
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: David Ahern Link: https://patch.msgid.link/20241009184405.3752829-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip6_fib.h | 8 ++++---- net/ipv6/fib6_rules.c | 2 +- net/ipv6/ip6_fib.c | 14 +++++++------- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 6cb867ce4878..7c87873ae211 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -394,7 +394,7 @@ struct fib6_table { struct fib6_node tb6_root; struct inet_peer_base tb6_peers; unsigned int flags; - unsigned int fib_seq; + unsigned int fib_seq; /* writes protected by rtnl_mutex */ struct hlist_head tb6_gc_hlist; /* GC candidates */ #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; @@ -563,7 +563,7 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, int __net_init fib6_notifier_init(struct net *net); void __net_exit fib6_notifier_exit(struct net *net); -unsigned int fib6_tables_seq_read(struct net *net); +unsigned int fib6_tables_seq_read(const struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); @@ -632,7 +632,7 @@ void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); int fib6_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); -unsigned int fib6_rules_seq_read(struct net *net); +unsigned int fib6_rules_seq_read(const struct net *net); static inline bool fib6_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, @@ -676,7 +676,7 @@ static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb, { return 0; } -static inline unsigned int fib6_rules_seq_read(struct net *net) +static inline unsigned int fib6_rules_seq_read(const struct net *net) { return 0; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 04a9ed5e8310..c85c1627cb16 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -56,7 +56,7 @@ int fib6_rules_dump(struct net *net, struct notifier_block *nb, return fib_rules_dump(net, nb, AF_INET6, extack); } -unsigned int fib6_rules_seq_read(struct net *net) +unsigned int fib6_rules_seq_read(const struct net *net) { return fib_rules_seq_read(net, AF_INET6); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index eb111d20615c..cea160b249d2 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -345,17 +345,17 @@ static void __net_init fib6_tables_init(struct net *net) #endif -unsigned int fib6_tables_seq_read(struct net *net) +unsigned int fib6_tables_seq_read(const struct net *net) { unsigned int h, fib_seq = 0; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { - struct hlist_head *head = &net->ipv6.fib_table_hash[h]; - struct fib6_table *tb; + const struct hlist_head *head = &net->ipv6.fib_table_hash[h]; + const struct fib6_table *tb; hlist_for_each_entry_rcu(tb, head, tb6_hlist) - fib_seq += tb->fib_seq; + fib_seq += READ_ONCE(tb->fib_seq); } rcu_read_unlock(); @@ -400,7 +400,7 @@ int call_fib6_entry_notifiers(struct net *net, .rt = rt, }; - rt->fib6_table->fib_seq++; + WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1); return call_fib6_notifiers(net, event_type, &info.info); } @@ -416,7 +416,7 @@ int call_fib6_multipath_entry_notifiers(struct net *net, .nsiblings = nsiblings, }; - rt->fib6_table->fib_seq++; + WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1); return call_fib6_notifiers(net, 
event_type, &info.info); } @@ -427,7 +427,7 @@ int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt) .nsiblings = rt->fib6_nsiblings, }; - rt->fib6_table->fib_seq++; + WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1); return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info); } -- cgit v1.2.3 From 2698acd6ea4770809af7e65bb8b3250e0a3a807e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2024 18:44:05 +0000 Subject: net: do not acquire rtnl in fib_seq_sum() After we made sure no fib_seq_read() handlers needs RTNL anymore, we can remove RTNL from fib_seq_sum(). Note that after RTNL was dropped, fib_seq_sum() result was possibly outdated anyway. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: David Ahern Link: https://patch.msgid.link/20241009184405.3752829-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/fib_notifier.h | 2 +- net/core/fib_notifier.c | 2 -- net/ipv4/fib_notifier.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv6/fib6_notifier.c | 2 +- net/ipv6/ip6mr.c | 2 +- 6 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index 6d59221ff05a..48aad6128fea 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -28,7 +28,7 @@ enum fib_event_type { struct fib_notifier_ops { int family; struct list_head list; - unsigned int (*fib_seq_read)(struct net *net); + unsigned int (*fib_seq_read)(const struct net *net); int (*fib_dump)(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); struct module *owner; diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index fc96259807b6..5cdca49b1d7c 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -43,7 +43,6 @@ static unsigned int fib_seq_sum(struct net *net) struct fib_notifier_ops *ops; unsigned int fib_seq = 0; - rtnl_lock(); rcu_read_lock(); list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) { if (!try_module_get(ops->owner)) @@ -52,7 +51,6 @@ static unsigned int fib_seq_sum(struct net *net) module_put(ops->owner); } rcu_read_unlock(); - rtnl_unlock(); return fib_seq; } diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c index 21c85c80de64..b1551c26554b 100644 --- a/net/ipv4/fib_notifier.c +++ b/net/ipv4/fib_notifier.c @@ -27,7 +27,7 @@ int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, return call_fib_notifiers(net, event_type, info); } -static unsigned int fib4_seq_read(struct net *net) +static unsigned int fib4_seq_read(const struct net *net) { /* Paired with WRITE_ONCE() in call_fib4_notifiers() */ return READ_ONCE(net->ipv4.fib_seq) + fib4_rules_seq_read(net); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 35ed03165184..7a95daeb1946 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -3035,7 +3035,7 @@ static const struct net_protocol pim_protocol = { }; #endif -static unsigned int ipmr_seq_read(struct net *net) +static unsigned int ipmr_seq_read(const struct net *net) { return READ_ONCE(net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net); } diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c index f87ae33e1d01..949b72610df7 100644 --- a/net/ipv6/fib6_notifier.c +++ b/net/ipv6/fib6_notifier.c @@ -22,7 +22,7 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, return call_fib_notifiers(net, event_type, info); } -static unsigned int fib6_seq_read(struct net *net) +static unsigned int fib6_seq_read(const struct net 
*net) { return fib6_tables_seq_read(net) + fib6_rules_seq_read(net); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 3f9501fd8c1a..9528e17665fd 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1260,7 +1260,7 @@ static int ip6mr_device_event(struct notifier_block *this, return NOTIFY_DONE; } -static unsigned int ip6mr_seq_read(struct net *net) +static unsigned int ip6mr_seq_read(const struct net *net) { return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net); } -- cgit v1.2.3 From ccb32f2357c08f64947e0fc32f9a9551ae69c6b3 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 28 Aug 2024 16:24:07 +0200 Subject: media: v4l2-core: add v4l2_debugfs_root() This new function returns the dentry of the top-level debugfs "v4l2" directory. If it does not exist yet, then it is created first. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-dev.c | 14 ++++++++++++++ include/media/v4l2-dev.h | 15 +++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c index 3d7711cc42bc..dbd6fba93179 100644 --- a/drivers/media/v4l2-core/v4l2-dev.c +++ b/drivers/media/v4l2-core/v4l2-dev.c @@ -93,6 +93,8 @@ static struct attribute *video_device_attrs[] = { }; ATTRIBUTE_GROUPS(video_device); +static struct dentry *v4l2_debugfs_root_dir; + /* * Active devices */ @@ -1118,6 +1120,16 @@ void video_unregister_device(struct video_device *vdev) } EXPORT_SYMBOL(video_unregister_device); +#ifdef CONFIG_DEBUG_FS +struct dentry *v4l2_debugfs_root(void) +{ + if (!v4l2_debugfs_root_dir) + v4l2_debugfs_root_dir = debugfs_create_dir("v4l2", NULL); + return v4l2_debugfs_root_dir; +} +EXPORT_SYMBOL_GPL(v4l2_debugfs_root); +#endif + #if defined(CONFIG_MEDIA_CONTROLLER) __must_check int video_device_pipeline_start(struct video_device *vdev, @@ -1222,6 +1234,8 @@ static void __exit videodev_exit(void) class_unregister(&video_class); unregister_chrdev_region(dev, VIDEO_NUM_DEVICES); + debugfs_remove_recursive(v4l2_debugfs_root_dir); + v4l2_debugfs_root_dir = NULL; } subsys_initcall(videodev_init); diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h index d82dfdbf6e58..1b6222fab24e 100644 --- a/include/media/v4l2-dev.h +++ b/include/media/v4l2-dev.h @@ -62,6 +62,7 @@ struct v4l2_ioctl_callbacks; struct video_device; struct v4l2_device; struct v4l2_ctrl_handler; +struct dentry; /** * enum v4l2_video_device_flags - Flags used by &struct video_device @@ -539,6 +540,20 @@ static inline int video_is_registered(struct video_device *vdev) return test_bit(V4L2_FL_REGISTERED, &vdev->flags); } +/** + * v4l2_debugfs_root - returns the dentry of the top-level "v4l2" debugfs dir + * + * If this directory does not yet exist, then it will be created. + */ +#ifdef CONFIG_DEBUG_FS +struct dentry *v4l2_debugfs_root(void); +#else +static inline struct dentry *v4l2_debugfs_root(void) +{ + return NULL; +} +#endif + #if defined(CONFIG_MEDIA_CONTROLLER) /** -- cgit v1.2.3 From 01c76fc995cdb1e701fa721c5b373777f0469b37 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 28 Aug 2024 16:24:08 +0200 Subject: media: v4l2-core: add v4l2_debugfs_if_alloc/free() Add new helpers to export received or transmitted HDMI InfoFrames to debugfs. This complements similar code in drm where the transmitted HDMI infoframes are exported to debugfs. The same names have been used as in drm, so this is consistent. The exported infoframes can be parsed with the edid-decode utility. 
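
As a usage sketch of this helper together with v4l2_debugfs_root() from the previous patch: a hypothetical HDMI receiver driver could cache the last received AVI InfoFrame and expose it under debugfs. All foo_* names below are invented for illustration; only the v4l2_debugfs_*() and V4L2_DEBUGFS_IF_* symbols come from these patches.

/* Hypothetical receiver driver glue; all foo_* names are illustrative. */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <media/v4l2-dev.h>
#include <media/v4l2-dv-timings.h>

struct foo_hdmi_rx {
	struct dentry *debugfs_dir;
	struct v4l2_debugfs_if *infoframes;
	u8 avi_buf[V4L2_DEBUGFS_IF_MAX_LEN];	/* last received AVI InfoFrame */
	unsigned int avi_len;
};

static ssize_t foo_if_read(u32 type, void *priv, struct file *filp,
			   char __user *ubuf, size_t count, loff_t *ppos)
{
	struct foo_hdmi_rx *rx = priv;

	if (type != V4L2_DEBUGFS_IF_AVI)
		return 0;
	return simple_read_from_buffer(ubuf, count, ppos,
				       rx->avi_buf, rx->avi_len);
}

static void foo_debugfs_init(struct foo_hdmi_rx *rx)
{
	rx->debugfs_dir = debugfs_create_dir("foo-hdmi-rx", v4l2_debugfs_root());
	rx->infoframes = v4l2_debugfs_if_alloc(rx->debugfs_dir,
					       V4L2_DEBUGFS_IF_AVI, rx,
					       foo_if_read);
}

static void foo_debugfs_exit(struct foo_hdmi_rx *rx)
{
	v4l2_debugfs_if_free(rx->infoframes);
	debugfs_remove_recursive(rx->debugfs_dir);
}
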
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-dv-timings.c | 67 +++++++++++++++++++++++++++++++ include/media/v4l2-dv-timings.h | 48 ++++++++++++++++++++++ 2 files changed, 115 insertions(+) (limited to 'include') diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c index 942d0005c55e..39b5fc1807c4 100644 --- a/drivers/media/v4l2-core/v4l2-dv-timings.c +++ b/drivers/media/v4l2-core/v4l2-dv-timings.c @@ -1154,3 +1154,70 @@ int v4l2_phys_addr_validate(u16 phys_addr, u16 *parent, u16 *port) return 0; } EXPORT_SYMBOL_GPL(v4l2_phys_addr_validate); + +#ifdef CONFIG_DEBUG_FS + +#define DEBUGFS_FOPS(type, flag) \ +static ssize_t \ +infoframe_read_##type(struct file *filp, \ + char __user *ubuf, size_t count, loff_t *ppos) \ +{ \ + struct v4l2_debugfs_if *infoframes = filp->private_data; \ + \ + return infoframes->if_read((flag), infoframes->priv, filp, \ + ubuf, count, ppos); \ +} \ + \ +static const struct file_operations infoframe_##type##_fops = { \ + .owner = THIS_MODULE, \ + .open = simple_open, \ + .read = infoframe_read_##type, \ +} + +DEBUGFS_FOPS(avi, V4L2_DEBUGFS_IF_AVI); +DEBUGFS_FOPS(audio, V4L2_DEBUGFS_IF_AUDIO); +DEBUGFS_FOPS(spd, V4L2_DEBUGFS_IF_SPD); +DEBUGFS_FOPS(hdmi, V4L2_DEBUGFS_IF_HDMI); + +struct v4l2_debugfs_if *v4l2_debugfs_if_alloc(struct dentry *root, u32 if_types, + void *priv, + v4l2_debugfs_if_read_t if_read) +{ + struct v4l2_debugfs_if *infoframes; + + if (IS_ERR_OR_NULL(root) || !if_types || !if_read) + return NULL; + + infoframes = kzalloc(sizeof(*infoframes), GFP_KERNEL); + if (!infoframes) + return NULL; + + infoframes->if_dir = debugfs_create_dir("infoframes", root); + infoframes->priv = priv; + infoframes->if_read = if_read; + if (if_types & V4L2_DEBUGFS_IF_AVI) + debugfs_create_file("avi", 0400, infoframes->if_dir, + infoframes, &infoframe_avi_fops); + if (if_types & V4L2_DEBUGFS_IF_AUDIO) + debugfs_create_file("audio", 0400, infoframes->if_dir, + infoframes, &infoframe_audio_fops); + if (if_types & V4L2_DEBUGFS_IF_SPD) + debugfs_create_file("spd", 0400, infoframes->if_dir, + infoframes, &infoframe_spd_fops); + if (if_types & V4L2_DEBUGFS_IF_HDMI) + debugfs_create_file("hdmi", 0400, infoframes->if_dir, + infoframes, &infoframe_hdmi_fops); + return infoframes; +} +EXPORT_SYMBOL_GPL(v4l2_debugfs_if_alloc); + +void v4l2_debugfs_if_free(struct v4l2_debugfs_if *infoframes) +{ + if (infoframes) { + debugfs_remove_recursive(infoframes->if_dir); + kfree(infoframes); + } +} +EXPORT_SYMBOL_GPL(v4l2_debugfs_if_free); + +#endif diff --git a/include/media/v4l2-dv-timings.h b/include/media/v4l2-dv-timings.h index 8fa963326bf6..13830411bd6c 100644 --- a/include/media/v4l2-dv-timings.h +++ b/include/media/v4l2-dv-timings.h @@ -8,6 +8,7 @@ #ifndef __V4L2_DV_TIMINGS_H #define __V4L2_DV_TIMINGS_H +#include #include /** @@ -251,4 +252,51 @@ void v4l2_set_edid_phys_addr(u8 *edid, unsigned int size, u16 phys_addr); u16 v4l2_phys_addr_for_input(u16 phys_addr, u8 input); int v4l2_phys_addr_validate(u16 phys_addr, u16 *parent, u16 *port); +/* Add support for exporting InfoFrames to debugfs */ + +/* + * HDMI InfoFrames start with a 3 byte header, then a checksum, + * followed by the actual IF payload. + * + * The payload length is limited to 30 bytes according to the HDMI spec, + * but since the length is encoded in 5 bits, it can be 31 bytes theoretically. + * So set the max length as 31 + 3 (header) + 1 (checksum) = 35. 
+ */ +#define V4L2_DEBUGFS_IF_MAX_LEN (35) + +#define V4L2_DEBUGFS_IF_AVI BIT(0) +#define V4L2_DEBUGFS_IF_AUDIO BIT(1) +#define V4L2_DEBUGFS_IF_SPD BIT(2) +#define V4L2_DEBUGFS_IF_HDMI BIT(3) + +typedef ssize_t (*v4l2_debugfs_if_read_t)(u32 type, void *priv, + struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos); + +struct v4l2_debugfs_if { + struct dentry *if_dir; + void *priv; + + v4l2_debugfs_if_read_t if_read; +}; + +#ifdef CONFIG_DEBUG_FS +struct v4l2_debugfs_if *v4l2_debugfs_if_alloc(struct dentry *root, u32 if_types, + void *priv, + v4l2_debugfs_if_read_t if_read); +void v4l2_debugfs_if_free(struct v4l2_debugfs_if *infoframes); +#else +static inline +struct v4l2_debugfs_if *v4l2_debugfs_if_alloc(struct dentry *root, u32 if_types, + void *priv, + v4l2_debugfs_if_read_t if_read) +{ + return NULL; +} + +static inline void v4l2_debugfs_if_free(struct v4l2_debugfs_if *infoframes) +{ +} +#endif + #endif -- cgit v1.2.3 From 8fa714ca334e0880665a14fed13b16e3e01a67b2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 10 Oct 2024 21:15:35 +0300 Subject: iio: Convert unsigned to unsigned int Simple type conversion with no functional change implied. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20241010181535.3083262-1-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio-opaque.h | 2 +- include/linux/iio/iio.h | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index 5aec3945555b..a89e7e43e441 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -70,7 +70,7 @@ struct iio_dev_opaque { #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_dentry; - unsigned cached_reg_addr; + unsigned int cached_reg_addr; char read_buf[20]; unsigned int read_buf_len; #endif diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 18779b631e90..3a9b57187a95 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -282,11 +282,11 @@ struct iio_chan_spec { const struct iio_chan_spec_ext_info *ext_info; const char *extend_name; const char *datasheet_name; - unsigned modified:1; - unsigned indexed:1; - unsigned output:1; - unsigned differential:1; - unsigned has_ext_scan_type:1; + unsigned int modified:1; + unsigned int indexed:1; + unsigned int output:1; + unsigned int differential:1; + unsigned int has_ext_scan_type:1; }; @@ -541,13 +541,13 @@ struct iio_info { int (*update_scan_mode)(struct iio_dev *indio_dev, const unsigned long *scan_mask); int (*debugfs_reg_access)(struct iio_dev *indio_dev, - unsigned reg, unsigned writeval, - unsigned *readval); + unsigned int reg, unsigned int writeval, + unsigned int *readval); int (*fwnode_xlate)(struct iio_dev *indio_dev, const struct fwnode_reference_args *iiospec); - int (*hwfifo_set_watermark)(struct iio_dev *indio_dev, unsigned val); + int (*hwfifo_set_watermark)(struct iio_dev *indio_dev, unsigned int val); int (*hwfifo_flush_to_buffer)(struct iio_dev *indio_dev, - unsigned count); + unsigned int count); }; /** @@ -609,7 +609,7 @@ struct iio_dev { int scan_bytes; const unsigned long *available_scan_masks; - unsigned __private masklength; + unsigned int __private masklength; const unsigned long *active_scan_mask; bool scan_timestamp; struct iio_trigger *trig; -- cgit v1.2.3 From 06f5531958dd5decaabb21c6fa1da3dcaf8dfc24 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 26 Aug 2024 17:24:09 +0000 Subject: 
media: videodev2: Add flag to unconditionally enumerate pixel formats When the index is ORed with V4L2_FMTDESC_FLAG_ENUM_ALL the driver clears the flag and enumerate all the possible formats, ignoring any limitations from the current configuration. Drivers which do not support this flag yet always return an EINVAL. Signed-off-by: Benjamin Gaignard Signed-off-by: Hans Verkuil [hverkuil: improved doc when the new flag is not supported by the driver] --- .../userspace-api/media/v4l/vidioc-enum-fmt.rst | 18 +++++++++++++++++- .../userspace-api/media/videodev2.h.rst.exceptions | 1 + include/uapi/linux/videodev2.h | 3 +++ 3 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst b/Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst index 3adb3d205531..0f69aa04607f 100644 --- a/Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst +++ b/Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst @@ -85,7 +85,17 @@ the ``mbus_code`` field is handled differently: * - __u32 - ``index`` - Number of the format in the enumeration, set by the application. - This is in no way related to the ``pixelformat`` field. + This is in no way related to the ``pixelformat`` field. + When the index is ORed with ``V4L2_FMTDESC_FLAG_ENUM_ALL`` the + driver clears the flag and enumerates all the possible formats, + ignoring any limitations from the current configuration. Drivers + which do not support this flag always return an ``EINVAL`` + error code without clearing this flag. + Formats enumerated when using ``V4L2_FMTDESC_FLAG_ENUM_ALL`` flag + shouldn't be used when calling :c:func:`VIDIOC_ENUM_FRAMESIZES` + or :c:func:`VIDIOC_ENUM_FRAMEINTERVALS`. + ``V4L2_FMTDESC_FLAG_ENUM_ALL`` should only be used by drivers that + can return different format list depending on this flag. * - __u32 - ``type`` - Type of the data stream, set by the application. Only these types @@ -234,6 +244,12 @@ the ``mbus_code`` field is handled differently: valid. The buffer consists of ``height`` lines, each having ``width`` Data Units of data and the offset (in bytes) between the beginning of each two consecutive lines is ``bytesperline``. + * - ``V4L2_FMTDESC_FLAG_ENUM_ALL`` + - 0x80000000 + - When the applications ORs ``index`` with ``V4L2_FMTDESC_FLAG_ENUM_ALL`` flag + the driver enumerates all the possible pixel formats without taking care + of any already set configuration. Drivers which do not support this flag, + always return ``EINVAL`` without clearing this flag. 
Return Value ============ diff --git a/Documentation/userspace-api/media/videodev2.h.rst.exceptions b/Documentation/userspace-api/media/videodev2.h.rst.exceptions index d67fd4038d22..429b5cdf05c3 100644 --- a/Documentation/userspace-api/media/videodev2.h.rst.exceptions +++ b/Documentation/userspace-api/media/videodev2.h.rst.exceptions @@ -217,6 +217,7 @@ replace define V4L2_FMT_FLAG_CSC_YCBCR_ENC fmtdesc-flags replace define V4L2_FMT_FLAG_CSC_HSV_ENC fmtdesc-flags replace define V4L2_FMT_FLAG_CSC_QUANTIZATION fmtdesc-flags replace define V4L2_FMT_FLAG_META_LINE_BASED fmtdesc-flags +replace define V4L2_FMTDESC_FLAG_ENUM_ALL fmtdesc-flags # V4L2 timecode types replace define V4L2_TC_TYPE_24FPS timecode-type diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 21a8aa575ea3..ded023edac70 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -907,6 +907,9 @@ struct v4l2_fmtdesc { #define V4L2_FMT_FLAG_CSC_QUANTIZATION 0x0100 #define V4L2_FMT_FLAG_META_LINE_BASED 0x0200 +/* Format description flag, to be ORed with the index */ +#define V4L2_FMTDESC_FLAG_ENUM_ALL 0x80000000 + /* Frame Size and frame rate enumeration */ /* * F R A M E S I Z E E N U M E R A T I O N -- cgit v1.2.3 From 454bbde8f0d465e93e5a3a4003ac6c7e62fa4473 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:19 +0800 Subject: net: skb: add pskb_network_may_pull_reason() helper Introduce the function pskb_network_may_pull_reason() and make pskb_network_may_pull() a simple inline call to it. The drop reasons of it just come from pskb_may_pull_reason. Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 39f1d16f3628..48f1e0fa2a13 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3130,9 +3130,15 @@ static inline int skb_inner_network_offset(const struct sk_buff *skb) return skb_inner_network_header(skb) - skb->data; } +static inline enum skb_drop_reason +pskb_network_may_pull_reason(struct sk_buff *skb, unsigned int len) +{ + return pskb_may_pull_reason(skb, skb_network_offset(skb) + len); +} + static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) { - return pskb_may_pull(skb, skb_network_offset(skb) + len); + return pskb_network_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET; } /* -- cgit v1.2.3 From 7f20dbd7de7b9b2804e6bf54b0c22f2bc447cd64 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:20 +0800 Subject: net: tunnel: add pskb_inet_may_pull_reason() helper Introduce the function pskb_inet_may_pull_reason() and make pskb_inet_may_pull a simple inline call to it. The drop reasons of it just come from pskb_may_pull_reason(). Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Reviewed-by: Kalesh AP Signed-off-by: David S. 
Miller --- include/net/ip_tunnels.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 6194fbb564c6..7fc2f7bf837a 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -439,7 +439,8 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, int ip_tunnel_encap_setup(struct ip_tunnel *t, struct ip_tunnel_encap *ipencap); -static inline bool pskb_inet_may_pull(struct sk_buff *skb) +static inline enum skb_drop_reason +pskb_inet_may_pull_reason(struct sk_buff *skb) { int nhlen; @@ -456,7 +457,12 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) nhlen = 0; } - return pskb_network_may_pull(skb, nhlen); + return pskb_network_may_pull_reason(skb, nhlen); +} + +static inline bool pskb_inet_may_pull(struct sk_buff *skb) +{ + return pskb_inet_may_pull_reason(skb) == SKB_NOT_DROPPED_YET; } /* Variant of pskb_inet_may_pull(). -- cgit v1.2.3 From 9990ddf47d4168088e2246c3d418bf526e40830d Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:21 +0800 Subject: net: tunnel: make skb_vlan_inet_prepare() return drop reasons Make skb_vlan_inet_prepare return the skb drop reasons, which is just what pskb_may_pull_reason() returns. Meanwhile, adjust all the call of it. Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/bareudp.c | 4 ++-- drivers/net/geneve.c | 4 ++-- drivers/net/vxlan/vxlan_core.c | 2 +- include/net/ip_tunnels.h | 13 ++++++++----- 4 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index e057526448d7..fa2dd76ba3d9 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -317,7 +317,7 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be32 saddr; int err; - if (!skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB))) + if (skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB))) return -EINVAL; if (!sock) @@ -387,7 +387,7 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB))) + if (skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB))) return -EINVAL; if (!sock) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 7f611c74eb62..2f29b1386b1c 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -827,7 +827,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!skb_vlan_inet_prepare(skb, inner_proto_inherit)) + if (skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; if (!gs4) @@ -937,7 +937,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!skb_vlan_inet_prepare(skb, inner_proto_inherit)) + if (skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; if (!gs6) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 6e9a3795846a..916c3880832e 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2356,7 +2356,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, __be32 vni = 0; no_eth_encap = flags & VXLAN_F_GPE && skb->protocol != htons(ETH_P_TEB); - if (!skb_vlan_inet_prepare(skb, no_eth_encap)) + if (skb_vlan_inet_prepare(skb, no_eth_encap)) goto drop; old_iph = ip_hdr(skb); diff --git a/include/net/ip_tunnels.h 
b/include/net/ip_tunnels.h index 7fc2f7bf837a..4e4f9e24c9c1 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -467,11 +467,12 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) /* Variant of pskb_inet_may_pull(). */ -static inline bool skb_vlan_inet_prepare(struct sk_buff *skb, - bool inner_proto_inherit) +static inline enum skb_drop_reason +skb_vlan_inet_prepare(struct sk_buff *skb, bool inner_proto_inherit) { int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN; __be16 type = skb->protocol; + enum skb_drop_reason reason; /* Essentially this is skb_protocol(skb, true) * And we get MAC len. @@ -492,11 +493,13 @@ static inline bool skb_vlan_inet_prepare(struct sk_buff *skb, /* For ETH_P_IPV6/ETH_P_IP we make sure to pull * a base network header in skb->head. */ - if (!pskb_may_pull(skb, maclen + nhlen)) - return false; + reason = pskb_may_pull_reason(skb, maclen + nhlen); + if (reason) + return reason; skb_set_network_header(skb, maclen); - return true; + + return SKB_NOT_DROPPED_YET; } static inline int ip_encap_hlen(struct ip_tunnel_encap *e) -- cgit v1.2.3 From 4c06d9daf8e6215447ca8a2ddd59fa09862c9bae Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:22 +0800 Subject: net: vxlan: add skb drop reasons to vxlan_rcv() Introduce skb drop reasons to the function vxlan_rcv(). Following new drop reasons are added: SKB_DROP_REASON_VXLAN_INVALID_HDR SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND SKB_DROP_REASON_IP_TUNNEL_ECN Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/vxlan/vxlan_core.c | 26 ++++++++++++++++++++------ include/net/dropreason-core.h | 16 ++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 916c3880832e..40111d05d18e 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1671,13 +1671,15 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; __be16 protocol = htons(ETH_P_TEB); + enum skb_drop_reason reason; bool raw_proto = false; void *oiph; __be32 vni = 0; int nh; /* Need UDP and VXLAN header to be present */ - if (!pskb_may_pull(skb, VXLAN_HLEN)) + reason = pskb_may_pull_reason(skb, VXLAN_HLEN); + if (reason) goto drop; unparsed = *vxlan_hdr(skb); @@ -1686,6 +1688,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", ntohl(vxlan_hdr(skb)->vx_flags), ntohl(vxlan_hdr(skb)->vx_vni)); + reason = SKB_DROP_REASON_VXLAN_INVALID_HDR; /* Return non vxlan pkt */ goto drop; } @@ -1699,8 +1702,10 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) vni = vxlan_vni(vxlan_hdr(skb)->vx_vni); vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni, &vninode); - if (!vxlan) + if (!vxlan) { + reason = SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND; goto drop; + } /* For backwards compatibility, only allow reserved fields to be * used by VXLAN extensions if explicitly requested. 
@@ -1713,8 +1718,10 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) } if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto, - !net_eq(vxlan->net, dev_net(vxlan->dev)))) + !net_eq(vxlan->net, dev_net(vxlan->dev)))) { + reason = SKB_DROP_REASON_NOMEM; goto drop; + } if (vs->flags & VXLAN_F_REMCSUM_RX) if (unlikely(!vxlan_remcsum(&unparsed, skb, vs->flags))) @@ -1728,8 +1735,10 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), flags, key32_to_tunnel_id(vni), sizeof(*md)); - if (!tun_dst) + if (!tun_dst) { + reason = SKB_DROP_REASON_NOMEM; goto drop; + } md = ip_tunnel_info_opts(&tun_dst->u.tun_info); @@ -1753,6 +1762,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) * is more robust and provides a little more security in * adding extensions to VXLAN. */ + reason = SKB_DROP_REASON_VXLAN_INVALID_HDR; goto drop; } @@ -1773,7 +1783,8 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) skb_reset_network_header(skb); - if (!pskb_inet_may_pull(skb)) { + reason = pskb_inet_may_pull_reason(skb); + if (reason) { DEV_STATS_INC(vxlan->dev, rx_length_errors); DEV_STATS_INC(vxlan->dev, rx_errors); vxlan_vnifilter_count(vxlan, vni, vninode, @@ -1785,6 +1796,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) oiph = skb->head + nh; if (!vxlan_ecn_decapsulate(vs, oiph, skb)) { + reason = SKB_DROP_REASON_IP_TUNNEL_ECN; DEV_STATS_INC(vxlan->dev, rx_frame_errors); DEV_STATS_INC(vxlan->dev, rx_errors); vxlan_vnifilter_count(vxlan, vni, vninode, @@ -1799,6 +1811,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) dev_core_stats_rx_dropped_inc(vxlan->dev); vxlan_vnifilter_count(vxlan, vni, vninode, VXLAN_VNI_STATS_RX_DROPS, 0); + reason = SKB_DROP_REASON_DEV_READY; goto drop; } @@ -1811,8 +1824,9 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) return 0; drop: + reason = reason ?: SKB_DROP_REASON_NOT_SPECIFIED; /* Consume bad packet */ - kfree_skb(skb); + kfree_skb_reason(skb, reason); return 0; } diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 4748680e8c88..98259d2b3e92 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -92,6 +92,9 @@ FN(PACKET_SOCK_ERROR) \ FN(TC_CHAIN_NOTFOUND) \ FN(TC_RECLASSIFY_LOOP) \ + FN(VXLAN_INVALID_HDR) \ + FN(VXLAN_VNI_NOT_FOUND) \ + FN(IP_TUNNEL_ECN) \ FNe(MAX) /** @@ -418,6 +421,19 @@ enum skb_drop_reason { * iterations. */ SKB_DROP_REASON_TC_RECLASSIFY_LOOP, + /** + * @SKB_DROP_REASON_VXLAN_INVALID_HDR: VXLAN header is invalid. E.g.: + * 1) reserved fields are not zero + * 2) "I" flag is not set + */ + SKB_DROP_REASON_VXLAN_INVALID_HDR, + /** @SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND: no VXLAN device found for VNI */ + SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND, + /** + * @SKB_DROP_REASON_IP_TUNNEL_ECN: skb is dropped according to + * RFC 6040 4.2, see __INET_ECN_decapsulate() for detail. + */ + SKB_DROP_REASON_IP_TUNNEL_ECN, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen -- cgit v1.2.3 From 289fd4e75219a96f77c5d679166035cd5118d139 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:24 +0800 Subject: net: vxlan: make vxlan_snoop() return drop reasons Change the return type of vxlan_snoop() from bool to enum skb_drop_reason. 
In this commit, two drop reasons are introduced: SKB_DROP_REASON_MAC_INVALID_SOURCE SKB_DROP_REASON_VXLAN_ENTRY_EXISTS Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/vxlan/vxlan_core.c | 17 +++++++++-------- include/net/dropreason-core.h | 9 +++++++++ 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index e3ac46d6e148..947c29d6605c 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1437,9 +1437,10 @@ errout: * and Tunnel endpoint. * Return true if packet is bogus and should be dropped. */ -static bool vxlan_snoop(struct net_device *dev, - union vxlan_addr *src_ip, const u8 *src_mac, - u32 src_ifindex, __be32 vni) +static enum skb_drop_reason vxlan_snoop(struct net_device *dev, + union vxlan_addr *src_ip, + const u8 *src_mac, u32 src_ifindex, + __be32 vni) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb *f; @@ -1447,7 +1448,7 @@ static bool vxlan_snoop(struct net_device *dev, /* Ignore packets from invalid src-address */ if (!is_valid_ether_addr(src_mac)) - return true; + return SKB_DROP_REASON_MAC_INVALID_SOURCE; #if IS_ENABLED(CONFIG_IPV6) if (src_ip->sa.sa_family == AF_INET6 && @@ -1461,15 +1462,15 @@ static bool vxlan_snoop(struct net_device *dev, if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) && rdst->remote_ifindex == ifindex)) - return false; + return SKB_NOT_DROPPED_YET; /* Don't migrate static entries, drop packets */ if (f->state & (NUD_PERMANENT | NUD_NOARP)) - return true; + return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS; /* Don't override an fdb with nexthop with a learnt entry */ if (rcu_access_pointer(f->nh)) - return true; + return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS; if (net_ratelimit()) netdev_info(dev, @@ -1497,7 +1498,7 @@ static bool vxlan_snoop(struct net_device *dev, spin_unlock(&vxlan->hash_lock[hash_index]); } - return false; + return SKB_NOT_DROPPED_YET; } static bool __vxlan_sock_release_prep(struct vxlan_sock *vs) diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 98259d2b3e92..1cb8d7c953be 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -94,6 +94,8 @@ FN(TC_RECLASSIFY_LOOP) \ FN(VXLAN_INVALID_HDR) \ FN(VXLAN_VNI_NOT_FOUND) \ + FN(MAC_INVALID_SOURCE) \ + FN(VXLAN_ENTRY_EXISTS) \ FN(IP_TUNNEL_ECN) \ FNe(MAX) @@ -429,6 +431,13 @@ enum skb_drop_reason { SKB_DROP_REASON_VXLAN_INVALID_HDR, /** @SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND: no VXLAN device found for VNI */ SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND, + /** @SKB_DROP_REASON_MAC_INVALID_SOURCE: source mac is invalid */ + SKB_DROP_REASON_MAC_INVALID_SOURCE, + /** + * @SKB_DROP_REASON_VXLAN_ENTRY_EXISTS: trying to migrate a static + * entry or an entry pointing to a nexthop. + */ + SKB_DROP_REASON_VXLAN_ENTRY_EXISTS, /** * @SKB_DROP_REASON_IP_TUNNEL_ECN: skb is dropped according to * RFC 6040 4.2, see __INET_ECN_decapsulate() for detail. -- cgit v1.2.3 From d209706f562ee4fa81bdf24cf6b679c3222aa06c Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:25 +0800 Subject: net: vxlan: make vxlan_set_mac() return drop reasons Change the return type of vxlan_set_mac() from bool to enum skb_drop_reason. In this commit, the drop reason "SKB_DROP_REASON_LOCAL_MAC" is introduced for the case that the source mac of the packet is a local mac. Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/vxlan/vxlan_core.c | 19 ++++++++++--------- include/net/dropreason-core.h | 6 ++++++ 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 947c29d6605c..b4f9f23797de 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1609,9 +1609,9 @@ out: unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS; } -static bool vxlan_set_mac(struct vxlan_dev *vxlan, - struct vxlan_sock *vs, - struct sk_buff *skb, __be32 vni) +static enum skb_drop_reason vxlan_set_mac(struct vxlan_dev *vxlan, + struct vxlan_sock *vs, + struct sk_buff *skb, __be32 vni) { union vxlan_addr saddr; u32 ifindex = skb->dev->ifindex; @@ -1622,7 +1622,7 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan, /* Ignore packet loops (and multicast echo) */ if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr)) - return false; + return SKB_DROP_REASON_LOCAL_MAC; /* Get address from the outer IP header */ if (vxlan_get_sk_family(vs) == AF_INET) { @@ -1635,11 +1635,11 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan, #endif } - if ((vxlan->cfg.flags & VXLAN_F_LEARN) && - vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, ifindex, vni)) - return false; + if (!(vxlan->cfg.flags & VXLAN_F_LEARN)) + return SKB_NOT_DROPPED_YET; - return true; + return vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, + ifindex, vni); } static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph, @@ -1774,7 +1774,8 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) } if (!raw_proto) { - if (!vxlan_set_mac(vxlan, vs, skb, vni)) + reason = vxlan_set_mac(vxlan, vs, skb, vni); + if (reason) goto drop; } else { skb_reset_mac_header(skb); diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 1cb8d7c953be..fbf92d442c1b 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -97,6 +97,7 @@ FN(MAC_INVALID_SOURCE) \ FN(VXLAN_ENTRY_EXISTS) \ FN(IP_TUNNEL_ECN) \ + FN(LOCAL_MAC) \ FNe(MAX) /** @@ -443,6 +444,11 @@ enum skb_drop_reason { * RFC 6040 4.2, see __INET_ECN_decapsulate() for detail. */ SKB_DROP_REASON_IP_TUNNEL_ECN, + /** + * @SKB_DROP_REASON_LOCAL_MAC: the source MAC address is equal to + * the MAC address of the local netdev. + */ + SKB_DROP_REASON_LOCAL_MAC, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen -- cgit v1.2.3 From b71a576e452b800efeac49ecca116d954601d911 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Oct 2024 10:28:26 +0800 Subject: net: vxlan: use kfree_skb_reason() in vxlan_xmit() Replace kfree_skb() with kfree_skb_reason() in vxlan_xmit(). Following new skb drop reasons are introduced for vxlan: /* no remote found for xmit */ SKB_DROP_REASON_VXLAN_NO_REMOTE /* packet without necessary metadata reached a device which is * in "external" mode */ SKB_DROP_REASON_TUNNEL_TXINFO Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/vxlan/vxlan_core.c | 6 +++--- include/net/dropreason-core.h | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index b4f9f23797de..649b4ea5c362 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2730,7 +2730,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) if (info && info->mode & IP_TUNNEL_INFO_TX) vxlan_xmit_one(skb, dev, vni, NULL, false); else - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_TUNNEL_TXINFO); return NETDEV_TX_OK; } } @@ -2793,7 +2793,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) dev_core_stats_tx_dropped_inc(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_VXLAN_NO_REMOTE); return NETDEV_TX_OK; } } @@ -2816,7 +2816,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) if (fdst) vxlan_xmit_one(skb, dev, vni, fdst, did_rsc); else - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_VXLAN_NO_REMOTE); } return NETDEV_TX_OK; diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index fbf92d442c1b..d59bb96c5a02 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -96,7 +96,9 @@ FN(VXLAN_VNI_NOT_FOUND) \ FN(MAC_INVALID_SOURCE) \ FN(VXLAN_ENTRY_EXISTS) \ + FN(VXLAN_NO_REMOTE) \ FN(IP_TUNNEL_ECN) \ + FN(TUNNEL_TXINFO) \ FN(LOCAL_MAC) \ FNe(MAX) @@ -439,11 +441,18 @@ enum skb_drop_reason { * entry or an entry pointing to a nexthop. */ SKB_DROP_REASON_VXLAN_ENTRY_EXISTS, + /** @SKB_DROP_REASON_VXLAN_NO_REMOTE: no remote found for xmit */ + SKB_DROP_REASON_VXLAN_NO_REMOTE, /** * @SKB_DROP_REASON_IP_TUNNEL_ECN: skb is dropped according to * RFC 6040 4.2, see __INET_ECN_decapsulate() for detail. */ SKB_DROP_REASON_IP_TUNNEL_ECN, + /** + * @SKB_DROP_REASON_TUNNEL_TXINFO: packet without necessary metadata + * reached a device which is in "external" mode. + */ + SKB_DROP_REASON_TUNNEL_TXINFO, /** * @SKB_DROP_REASON_LOCAL_MAC: the source MAC address is equal to * the MAC address of the local netdev. -- cgit v1.2.3 From 7948483001039c00dcff4b94c181d7e14693a38c Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 11 Oct 2024 21:12:52 +0200 Subject: misc: keba: Add SPI controller device Add support for the SPI controller auxiliary device. This enables access to the SPI flash of the FPGA and some other SPI devices. The actual list of SPI devices is detected by reading some bits out of the previously registered I2C EEPROM. 
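
On the consuming side (not part of this patch), the auxiliary bus matches on "<registering module>.<device name>", so a controller driver binding to the SPI device registered below would match "keba.spi" and fetch the surrounding keba_spi_auxdev via container_of(). A minimal hypothetical skeleton, with keba_spi_demo_* names invented for illustration:

/* Sketch of a consuming auxiliary driver; keba_spi_demo_* is illustrative. */
#include <linux/auxiliary_bus.h>
#include <linux/misc/keba.h>
#include <linux/module.h>

static int keba_spi_demo_probe(struct auxiliary_device *auxdev,
			       const struct auxiliary_device_id *id)
{
	struct keba_spi_auxdev *spi =
		container_of(auxdev, struct keba_spi_auxdev, auxdev);

	/* spi->io is the register window, spi->info/info_size describe the
	 * SPI devices (flash, optional LAN9252) to instantiate
	 */
	dev_info(&auxdev->dev, "%d SPI device(s) to probe\n", spi->info_size);
	return 0;
}

static const struct auxiliary_device_id keba_spi_demo_ids[] = {
	{ .name = "keba.spi" },
	{ }
};
MODULE_DEVICE_TABLE(auxiliary, keba_spi_demo_ids);

static struct auxiliary_driver keba_spi_demo_driver = {
	.probe = keba_spi_demo_probe,
	.id_table = keba_spi_demo_ids,
};
module_auxiliary_driver(keba_spi_demo_driver);
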
Signed-off-by: Gerhard Engleder Link: https://lore.kernel.org/r/20241011191257.19702-4-gerhard@engleder-embedded.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/keba/cp500.c | 219 ++++++++++++++++++++++++++++++++++++++++++---- include/linux/misc/keba.h | 15 ++++ 2 files changed, 219 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/misc/keba/cp500.c b/drivers/misc/keba/cp500.c index 1eee130c3a7f..7cebf2929390 100644 --- a/drivers/misc/keba/cp500.c +++ b/drivers/misc/keba/cp500.c @@ -12,7 +12,11 @@ #include #include #include +#include +#include #include +#include +#include #define CP500 "cp500" @@ -43,6 +47,16 @@ /* EEPROM */ #define CP500_HW_CPU_EEPROM_NAME "cp500_cpu_eeprom" +#define CP500_EEPROM_DA_OFFSET 0x016F +#define CP500_EEPROM_DA_ESC_TYPE_MASK 0x01 +#define CP500_EEPROM_ESC_LAN9252 0x00 +#define CP500_EEPROM_ESC_ET1100 0x01 + +/* SPI flash running at full speed */ +#define CP500_FLASH_HZ (33 * 1000 * 1000) + +/* LAN9252 */ +#define CP500_LAN9252_HZ (10 * 1000 * 1000) #define CP500_IS_CP035(dev) ((dev)->pci_dev->device == PCI_DEVICE_ID_KEBA_CP035) #define CP500_IS_CP505(dev) ((dev)->pci_dev->device == PCI_DEVICE_ID_KEBA_CP505) @@ -55,25 +69,29 @@ struct cp500_dev_info { struct cp500_devs { struct cp500_dev_info startup; + struct cp500_dev_info spi; struct cp500_dev_info i2c; }; /* list of devices within FPGA of CP035 family (CP035, CP056, CP057) */ static struct cp500_devs cp035_devices = { - .startup = { 0x0000, SZ_4K }, - .i2c = { 0x4000, SZ_4K }, + .startup = { 0x0000, SZ_4K }, + .spi = { 0x1000, SZ_4K }, + .i2c = { 0x4000, SZ_4K }, }; /* list of devices within FPGA of CP505 family (CP503, CP505, CP507) */ static struct cp500_devs cp505_devices = { - .startup = { 0x0000, SZ_4K }, - .i2c = { 0x5000, SZ_4K }, + .startup = { 0x0000, SZ_4K }, + .spi = { 0x4000, SZ_4K }, + .i2c = { 0x5000, SZ_4K }, }; /* list of devices within FPGA of CP520 family (CP520, CP530) */ static struct cp500_devs cp520_devices = { - .startup = { 0x0000, SZ_4K }, - .i2c = { 0x5000, SZ_4K }, + .startup = { 0x0000, SZ_4K }, + .spi = { 0x4000, SZ_4K }, + .i2c = { 0x5000, SZ_4K }, }; struct cp500 { @@ -85,9 +103,12 @@ struct cp500 { int minor; int build; } version; + struct notifier_block nvmem_notifier; + atomic_t nvmem_notified; /* system FPGA BAR */ resource_size_t sys_hwbase; + struct keba_spi_auxdev *spi; struct keba_i2c_auxdev *i2c; /* ECM EtherCAT BAR */ @@ -97,6 +118,7 @@ struct cp500 { }; /* I2C devices */ +#define CP500_EEPROM_ADDR 0x50 static struct i2c_board_info cp500_i2c_info[] = { { /* temperature sensor */ I2C_BOARD_INFO("emc1403", 0x4c), @@ -107,30 +129,67 @@ static struct i2c_board_info cp500_i2c_info[] = { * CP505 family: bridge board * CP520 family: carrier board */ - I2C_BOARD_INFO("24c32", 0x50), + I2C_BOARD_INFO("24c32", CP500_EEPROM_ADDR), .dev_name = CP500_HW_CPU_EEPROM_NAME, }, { /* interface board EEPROM */ - I2C_BOARD_INFO("24c32", 0x51), + I2C_BOARD_INFO("24c32", CP500_EEPROM_ADDR + 1), }, { /* * EEPROM (optional) * CP505 family: CPU board * CP520 family: MMI board */ - I2C_BOARD_INFO("24c32", 0x52), + I2C_BOARD_INFO("24c32", CP500_EEPROM_ADDR + 2), }, { /* extension module 0 EEPROM (optional) */ - I2C_BOARD_INFO("24c32", 0x53), + I2C_BOARD_INFO("24c32", CP500_EEPROM_ADDR + 3), }, { /* extension module 1 EEPROM (optional) */ - I2C_BOARD_INFO("24c32", 0x54), + I2C_BOARD_INFO("24c32", CP500_EEPROM_ADDR + 4), }, { /* extension module 2 EEPROM (optional) */ - I2C_BOARD_INFO("24c32", 0x55), + I2C_BOARD_INFO("24c32", CP500_EEPROM_ADDR + 5), }, { /* extension 
module 3 EEPROM (optional) */ - I2C_BOARD_INFO("24c32", 0x56), + I2C_BOARD_INFO("24c32", CP500_EEPROM_ADDR + 6), + } +}; + +/* SPI devices */ +static struct mtd_partition cp500_partitions[] = { + { + .name = "system-flash-parts", + .size = MTDPART_SIZ_FULL, + .offset = 0, + .mask_flags = 0 + } +}; +static const struct flash_platform_data cp500_w25q32 = { + .type = "w25q32", + .name = "system-flash", + .parts = cp500_partitions, + .nr_parts = ARRAY_SIZE(cp500_partitions), +}; +static const struct flash_platform_data cp500_m25p16 = { + .type = "m25p16", + .name = "system-flash", + .parts = cp500_partitions, + .nr_parts = ARRAY_SIZE(cp500_partitions), +}; +static struct spi_board_info cp500_spi_info[] = { + { /* system FPGA configuration bitstream flash */ + .modalias = "m25p80", + .platform_data = &cp500_m25p16, + .max_speed_hz = CP500_FLASH_HZ, + .chip_select = 0, + .mode = SPI_MODE_3, + }, { /* LAN9252 EtherCAT slave controller */ + .modalias = "lan9252", + .platform_data = NULL, + .max_speed_hz = CP500_LAN9252_HZ, + .chip_select = 1, + .mode = SPI_MODE_3, } }; @@ -269,6 +328,125 @@ static int cp500_register_i2c(struct cp500 *cp500) return 0; } +static void cp500_spi_release(struct device *dev) +{ + struct keba_spi_auxdev *spi = + container_of(dev, struct keba_spi_auxdev, auxdev.dev); + + kfree(spi); +} + +static int cp500_register_spi(struct cp500 *cp500, u8 esc_type) +{ + int info_size; + int ret; + + cp500->spi = kzalloc(sizeof(*cp500->spi), GFP_KERNEL); + if (!cp500->spi) + return -ENOMEM; + + if (CP500_IS_CP035(cp500)) + cp500_spi_info[0].platform_data = &cp500_w25q32; + if (esc_type == CP500_EEPROM_ESC_LAN9252) + info_size = ARRAY_SIZE(cp500_spi_info); + else + info_size = ARRAY_SIZE(cp500_spi_info) - 1; + + cp500->spi->auxdev.name = "spi"; + cp500->spi->auxdev.id = 0; + cp500->spi->auxdev.dev.release = cp500_spi_release; + cp500->spi->auxdev.dev.parent = &cp500->pci_dev->dev; + cp500->spi->io = (struct resource) { + /* SPI register area */ + .start = (resource_size_t) cp500->sys_hwbase + + cp500->devs->spi.offset, + .end = (resource_size_t) cp500->sys_hwbase + + cp500->devs->spi.offset + + cp500->devs->spi.size - 1, + .flags = IORESOURCE_MEM, + }; + cp500->spi->info_size = info_size; + cp500->spi->info = cp500_spi_info; + + ret = auxiliary_device_init(&cp500->spi->auxdev); + if (ret) { + kfree(cp500->spi); + cp500->spi = NULL; + + return ret; + } + ret = __auxiliary_device_add(&cp500->spi->auxdev, "keba"); + if (ret) { + auxiliary_device_uninit(&cp500->spi->auxdev); + cp500->spi = NULL; + + return ret; + } + + return 0; +} + +static int cp500_nvmem_match(struct device *dev, const void *data) +{ + const struct cp500 *cp500 = data; + struct i2c_client *client; + + /* match only CPU EEPROM below the cp500 device */ + dev = dev->parent; + client = i2c_verify_client(dev); + if (!client || client->addr != CP500_EEPROM_ADDR) + return 0; + while ((dev = dev->parent)) + if (dev == &cp500->pci_dev->dev) + return 1; + + return 0; +} + +static int cp500_nvmem(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct nvmem_device *nvmem; + struct cp500 *cp500; + struct device *dev; + int notified; + u8 esc_type; + int ret; + + if (action != NVMEM_ADD) + return NOTIFY_DONE; + cp500 = container_of(nb, struct cp500, nvmem_notifier); + dev = &cp500->pci_dev->dev; + + /* process CPU EEPROM content only once */ + notified = atomic_read(&cp500->nvmem_notified); + if (notified) + return NOTIFY_DONE; + nvmem = nvmem_device_find(cp500, cp500_nvmem_match); + if (IS_ERR_OR_NULL(nvmem)) + 
return NOTIFY_DONE; + if (!atomic_try_cmpxchg_relaxed(&cp500->nvmem_notified, ¬ified, 1)) { + nvmem_device_put(nvmem); + + return NOTIFY_DONE; + } + + ret = nvmem_device_read(nvmem, CP500_EEPROM_DA_OFFSET, sizeof(esc_type), + (void *)&esc_type); + nvmem_device_put(nvmem); + if (ret != sizeof(esc_type)) { + dev_warn(dev, "Failed to read device assembly!\n"); + + return NOTIFY_DONE; + } + esc_type &= CP500_EEPROM_DA_ESC_TYPE_MASK; + + if (cp500_register_spi(cp500, esc_type)) + dev_warn(dev, "Failed to register SPI!\n"); + + return NOTIFY_OK; +} + static void cp500_register_auxiliary_devs(struct cp500 *cp500) { struct device *dev = &cp500->pci_dev->dev; @@ -285,7 +463,10 @@ static void cp500_unregister_dev(struct auxiliary_device *auxdev) static void cp500_unregister_auxiliary_devs(struct cp500 *cp500) { - + if (cp500->spi) { + cp500_unregister_dev(&cp500->spi->auxdev); + cp500->spi = NULL; + } if (cp500->i2c) { cp500_unregister_dev(&cp500->i2c->auxdev); cp500->i2c = NULL; @@ -396,15 +577,21 @@ static int cp500_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) pci_set_drvdata(pci_dev, cp500); + cp500->nvmem_notifier.notifier_call = cp500_nvmem; + ret = nvmem_register_notifier(&cp500->nvmem_notifier); + if (ret != 0) + goto out_free_irq; ret = cp500_enable(cp500); if (ret != 0) - goto out_free_irq; + goto out_unregister_nvmem; cp500_register_auxiliary_devs(cp500); return 0; +out_unregister_nvmem: + nvmem_unregister_notifier(&cp500->nvmem_notifier); out_free_irq: pci_free_irq_vectors(pci_dev); out_disable: @@ -422,6 +609,8 @@ static void cp500_remove(struct pci_dev *pci_dev) cp500_disable(cp500); + nvmem_unregister_notifier(&cp500->nvmem_notifier); + pci_set_drvdata(pci_dev, 0); pci_free_irq_vectors(pci_dev); diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h index 323b31a847c5..1bd5409c6f6f 100644 --- a/include/linux/misc/keba.h +++ b/include/linux/misc/keba.h @@ -7,6 +7,7 @@ #include struct i2c_board_info; +struct spi_board_info; /** * struct keba_i2c_auxdev - KEBA I2C auxiliary device @@ -22,4 +23,18 @@ struct keba_i2c_auxdev { struct i2c_board_info *info; }; +/** + * struct keba_spi_auxdev - KEBA SPI auxiliary device + * @auxdev: auxiliary device object + * @io: address range of SPI controller IO memory + * @info_size: number of SPI devices to be probed + * @info: SPI devices to be probed + */ +struct keba_spi_auxdev { + struct auxiliary_device auxdev; + struct resource io; + int info_size; + struct spi_board_info *info; +}; + #endif /* _LINUX_MISC_KEBA_H */ -- cgit v1.2.3 From f965d315bcbd65adfe5e3c161e46b5dc0a463f68 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 11 Oct 2024 21:12:55 +0200 Subject: misc: keba: Add fan device Add support for the fan auxiliary device. This enables monitoring of the fan. 
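
The fan helper below repeats the same registration dance as the SPI and I2C helpers: fill in name, id, release callback, parent and the IO window, then auxiliary_device_init() followed by __auxiliary_device_add(). Note the asymmetric error handling: if init fails the caller still owns the memory and must kfree() it, but once init has succeeded, auxiliary_device_uninit() drops the last reference and the release callback frees it. Condensed into one hypothetical helper (cp500_demo_add_auxdev() does not exist in the driver), the shared pattern is:

/* Illustrative condensation of the repeated registration pattern. */
static int cp500_demo_add_auxdev(struct cp500 *cp500,
				 struct auxiliary_device *auxdev,
				 const char *name,
				 void (*release)(struct device *))
{
	int ret;

	auxdev->name = name;
	auxdev->id = 0;
	auxdev->dev.release = release;
	auxdev->dev.parent = &cp500->pci_dev->dev;

	ret = auxiliary_device_init(auxdev);
	if (ret)
		return ret;	/* caller still owns (and frees) the memory */

	ret = __auxiliary_device_add(auxdev, "keba");
	if (ret)
		auxiliary_device_uninit(auxdev);	/* release() frees it */

	return ret;
}
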
Signed-off-by: Gerhard Engleder Link: https://lore.kernel.org/r/20241011191257.19702-7-gerhard@engleder-embedded.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/keba/cp500.c | 83 ++++++++++++++++++++++++++++++++++++++++++----- include/linux/misc/keba.h | 10 ++++++ 2 files changed, 84 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/misc/keba/cp500.c b/drivers/misc/keba/cp500.c index 3cf99eaf45c4..ae3ed1cece32 100644 --- a/drivers/misc/keba/cp500.c +++ b/drivers/misc/keba/cp500.c @@ -32,6 +32,7 @@ /* BAR 0 registers */ #define CP500_VERSION_REG 0x00 #define CP500_RECONFIG_REG 0x11 /* upper 8-bits of STARTUP register */ +#define CP500_PRESENT_REG 0x20 #define CP500_AXI_REG 0x40 /* Bits in BUILD_REG */ @@ -40,6 +41,9 @@ /* Bits in RECONFIG_REG */ #define CP500_RECFG_REQ 0x01 /* reconfigure FPGA on next reset */ +/* Bits in PRESENT_REG */ +#define CP500_PRESENT_FAN0 0x01 + /* MSIX */ #define CP500_AXI_MSIX 3 #define CP500_NUM_MSIX 8 @@ -77,27 +81,31 @@ struct cp500_devs { struct cp500_dev_info startup; struct cp500_dev_info spi; struct cp500_dev_info i2c; + struct cp500_dev_info fan; }; /* list of devices within FPGA of CP035 family (CP035, CP056, CP057) */ static struct cp500_devs cp035_devices = { - .startup = { 0x0000, SZ_4K }, - .spi = { 0x1000, SZ_4K }, - .i2c = { 0x4000, SZ_4K }, + .startup = { 0x0000, SZ_4K }, + .spi = { 0x1000, SZ_4K }, + .i2c = { 0x4000, SZ_4K }, + .fan = { 0x9000, SZ_4K }, }; /* list of devices within FPGA of CP505 family (CP503, CP505, CP507) */ static struct cp500_devs cp505_devices = { - .startup = { 0x0000, SZ_4K }, - .spi = { 0x4000, SZ_4K }, - .i2c = { 0x5000, SZ_4K }, + .startup = { 0x0000, SZ_4K }, + .spi = { 0x4000, SZ_4K }, + .i2c = { 0x5000, SZ_4K }, + .fan = { 0x9000, SZ_4K }, }; /* list of devices within FPGA of CP520 family (CP520, CP530) */ static struct cp500_devs cp520_devices = { - .startup = { 0x0000, SZ_4K }, - .spi = { 0x4000, SZ_4K }, - .i2c = { 0x5000, SZ_4K }, + .startup = { 0x0000, SZ_4K }, + .spi = { 0x4000, SZ_4K }, + .i2c = { 0x5000, SZ_4K }, + .fan = { 0x8000, SZ_4K }, }; struct cp500_nvmem { @@ -121,6 +129,7 @@ struct cp500 { resource_size_t sys_hwbase; struct keba_spi_auxdev *spi; struct keba_i2c_auxdev *i2c; + struct keba_fan_auxdev *fan; /* ECM EtherCAT BAR */ resource_size_t ecm_hwbase; @@ -400,6 +409,54 @@ static int cp500_register_spi(struct cp500 *cp500, u8 esc_type) return 0; } +static void cp500_fan_release(struct device *dev) +{ + struct keba_fan_auxdev *fan = + container_of(dev, struct keba_fan_auxdev, auxdev.dev); + + kfree(fan); +} + +static int cp500_register_fan(struct cp500 *cp500) +{ + int ret; + + cp500->fan = kzalloc(sizeof(*cp500->fan), GFP_KERNEL); + if (!cp500->fan) + return -ENOMEM; + + cp500->fan->auxdev.name = "fan"; + cp500->fan->auxdev.id = 0; + cp500->fan->auxdev.dev.release = cp500_fan_release; + cp500->fan->auxdev.dev.parent = &cp500->pci_dev->dev; + cp500->fan->io = (struct resource) { + /* fan register area */ + .start = (resource_size_t) cp500->sys_hwbase + + cp500->devs->fan.offset, + .end = (resource_size_t) cp500->sys_hwbase + + cp500->devs->fan.offset + + cp500->devs->fan.size - 1, + .flags = IORESOURCE_MEM, + }; + + ret = auxiliary_device_init(&cp500->fan->auxdev); + if (ret) { + kfree(cp500->fan); + cp500->fan = NULL; + + return ret; + } + ret = __auxiliary_device_add(&cp500->fan->auxdev, "keba"); + if (ret) { + auxiliary_device_uninit(&cp500->fan->auxdev); + cp500->fan = NULL; + + return ret; + } + + return 0; +} + static int cp500_nvmem_read(void *priv, unsigned int 
offset, void *val, size_t bytes) { @@ -549,9 +606,13 @@ static int cp500_nvmem(struct notifier_block *nb, unsigned long action, static void cp500_register_auxiliary_devs(struct cp500 *cp500) { struct device *dev = &cp500->pci_dev->dev; + u8 present = ioread8(cp500->system_startup_addr + CP500_PRESENT_REG); if (cp500_register_i2c(cp500)) dev_warn(dev, "Failed to register I2C!\n"); + if (present & CP500_PRESENT_FAN0) + if (cp500_register_fan(cp500)) + dev_warn(dev, "Failed to register fan!\n"); } static void cp500_unregister_dev(struct auxiliary_device *auxdev) @@ -570,6 +631,10 @@ static void cp500_unregister_auxiliary_devs(struct cp500 *cp500) cp500_unregister_dev(&cp500->i2c->auxdev); cp500->i2c = NULL; } + if (cp500->fan) { + cp500_unregister_dev(&cp500->fan->auxdev); + cp500->fan = NULL; + } } static irqreturn_t cp500_axi_handler(int irq, void *dev) diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h index 1bd5409c6f6f..451777acc262 100644 --- a/include/linux/misc/keba.h +++ b/include/linux/misc/keba.h @@ -37,4 +37,14 @@ struct keba_spi_auxdev { struct spi_board_info *info; }; +/** + * struct keba_fan_auxdev - KEBA fan auxiliary device + * @auxdev: auxiliary device object + * @io: address range of fan controller IO memory + */ +struct keba_fan_auxdev { + struct auxiliary_device auxdev; + struct resource io; +}; + #endif /* _LINUX_MISC_KEBA_H */ -- cgit v1.2.3 From ca7b844b91920573835ad11daaa30630ce112fe1 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 11 Oct 2024 21:12:56 +0200 Subject: misc: keba: Add battery device Add support for the battery auxiliary device. This enables monitoring of the battery. Signed-off-by: Gerhard Engleder Link: https://lore.kernel.org/r/20241011191257.19702-8-gerhard@engleder-embedded.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/keba/cp500.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/misc/keba.h | 10 ++++++++ 2 files changed, 69 insertions(+) (limited to 'include') diff --git a/drivers/misc/keba/cp500.c b/drivers/misc/keba/cp500.c index ae3ed1cece32..afd4d7c06cee 100644 --- a/drivers/misc/keba/cp500.c +++ b/drivers/misc/keba/cp500.c @@ -82,6 +82,7 @@ struct cp500_devs { struct cp500_dev_info spi; struct cp500_dev_info i2c; struct cp500_dev_info fan; + struct cp500_dev_info batt; }; /* list of devices within FPGA of CP035 family (CP035, CP056, CP057) */ @@ -90,6 +91,7 @@ static struct cp500_devs cp035_devices = { .spi = { 0x1000, SZ_4K }, .i2c = { 0x4000, SZ_4K }, .fan = { 0x9000, SZ_4K }, + .batt = { 0xA000, SZ_4K }, }; /* list of devices within FPGA of CP505 family (CP503, CP505, CP507) */ @@ -98,6 +100,7 @@ static struct cp500_devs cp505_devices = { .spi = { 0x4000, SZ_4K }, .i2c = { 0x5000, SZ_4K }, .fan = { 0x9000, SZ_4K }, + .batt = { 0xA000, SZ_4K }, }; /* list of devices within FPGA of CP520 family (CP520, CP530) */ @@ -106,6 +109,7 @@ static struct cp500_devs cp520_devices = { .spi = { 0x4000, SZ_4K }, .i2c = { 0x5000, SZ_4K }, .fan = { 0x8000, SZ_4K }, + .batt = { 0x9000, SZ_4K }, }; struct cp500_nvmem { @@ -130,6 +134,7 @@ struct cp500 { struct keba_spi_auxdev *spi; struct keba_i2c_auxdev *i2c; struct keba_fan_auxdev *fan; + struct keba_batt_auxdev *batt; /* ECM EtherCAT BAR */ resource_size_t ecm_hwbase; @@ -457,6 +462,54 @@ static int cp500_register_fan(struct cp500 *cp500) return 0; } +static void cp500_batt_release(struct device *dev) +{ + struct keba_batt_auxdev *fan = + container_of(dev, struct keba_batt_auxdev, auxdev.dev); + + kfree(fan); +} + +static int cp500_register_batt(struct 
cp500 *cp500) +{ + int ret; + + cp500->batt = kzalloc(sizeof(*cp500->batt), GFP_KERNEL); + if (!cp500->batt) + return -ENOMEM; + + cp500->batt->auxdev.name = "batt"; + cp500->batt->auxdev.id = 0; + cp500->batt->auxdev.dev.release = cp500_batt_release; + cp500->batt->auxdev.dev.parent = &cp500->pci_dev->dev; + cp500->batt->io = (struct resource) { + /* battery register area */ + .start = (resource_size_t) cp500->sys_hwbase + + cp500->devs->batt.offset, + .end = (resource_size_t) cp500->sys_hwbase + + cp500->devs->batt.offset + + cp500->devs->batt.size - 1, + .flags = IORESOURCE_MEM, + }; + + ret = auxiliary_device_init(&cp500->batt->auxdev); + if (ret) { + kfree(cp500->batt); + cp500->batt = NULL; + + return ret; + } + ret = __auxiliary_device_add(&cp500->batt->auxdev, "keba"); + if (ret) { + auxiliary_device_uninit(&cp500->batt->auxdev); + cp500->batt = NULL; + + return ret; + } + + return 0; +} + static int cp500_nvmem_read(void *priv, unsigned int offset, void *val, size_t bytes) { @@ -613,6 +666,8 @@ static void cp500_register_auxiliary_devs(struct cp500 *cp500) if (present & CP500_PRESENT_FAN0) if (cp500_register_fan(cp500)) dev_warn(dev, "Failed to register fan!\n"); + if (cp500_register_batt(cp500)) + dev_warn(dev, "Failed to register battery!\n"); } static void cp500_unregister_dev(struct auxiliary_device *auxdev) @@ -635,6 +690,10 @@ static void cp500_unregister_auxiliary_devs(struct cp500 *cp500) cp500_unregister_dev(&cp500->fan->auxdev); cp500->fan = NULL; } + if (cp500->batt) { + cp500_unregister_dev(&cp500->batt->auxdev); + cp500->batt = NULL; + } } static irqreturn_t cp500_axi_handler(int irq, void *dev) diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h index 451777acc262..ca52716f8437 100644 --- a/include/linux/misc/keba.h +++ b/include/linux/misc/keba.h @@ -47,4 +47,14 @@ struct keba_fan_auxdev { struct resource io; }; +/** + * struct keba_batt_auxdev - KEBA battery auxiliary device + * @auxdev: auxiliary device object + * @io: address range of battery controller IO memory + */ +struct keba_batt_auxdev { + struct auxiliary_device auxdev; + struct resource io; +}; + #endif /* _LINUX_MISC_KEBA_H */ -- cgit v1.2.3 From a27b406a49225a17849c86221a32f2d598702719 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Fri, 11 Oct 2024 21:12:57 +0200 Subject: misc: keba: Add UART devices Add support for the UART auxiliary devices. This enables access to up to 3 different UARTs, which are implemented in the FPGA. 
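As an illustration of how these cp500 auxiliary devices are meant to be consumed, a minimal client driver could bind on the auxiliary bus using the "<modname>.<name>" match string, here "keba.rs485-uart", and pick up the I/O range and IRQ from struct keba_uart_auxdev. The sketch below is hypothetical and for illustration only; its name and probe body are assumptions, not the actual KEBA serial driver.

/*
 * Hypothetical consumer sketch, assuming the "rs485-uart" auxdev name
 * registered with modname "keba" by cp500_register_uart() below.
 */
#include <linux/auxiliary_bus.h>
#include <linux/io.h>
#include <linux/misc/keba.h>
#include <linux/module.h>

static int keba_uart_demo_probe(struct auxiliary_device *auxdev,
				const struct auxiliary_device_id *id)
{
	struct keba_uart_auxdev *uart =
		container_of(auxdev, struct keba_uart_auxdev, auxdev);
	void __iomem *base;

	/* Map the FPGA register window handed over by the cp500 driver. */
	base = ioremap(uart->io.start, resource_size(&uart->io));
	if (!base)
		return -ENOMEM;

	dev_info(&auxdev->dev, "UART regs %pR, irq %u\n", &uart->io, uart->irq);
	auxiliary_set_drvdata(auxdev, base);

	return 0;
}

static void keba_uart_demo_remove(struct auxiliary_device *auxdev)
{
	iounmap(auxiliary_get_drvdata(auxdev));
}

static const struct auxiliary_device_id keba_uart_demo_id_table[] = {
	{ .name = "keba.rs485-uart" },	/* "<modname>.<auxdev name>" */
	{ }
};
MODULE_DEVICE_TABLE(auxiliary, keba_uart_demo_id_table);

static struct auxiliary_driver keba_uart_demo_driver = {
	.probe = keba_uart_demo_probe,
	.remove = keba_uart_demo_remove,
	.id_table = keba_uart_demo_id_table,
};
module_auxiliary_driver(keba_uart_demo_driver);

MODULE_LICENSE("GPL");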
Signed-off-by: Gerhard Engleder Link: https://lore.kernel.org/r/20241011191257.19702-9-gerhard@engleder-embedded.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/keba/cp500.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/misc/keba.h | 12 ++++++ 2 files changed, 116 insertions(+) (limited to 'include') diff --git a/drivers/misc/keba/cp500.c b/drivers/misc/keba/cp500.c index afd4d7c06cee..255d3022dae8 100644 --- a/drivers/misc/keba/cp500.c +++ b/drivers/misc/keba/cp500.c @@ -46,6 +46,9 @@ /* MSIX */ #define CP500_AXI_MSIX 3 +#define CP500_RFB_UART_MSIX 4 +#define CP500_DEBUG_UART_MSIX 5 +#define CP500_SI1_UART_MSIX 6 #define CP500_NUM_MSIX 8 #define CP500_NUM_MSIX_NO_MMI 2 #define CP500_NUM_MSIX_NO_AXI 3 @@ -75,6 +78,7 @@ struct cp500_dev_info { off_t offset; size_t size; + unsigned int msix; }; struct cp500_devs { @@ -83,6 +87,9 @@ struct cp500_devs { struct cp500_dev_info i2c; struct cp500_dev_info fan; struct cp500_dev_info batt; + struct cp500_dev_info uart0_rfb; + struct cp500_dev_info uart1_dbg; + struct cp500_dev_info uart2_si1; }; /* list of devices within FPGA of CP035 family (CP035, CP056, CP057) */ @@ -92,6 +99,8 @@ static struct cp500_devs cp035_devices = { .i2c = { 0x4000, SZ_4K }, .fan = { 0x9000, SZ_4K }, .batt = { 0xA000, SZ_4K }, + .uart0_rfb = { 0xB000, SZ_4K, CP500_RFB_UART_MSIX }, + .uart2_si1 = { 0xD000, SZ_4K, CP500_SI1_UART_MSIX }, }; /* list of devices within FPGA of CP505 family (CP503, CP505, CP507) */ @@ -101,6 +110,8 @@ static struct cp500_devs cp505_devices = { .i2c = { 0x5000, SZ_4K }, .fan = { 0x9000, SZ_4K }, .batt = { 0xA000, SZ_4K }, + .uart0_rfb = { 0xB000, SZ_4K, CP500_RFB_UART_MSIX }, + .uart2_si1 = { 0xD000, SZ_4K, CP500_SI1_UART_MSIX }, }; /* list of devices within FPGA of CP520 family (CP520, CP530) */ @@ -110,6 +121,8 @@ static struct cp500_devs cp520_devices = { .i2c = { 0x5000, SZ_4K }, .fan = { 0x8000, SZ_4K }, .batt = { 0x9000, SZ_4K }, + .uart0_rfb = { 0xC000, SZ_4K, CP500_RFB_UART_MSIX }, + .uart1_dbg = { 0xD000, SZ_4K, CP500_DEBUG_UART_MSIX }, }; struct cp500_nvmem { @@ -135,6 +148,9 @@ struct cp500 { struct keba_i2c_auxdev *i2c; struct keba_fan_auxdev *fan; struct keba_batt_auxdev *batt; + struct keba_uart_auxdev *uart0_rfb; + struct keba_uart_auxdev *uart1_dbg; + struct keba_uart_auxdev *uart2_si1; /* ECM EtherCAT BAR */ resource_size_t ecm_hwbase; @@ -510,6 +526,55 @@ static int cp500_register_batt(struct cp500 *cp500) return 0; } +static void cp500_uart_release(struct device *dev) +{ + struct keba_uart_auxdev *uart = + container_of(dev, struct keba_uart_auxdev, auxdev.dev); + + kfree(uart); +} + +static int cp500_register_uart(struct cp500 *cp500, + struct keba_uart_auxdev **uart, const char *name, + struct cp500_dev_info *info, unsigned int irq) +{ + int ret; + + *uart = kzalloc(sizeof(**uart), GFP_KERNEL); + if (!*uart) + return -ENOMEM; + + (*uart)->auxdev.name = name; + (*uart)->auxdev.id = 0; + (*uart)->auxdev.dev.release = cp500_uart_release; + (*uart)->auxdev.dev.parent = &cp500->pci_dev->dev; + (*uart)->io = (struct resource) { + /* UART register area */ + .start = (resource_size_t) cp500->sys_hwbase + info->offset, + .end = (resource_size_t) cp500->sys_hwbase + info->offset + + info->size - 1, + .flags = IORESOURCE_MEM, + }; + (*uart)->irq = irq; + + ret = auxiliary_device_init(&(*uart)->auxdev); + if (ret) { + kfree(*uart); + *uart = NULL; + + return ret; + } + ret = __auxiliary_device_add(&(*uart)->auxdev, "keba"); + if (ret) { + auxiliary_device_uninit(&(*uart)->auxdev); + *uart = NULL; + + return ret; 
+ } + + return 0; +} + static int cp500_nvmem_read(void *priv, unsigned int offset, void *val, size_t bytes) { @@ -668,6 +733,33 @@ static void cp500_register_auxiliary_devs(struct cp500 *cp500) dev_warn(dev, "Failed to register fan!\n"); if (cp500_register_batt(cp500)) dev_warn(dev, "Failed to register battery!\n"); + if (cp500->devs->uart0_rfb.size && + cp500->devs->uart0_rfb.msix < cp500->msix_num) { + int irq = pci_irq_vector(cp500->pci_dev, + cp500->devs->uart0_rfb.msix); + + if (cp500_register_uart(cp500, &cp500->uart0_rfb, "rs485-uart", + &cp500->devs->uart0_rfb, irq)) + dev_warn(dev, "Failed to register RFB UART!\n"); + } + if (cp500->devs->uart1_dbg.size && + cp500->devs->uart1_dbg.msix < cp500->msix_num) { + int irq = pci_irq_vector(cp500->pci_dev, + cp500->devs->uart1_dbg.msix); + + if (cp500_register_uart(cp500, &cp500->uart1_dbg, "rs232-uart", + &cp500->devs->uart1_dbg, irq)) + dev_warn(dev, "Failed to register debug UART!\n"); + } + if (cp500->devs->uart2_si1.size && + cp500->devs->uart2_si1.msix < cp500->msix_num) { + int irq = pci_irq_vector(cp500->pci_dev, + cp500->devs->uart2_si1.msix); + + if (cp500_register_uart(cp500, &cp500->uart2_si1, "uart", + &cp500->devs->uart2_si1, irq)) + dev_warn(dev, "Failed to register SI1 UART!\n"); + } } static void cp500_unregister_dev(struct auxiliary_device *auxdev) @@ -694,6 +786,18 @@ static void cp500_unregister_auxiliary_devs(struct cp500 *cp500) cp500_unregister_dev(&cp500->batt->auxdev); cp500->batt = NULL; } + if (cp500->uart0_rfb) { + cp500_unregister_dev(&cp500->uart0_rfb->auxdev); + cp500->uart0_rfb = NULL; + } + if (cp500->uart1_dbg) { + cp500_unregister_dev(&cp500->uart1_dbg->auxdev); + cp500->uart1_dbg = NULL; + } + if (cp500->uart2_si1) { + cp500_unregister_dev(&cp500->uart2_si1->auxdev); + cp500->uart2_si1 = NULL; + } } static irqreturn_t cp500_axi_handler(int irq, void *dev) diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h index ca52716f8437..a81d6fa70851 100644 --- a/include/linux/misc/keba.h +++ b/include/linux/misc/keba.h @@ -57,4 +57,16 @@ struct keba_batt_auxdev { struct resource io; }; +/** + * struct keba_uart_auxdev - KEBA UART auxiliary device + * @auxdev: auxiliary device object + * @io: address range of UART controller IO memory + * @irq: number of UART controller interrupt + */ +struct keba_uart_auxdev { + struct auxiliary_device auxdev; + struct resource io; + unsigned int irq; +}; + #endif /* _LINUX_MISC_KEBA_H */ -- cgit v1.2.3 From cec78a59abc9b1a06f742cb96479fc0bb1fe4e90 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 17 Sep 2024 18:42:56 +0800 Subject: list: Remove duplicated and unused macro list_for_each_reverse Remove macro list_for_each_reverse due to below reasons: - it is same as list_for_each_prev. - it is not used by current kernel tree. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240917-fix_list-v2-1-d2914665e89f@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/list.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 5f4b0a39cf46..29a375889fb8 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -686,14 +686,6 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each(pos, head) \ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) -/** - * list_for_each_reverse - iterate backwards over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. 
- */ -#define list_for_each_reverse(pos, head) \ - for (pos = (head)->prev; pos != (head); pos = pos->prev) - /** * list_for_each_rcu - Iterate over a list in an RCU-safe fashion * @pos: the &struct list_head to use as a loop cursor. -- cgit v1.2.3 From 0ebe74c53b8b62bde7b02415d28e75aa25d6c2e6 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 29 Sep 2024 15:11:12 +0100 Subject: drivers/base: Remove unused auxiliary_find_device auxiliary_find_device has been unused since commit 1c5de097bea3 ("net/mlx5: Fix mlx5_get_next_dev() peer device matching") which was the only use since it was originally added. Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/20240929141112.69824-1-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/auxiliary_bus.rst | 1 - drivers/base/auxiliary.c | 29 ----------------------------- include/linux/auxiliary_bus.h | 4 ---- 3 files changed, 34 deletions(-) (limited to 'include') diff --git a/Documentation/driver-api/auxiliary_bus.rst b/Documentation/driver-api/auxiliary_bus.rst index cec84908fbc0..b236de773e1d 100644 --- a/Documentation/driver-api/auxiliary_bus.rst +++ b/Documentation/driver-api/auxiliary_bus.rst @@ -24,7 +24,6 @@ Auxiliary Device Creation .. kernel-doc:: drivers/base/auxiliary.c :identifiers: auxiliary_device_init __auxiliary_device_add - auxiliary_find_device Auxiliary Device Memory Model and Lifespan ------------------------------------------ diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index 7823888af4f6..69b7c93613d6 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -335,35 +335,6 @@ int __auxiliary_device_add(struct auxiliary_device *auxdev, const char *modname) } EXPORT_SYMBOL_GPL(__auxiliary_device_add); -/** - * auxiliary_find_device - auxiliary device iterator for locating a particular device. - * @start: Device to begin with - * @data: Data to pass to match function - * @match: Callback function to check device - * - * This function returns a reference to a device that is 'found' - * for later use, as determined by the @match callback. - * - * The reference returned should be released with put_device(). - * - * The callback should return 0 if the device doesn't match and non-zero - * if it does. If the callback returns non-zero, this function will - * return to the caller and not iterate over any more devices. 
- */ -struct auxiliary_device *auxiliary_find_device(struct device *start, - const void *data, - device_match_t match) -{ - struct device *dev; - - dev = bus_find_device(&auxiliary_bus_type, start, data, match); - if (!dev) - return NULL; - - return to_auxiliary_dev(dev); -} -EXPORT_SYMBOL_GPL(auxiliary_find_device); - /** * __auxiliary_driver_register - register a driver for auxiliary bus devices * @auxdrv: auxiliary_driver structure diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 31762324bcc9..65dd7f154374 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -269,8 +269,4 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); #define module_auxiliary_driver(__auxiliary_driver) \ module_driver(__auxiliary_driver, auxiliary_driver_register, auxiliary_driver_unregister) -struct auxiliary_device *auxiliary_find_device(struct device *start, - const void *data, - device_match_t match); - #endif /* _AUXILIARY_BUS_H_ */ -- cgit v1.2.3 From 52345d35622026b99271edc1c58ad7cfef3b7567 Mon Sep 17 00:00:00 2001 From: Amadeusz Sławiński Date: Mon, 14 Oct 2024 11:49:58 +0200 Subject: ALSA: hda: Fix all stream interrupts definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is defined in header to 0xFF, which only allows to set values for 8 streams. In specification it is defined as bits from 0 to 29. In practice there is no HW with 29 streams, but as the only place where the value is used is chip initialization, it is best to make sure that all bits are reset properly. Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://patch.msgid.link/20241014094958.708563-1-amadeuszx.slawinski@linux.intel.com Signed-off-by: Takashi Iwai --- include/sound/hda_register.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/hda_register.h b/include/sound/hda_register.h index 5ff31e6d41c1..db1cc0b897fd 100644 --- a/include/sound/hda_register.h +++ b/include/sound/hda_register.h @@ -180,7 +180,7 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 }; #define SD_STS_FIFO_READY 0x20 /* FIFO ready */ /* INTCTL and INTSTS */ -#define AZX_INT_ALL_STREAM 0xff /* all stream interrupts */ +#define AZX_INT_ALL_STREAM 0x3fffffff /* all stream interrupts */ #define AZX_INT_CTRL_EN 0x40000000 /* controller interrupt enable bit */ #define AZX_INT_GLOBAL_EN 0x80000000 /* global interrupt enable bit */ -- cgit v1.2.3 From cd068d51594d9635bf6688fc78717572b78bce6a Mon Sep 17 00:00:00 2001 From: Keita Aihara Date: Fri, 13 Sep 2024 18:44:17 +0900 Subject: mmc: core: Add SD card quirk for broken poweroff notification GIGASTONE Gaming Plus microSD cards manufactured on 02/2022 report that they support poweroff notification and cache, but they are not working correctly. Flush Cache bit never gets cleared in sd_flush_cache() and Poweroff Notification Ready bit also never gets set to 1 within 1 second from the end of busy of CMD49 in sd_poweroff_notify(). This leads to I/O error and runtime PM error state. I observed that the same card manufactured on 01/2024 works as expected. This problem seems similar to the Kingston cards fixed with commit c467c8f08185 ("mmc: Add MMC_QUIRK_BROKEN_SD_CACHE for Kingston Canvas Go Plus from 11/2019") and should be handled using quirks. CID for the problematic card is here. 
12345641535443002000000145016200 Manufacturer ID is 0x12 and defined as CID_MANFID_GIGASTONE as of now, but would like comments on what naming is appropriate because MID list is not public and not sure it's right. Signed-off-by: Keita Aihara Link: https://lore.kernel.org/r/20240913094417.GA4191647@sony.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/card.h | 7 +++++++ drivers/mmc/core/quirks.h | 9 +++++++++ drivers/mmc/core/sd.c | 2 +- include/linux/mmc/card.h | 1 + 4 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h index b7754a1b8d97..8476754b1b17 100644 --- a/drivers/mmc/core/card.h +++ b/drivers/mmc/core/card.h @@ -82,6 +82,7 @@ struct mmc_fixup { #define CID_MANFID_SANDISK_SD 0x3 #define CID_MANFID_ATP 0x9 #define CID_MANFID_TOSHIBA 0x11 +#define CID_MANFID_GIGASTONE 0x12 #define CID_MANFID_MICRON 0x13 #define CID_MANFID_SAMSUNG 0x15 #define CID_MANFID_APACER 0x27 @@ -284,4 +285,10 @@ static inline int mmc_card_broken_cache_flush(const struct mmc_card *c) { return c->quirks & MMC_QUIRK_BROKEN_CACHE_FLUSH; } + +static inline int mmc_card_broken_sd_poweroff_notify(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY; +} + #endif diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 92905fc46436..89b512905be1 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -25,6 +25,15 @@ static const struct mmc_fixup __maybe_unused mmc_sd_fixups[] = { 0, -1ull, SDIO_ANY_ID, SDIO_ANY_ID, add_quirk_sd, MMC_QUIRK_BROKEN_SD_CACHE, EXT_CSD_REV_ANY), + /* + * GIGASTONE Gaming Plus microSD cards manufactured on 02/2022 never + * clear Flush Cache bit and set Poweroff Notification Ready bit. + */ + _FIXUP_EXT("ASTC", CID_MANFID_GIGASTONE, 0x3456, 2022, 2, + 0, -1ull, SDIO_ANY_ID, SDIO_ANY_ID, add_quirk_sd, + MMC_QUIRK_BROKEN_SD_CACHE | MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY, + EXT_CSD_REV_ANY), + END_FIXUP }; diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 12fe282bea77..9e62cb7055fe 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -1107,7 +1107,7 @@ static int sd_parse_ext_reg_power(struct mmc_card *card, u8 fno, u8 page, card->ext_power.rev = reg_buf[0] & 0xf; /* Power Off Notification support at bit 4. */ - if (reg_buf[1] & BIT(4)) + if ((reg_buf[1] & BIT(4)) && !mmc_card_broken_sd_poweroff_notify(card)) card->ext_power.feature_support |= SD_EXT_POWER_OFF_NOTIFY; /* Power Sustenance support at bit 5. */ diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index f34407cc2788..543446392776 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -294,6 +294,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ #define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ +#define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY (1<<17) /* Disable broken SD poweroff notify support */ bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ -- cgit v1.2.3 From fce2ce78af1e14dc1316aaddb5b3308be05cf452 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Sun, 6 Oct 2024 08:11:39 +0300 Subject: mmc: sd: SDUC Support Recognition Ultra Capacity SD cards (SDUC) was already introduced in SD7.0. Those cards support capacity larger than 2TB and up to including 128TB. 
ACMD41 was extended to support the host-card handshake during initialization. The card expects that the HCS & HO2T bits to be set in the command argument, and sets the applicable bits in the R3 returned response. On the contrary, if a SDUC card is inserted to a non-supporting host, it will never respond to this ACMD41 until eventually, the host will timed out and give up. Also, add SD CSD version 3.0 - designated for SDUC, and properly parse the csd register as the c_size field got expanded to 28 bits. Do not enable SDUC for now - leave it to the last patch in the series. Tested-by: Ricky WU Reviewed-by: Adrian Hunter Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20241006051148.160278-2-avri.altman@wdc.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/bus.c | 4 +++- drivers/mmc/core/card.h | 3 +++ drivers/mmc/core/sd.c | 28 +++++++++++++++++----------- drivers/mmc/core/sd.h | 2 +- drivers/mmc/core/sdio.c | 2 +- include/linux/mmc/card.h | 2 +- include/linux/mmc/sd.h | 1 + 7 files changed, 27 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 0ddaee0eae54..30763b342bd3 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -321,7 +321,9 @@ int mmc_add_card(struct mmc_card *card) case MMC_TYPE_SD: type = "SD"; if (mmc_card_blockaddr(card)) { - if (mmc_card_ext_capacity(card)) + if (mmc_card_ult_capacity(card)) + type = "SDUC"; + else if (mmc_card_ext_capacity(card)) type = "SDXC"; else type = "SDHC"; diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h index 8476754b1b17..3205feb1e8ff 100644 --- a/drivers/mmc/core/card.h +++ b/drivers/mmc/core/card.h @@ -23,6 +23,7 @@ #define MMC_CARD_SDXC (1<<3) /* card is SDXC */ #define MMC_CARD_REMOVED (1<<4) /* card has been removed */ #define MMC_STATE_SUSPENDED (1<<5) /* card is suspended */ +#define MMC_CARD_SDUC (1<<6) /* card is SDUC */ #define mmc_card_present(c) ((c)->state & MMC_STATE_PRESENT) #define mmc_card_readonly(c) ((c)->state & MMC_STATE_READONLY) @@ -30,11 +31,13 @@ #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC) #define mmc_card_removed(c) ((c) && ((c)->state & MMC_CARD_REMOVED)) #define mmc_card_suspended(c) ((c)->state & MMC_STATE_SUSPENDED) +#define mmc_card_ult_capacity(c) ((c)->state & MMC_CARD_SDUC) #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY) #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR) #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC) +#define mmc_card_set_ult_capacity(c) ((c)->state |= MMC_CARD_SDUC) #define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED) #define mmc_card_set_suspended(c) ((c)->state |= MMC_STATE_SUSPENDED) #define mmc_card_clr_suspended(c) ((c)->state &= ~MMC_STATE_SUSPENDED) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 9e62cb7055fe..63915541c0e4 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -100,7 +100,7 @@ void mmc_decode_cid(struct mmc_card *card) /* * Given a 128-bit response, decode to our card CSD structure. */ -static int mmc_decode_csd(struct mmc_card *card) +static int mmc_decode_csd(struct mmc_card *card, bool is_sduc) { struct mmc_csd *csd = &card->csd; unsigned int e, m, csd_struct; @@ -144,9 +144,10 @@ static int mmc_decode_csd(struct mmc_card *card) mmc_card_set_readonly(card); break; case 1: + case 2: /* - * This is a block-addressed SDHC or SDXC card. 
Most - * interesting fields are unused and have fixed + * This is a block-addressed SDHC, SDXC or SDUC card. + * Most interesting fields are unused and have fixed * values. To avoid getting tripped by buggy cards, * we assume those fixed values ourselves. */ @@ -159,14 +160,19 @@ static int mmc_decode_csd(struct mmc_card *card) e = unstuff_bits(resp, 96, 3); csd->max_dtr = tran_exp[e] * tran_mant[m]; csd->cmdclass = unstuff_bits(resp, 84, 12); - csd->c_size = unstuff_bits(resp, 48, 22); - /* SDXC cards have a minimum C_SIZE of 0x00FFFF */ - if (csd->c_size >= 0xFFFF) + if (csd_struct == 1) + m = unstuff_bits(resp, 48, 22); + else + m = unstuff_bits(resp, 48, 28); + csd->c_size = m; + + if (csd->c_size >= 0x400000 && is_sduc) + mmc_card_set_ult_capacity(card); + else if (csd->c_size >= 0xFFFF) mmc_card_set_ext_capacity(card); - m = unstuff_bits(resp, 48, 22); - csd->capacity = (1 + m) << 10; + csd->capacity = (1 + (typeof(sector_t))m) << 10; csd->read_blkbits = 9; csd->read_partial = 0; @@ -876,7 +882,7 @@ try_again: return err; } -int mmc_sd_get_csd(struct mmc_card *card) +int mmc_sd_get_csd(struct mmc_card *card, bool is_sduc) { int err; @@ -887,7 +893,7 @@ int mmc_sd_get_csd(struct mmc_card *card) if (err) return err; - err = mmc_decode_csd(card); + err = mmc_decode_csd(card, is_sduc); if (err) return err; @@ -1442,7 +1448,7 @@ retry: } if (!oldcard) { - err = mmc_sd_get_csd(card); + err = mmc_sd_get_csd(card, false); if (err) goto free_card; diff --git a/drivers/mmc/core/sd.h b/drivers/mmc/core/sd.h index fe6dd46927a4..7e8beface2ca 100644 --- a/drivers/mmc/core/sd.h +++ b/drivers/mmc/core/sd.h @@ -10,7 +10,7 @@ struct mmc_host; struct mmc_card; int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid, u32 *rocr); -int mmc_sd_get_csd(struct mmc_card *card); +int mmc_sd_get_csd(struct mmc_card *card, bool is_sduc); void mmc_decode_cid(struct mmc_card *card); int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card, bool reinit); diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 4fb247fde5c0..9566837c9848 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -769,7 +769,7 @@ try_again: * Read CSD, before selecting the card */ if (!oldcard && mmc_card_sd_combo(card)) { - err = mmc_sd_get_csd(card); + err = mmc_sd_get_csd(card, false); if (err) goto remove; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 543446392776..eb67d3d5ff5b 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -35,7 +35,7 @@ struct mmc_csd { unsigned int wp_grp_size; unsigned int read_blkbits; unsigned int write_blkbits; - unsigned int capacity; + sector_t capacity; unsigned int read_partial:1, read_misalign:1, write_partial:1, diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h index 6727576a8755..865cc0ca8543 100644 --- a/include/linux/mmc/sd.h +++ b/include/linux/mmc/sd.h @@ -36,6 +36,7 @@ /* OCR bit definitions */ #define SD_OCR_S18R (1 << 24) /* 1.8V switching request */ #define SD_ROCR_S18A SD_OCR_S18R /* 1.8V switching accepted by card */ +#define SD_OCR_2T (1 << 27) /* HO2T/CO2T - SDUC support */ #define SD_OCR_XPC (1 << 28) /* SDXC power control */ #define SD_OCR_CCS (1 << 30) /* Card Capacity Status */ -- cgit v1.2.3 From 375b535941bea65b37451f0fd398e28bf4f3bdc3 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Sun, 6 Oct 2024 08:11:40 +0300 Subject: mmc: sd: Add Extension memory addressing SDUC memory addressing spans beyond 2TB and up to 128TB. 
Therefore, 38 bits are required to access the entire memory space of all sectors. Those extra 6 bits are to be carried by CMD22 prior of sending read/write/erase commands: CMD17, CMD18, CMD24, CMD25, CMD32, and CMD33. CMD22 will carry the higher order 6 bits, and must precedes any of the above commands even if it targets sector < 2TB. No error related to address or length is indicated in CMD22 but rather in the read/write command itself. Tested-by: Ricky WU Reviewed-by: Adrian Hunter Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20241006051148.160278-3-avri.altman@wdc.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/sd_ops.c | 15 +++++++++++++++ drivers/mmc/core/sd_ops.h | 1 + include/linux/mmc/sd.h | 3 +++ 3 files changed, 19 insertions(+) (limited to 'include') diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c index f93c392040ae..50d1380e93b8 100644 --- a/drivers/mmc/core/sd_ops.c +++ b/drivers/mmc/core/sd_ops.c @@ -16,6 +16,7 @@ #include #include "core.h" +#include "card.h" #include "sd_ops.h" #include "mmc_ops.h" @@ -188,6 +189,20 @@ int mmc_send_app_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr) return 0; } +int mmc_send_ext_addr(struct mmc_host *host, u32 addr) +{ + struct mmc_command cmd = { + .opcode = SD_ADDR_EXT, + .arg = addr, + .flags = MMC_RSP_R1 | MMC_CMD_AC, + }; + + if (!mmc_card_ult_capacity(host->card)) + return 0; + + return mmc_wait_for_cmd(host, &cmd, 0); +} + static int __mmc_send_if_cond(struct mmc_host *host, u32 ocr, u8 pcie_bits, u32 *resp) { diff --git a/drivers/mmc/core/sd_ops.h b/drivers/mmc/core/sd_ops.h index 7667fc223b74..fd3f10b9cf86 100644 --- a/drivers/mmc/core/sd_ops.h +++ b/drivers/mmc/core/sd_ops.h @@ -21,6 +21,7 @@ int mmc_send_relative_addr(struct mmc_host *host, unsigned int *rca); int mmc_app_send_scr(struct mmc_card *card); int mmc_app_sd_status(struct mmc_card *card, void *ssr); int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card); +int mmc_send_ext_addr(struct mmc_host *host, u32 addr); #endif diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h index 865cc0ca8543..af5fc70e09a2 100644 --- a/include/linux/mmc/sd.h +++ b/include/linux/mmc/sd.h @@ -15,6 +15,9 @@ #define SD_SEND_IF_COND 8 /* bcr [11:0] See below R7 */ #define SD_SWITCH_VOLTAGE 11 /* ac R1 */ +/* Class 2 */ +#define SD_ADDR_EXT 22 /* ac [5:0] R1 */ + /* class 10 */ #define SD_SWITCH 6 /* adtc [31:0] See below R1 */ -- cgit v1.2.3 From 403a0293f1c230524e0185b31f69c02a6aed12c7 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Sun, 6 Oct 2024 08:11:42 +0300 Subject: mmc: core: Add open-ended Ext memory addressing For open-ended read/write - just send CMD22 before issuing the command. 
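A short worked example of the arithmetic (a sketch, not part of the patch): with 512-byte sectors, 128 TiB corresponds to 2^47 / 2^9 = 2^38 sectors, so a sector address needs 38 bits. The read/write command argument still carries only the lower 32 bits, and CMD22 (SD_ADDR_EXT, added above) supplies the remaining upper 6 bits, which is also how the block layer request position gets split in the block.c hunk below. The helper name here is made up for illustration.

#include <linux/types.h>

/*
 * Illustrative only: split a 38-bit SDUC sector address into the 6 bits
 * carried by CMD22 and the 32 bits carried by the read/write command.
 */
static inline void sduc_split_addr(u64 lba, u8 *ext_addr, u32 *cmd_arg)
{
	*ext_addr = lba >> 32;		/* upper 6 bits -> CMD22 argument */
	*cmd_arg = lba & 0xffffffff;	/* lower 32 bits -> CMD17/18/24/25/32/33 */
}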
Reviewed-by: Adrian Hunter Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20241006051148.160278-5-avri.altman@wdc.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 5 +++++ drivers/mmc/core/core.c | 3 +++ include/linux/mmc/core.h | 4 ++++ 3 files changed, 12 insertions(+) (limited to 'include') diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index f4817ea3017b..c9325febc31a 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1759,6 +1759,11 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, brq->sbc.flags = MMC_RSP_R1 | MMC_CMD_AC; brq->mrq.sbc = &brq->sbc; } + + if (mmc_card_ult_capacity(card)) { + brq->cmd.ext_addr = blk_rq_pos(req) >> 32; + brq->cmd.has_ext_addr = true; + } } #define MMC_MAX_RETRIES 5 diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index d6c819dd68ed..a0b2999684b3 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -336,6 +336,9 @@ int mmc_start_request(struct mmc_host *host, struct mmc_request *mrq) { int err; + if (mrq->cmd && mrq->cmd->has_ext_addr) + mmc_send_ext_addr(host, mrq->cmd->ext_addr); + init_completion(&mrq->cmd_completion); mmc_retune_hold(host); diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index f0ac2e469b32..a890a71288ef 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -96,6 +96,10 @@ struct mmc_command { unsigned int busy_timeout; /* busy detect timeout in ms */ struct mmc_data *data; /* data segment associated with cmd */ struct mmc_request *mrq; /* associated request */ + + /* for SDUC */ + bool has_ext_addr; + u8 ext_addr; }; struct mmc_data { -- cgit v1.2.3 From 79daeb241db7901e4bd53cce9ab046f376a63a4c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Sep 2024 18:28:16 +0800 Subject: mmc: core: Prepare to support SD UHS-II cards The SD UHS-II interface was introduced to the SD spec v4.00 several years ago. The interface is fundamentally different from an electrical and a protocol point of view, comparing to the legacy SD interface. However, the legacy SD protocol is supported through a specific transport layer (SD-TRAN) defined in the UHS-II addendum of the spec. This allows the SD card to be managed in a very similar way as a legacy SD card, hence a lot of code can be re-used to support these new types of cards through the mmc subsystem. Moreover, an SD card that supports the UHS-II interface shall also be backwards compatible with the legacy SD interface, which allows a UHS-II card to be inserted into a legacy slot. As a matter of fact, this is already supported by mmc subsystem as of today. To prepare to add support for UHS-II, this change puts the basic foundation in the mmc core in place, allowing it to be more easily reviewed before subsequent changes implements the actual support. Basically, the approach here adds a new UHS-II bus_ops type and adds a separate initialization path for the UHS-II card. The intent is to avoid us from sprinkling the legacy initialization path, but also to simplify implementation of the UHS-II specific bits. At this point, there is only one new host ops added to manage the various ios settings needed for UHS-II. Additional host ops that are needed, are being added from subsequent changes. 
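For host drivers, the only new requirement introduced here is the ->uhs2_control() callback. The following is a minimal sketch of what a host advertising MMC_CAP2_SD_UHS2 might provide; only the op signature and UHS2_SET_IOS come from this series, the body and names are assumptions. A real driver would program its UHS-II PHY and clocks according to the ios settings the core fills in (see sd_uhs2_power_up()/sd_uhs2_power_off() in the diff below).

#include <linux/errno.h>
#include <linux/mmc/host.h>

static int demo_uhs2_control(struct mmc_host *mmc, enum sd_uhs2_operation op)
{
	switch (op) {
	case UHS2_SET_IOS:
		/*
		 * Apply mmc->ios as prepared by the core: vdd, clock,
		 * power_mode and one of the MMC_TIMING_UHS2_SPEED_* timings.
		 */
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}

static const struct mmc_host_ops demo_host_ops = {
	/* ... legacy callbacks ... */
	.uhs2_control = demo_uhs2_control,
};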
Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20240913102836.6144-3-victorshihgli@gmail.com --- drivers/mmc/core/Makefile | 2 +- drivers/mmc/core/core.c | 17 ++- drivers/mmc/core/core.h | 1 + drivers/mmc/core/sd_uhs2.c | 294 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/mmc/card.h | 7 ++ include/linux/mmc/host.h | 23 ++++ 6 files changed, 342 insertions(+), 2 deletions(-) create mode 100644 drivers/mmc/core/sd_uhs2.c (limited to 'include') diff --git a/drivers/mmc/core/Makefile b/drivers/mmc/core/Makefile index 6a907736cd7a..15b067e8b0d1 100644 --- a/drivers/mmc/core/Makefile +++ b/drivers/mmc/core/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_MMC) += mmc_core.o mmc_core-y := core.o bus.o host.o \ mmc.o mmc_ops.o sd.o sd_ops.o \ sdio.o sdio_ops.o sdio_bus.o \ - sdio_cis.o sdio_io.o sdio_irq.o \ + sdio_cis.o sdio_io.o sdio_irq.o sd_uhs2.o\ slot-gpio.o regulator.o mmc_core-$(CONFIG_OF) += pwrseq.o obj-$(CONFIG_PWRSEQ_SIMPLE) += pwrseq_simple.o diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 01a7142eada3..54ca9dc2114c 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2267,6 +2267,18 @@ void mmc_rescan(struct work_struct *work) goto out; } + /* + * Ideally we should favor initialization of legacy SD cards and defer + * UHS-II enumeration. However, it seems like cards doesn't reliably + * announce their support for UHS-II in the response to the ACMD41, + * while initializing the legacy SD interface. Therefore, let's start + * with UHS-II for now. + */ + if (!mmc_attach_sd_uhs2(host)) { + mmc_release_host(host); + goto out; + } + for (i = 0; i < ARRAY_SIZE(freqs); i++) { unsigned int freq = freqs[i]; if (freq > host->f_max) { @@ -2299,10 +2311,13 @@ void mmc_rescan(struct work_struct *work) void mmc_start_host(struct mmc_host *host) { + bool power_up = !(host->caps2 & + (MMC_CAP2_NO_PRESCAN_POWERUP | MMC_CAP2_SD_UHS2)); + host->f_init = max(min(freqs[0], host->f_max), host->f_min); host->rescan_disable = 0; - if (!(host->caps2 & MMC_CAP2_NO_PRESCAN_POWERUP)) { + if (power_up) { mmc_claim_host(host); mmc_power_up(host, host->ocr_avail); mmc_release_host(host); diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index fc9d5e9620b1..fc9c066e6468 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -81,6 +81,7 @@ int mmc_detect_card_removed(struct mmc_host *host); int mmc_attach_mmc(struct mmc_host *host); int mmc_attach_sd(struct mmc_host *host); int mmc_attach_sdio(struct mmc_host *host); +int mmc_attach_sd_uhs2(struct mmc_host *host); /* Module parameters */ extern bool use_spi_crc; diff --git a/drivers/mmc/core/sd_uhs2.c b/drivers/mmc/core/sd_uhs2.c new file mode 100644 index 000000000000..19d62d45e1ec --- /dev/null +++ b/drivers/mmc/core/sd_uhs2.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Linaro Ltd + * + * Author: Ulf Hansson + * + * Support for SD UHS-II cards + */ +#include + +#include +#include + +#include "core.h" +#include "bus.h" +#include "sd.h" +#include "mmc_ops.h" + +static const unsigned int sd_uhs2_freqs[] = { 52000000, 26000000 }; + +static int sd_uhs2_power_up(struct mmc_host *host) +{ + int err; + + if (host->ios.power_mode == MMC_POWER_ON) + return 0; + + host->ios.vdd = fls(host->ocr_avail) - 1; + host->ios.clock = host->f_init; + host->ios.timing = MMC_TIMING_UHS2_SPEED_A; + host->ios.power_mode = MMC_POWER_ON; + + err = host->ops->uhs2_control(host, UHS2_SET_IOS); + + return err; +} + +static int sd_uhs2_power_off(struct mmc_host *host) +{ + 
if (host->ios.power_mode == MMC_POWER_OFF) + return 0; + + host->ios.vdd = 0; + host->ios.clock = 0; + host->ios.timing = MMC_TIMING_LEGACY; + host->ios.power_mode = MMC_POWER_OFF; + + return host->ops->uhs2_control(host, UHS2_SET_IOS); +} + +/* + * Run the phy initialization sequence, which mainly relies on the UHS-II host + * to check that we reach the expected electrical state, between the host and + * the card. + */ +static int sd_uhs2_phy_init(struct mmc_host *host) +{ + return 0; +} + +/* + * Do the early initialization of the card, by sending the device init broadcast + * command and wait for the process to be completed. + */ +static int sd_uhs2_dev_init(struct mmc_host *host) +{ + return 0; +} + +/* + * Run the enumeration process by sending the enumerate command to the card. + * Note that, we currently support only the point to point connection, which + * means only one card can be attached per host/slot. + */ +static int sd_uhs2_enum(struct mmc_host *host, u32 *node_id) +{ + return 0; +} + +/* + * Read the UHS-II configuration registers (CFG_REG) of the card, by sending it + * commands and by parsing the responses. Store a copy of the relevant data in + * card->uhs2_config. + */ +static int sd_uhs2_config_read(struct mmc_host *host, struct mmc_card *card) +{ + return 0; +} + +/* + * Based on the card's and host's UHS-II capabilities, let's update the + * configuration of the card and the host. This may also include to move to a + * greater speed range/mode. Depending on the updated configuration, we may need + * to do a soft reset of the card via sending it a GO_DORMANT_STATE command. + * + * In the final step, let's check if the card signals "config completion", which + * indicates that the card has moved from config state into active state. + */ +static int sd_uhs2_config_write(struct mmc_host *host, struct mmc_card *card) +{ + return 0; +} + +/* + * Initialize the UHS-II card through the SD-TRAN transport layer. This enables + * commands/requests to be backwards compatible through the legacy SD protocol. + * UHS-II cards has a specific power limit specified for VDD1/VDD2, that should + * be set through a legacy CMD6. Note that, the power limit that becomes set, + * survives a soft reset through the GO_DORMANT_STATE command. + */ +static int sd_uhs2_legacy_init(struct mmc_host *host, struct mmc_card *card) +{ + return 0; +} + +/* + * Allocate the data structure for the mmc_card and run the UHS-II specific + * initialization sequence. 
+ */ +static int sd_uhs2_init_card(struct mmc_host *host) +{ + struct mmc_card *card; + u32 node_id = 0; + int err; + + err = sd_uhs2_dev_init(host); + if (err) + return err; + + err = sd_uhs2_enum(host, &node_id); + if (err) + return err; + + card = mmc_alloc_card(host, &sd_type); + if (IS_ERR(card)) + return PTR_ERR(card); + + card->uhs2_config.node_id = node_id; + card->type = MMC_TYPE_SD; + + err = sd_uhs2_config_read(host, card); + if (err) + goto err; + + err = sd_uhs2_config_write(host, card); + if (err) + goto err; + + host->card = card; + return 0; + +err: + mmc_remove_card(card); + return err; +} + +static void sd_uhs2_remove(struct mmc_host *host) +{ + mmc_remove_card(host->card); + host->card = NULL; +} + +static int sd_uhs2_alive(struct mmc_host *host) +{ + return mmc_send_status(host->card, NULL); +} + +static void sd_uhs2_detect(struct mmc_host *host) +{ + int err; + + mmc_get_card(host->card, NULL); + err = _mmc_detect_card_removed(host); + mmc_put_card(host->card, NULL); + + if (err) { + sd_uhs2_remove(host); + + mmc_claim_host(host); + mmc_detach_bus(host); + sd_uhs2_power_off(host); + mmc_release_host(host); + } +} + +static int sd_uhs2_suspend(struct mmc_host *host) +{ + return 0; +} + +static int sd_uhs2_resume(struct mmc_host *host) +{ + return 0; +} + +static int sd_uhs2_runtime_suspend(struct mmc_host *host) +{ + return 0; +} + +static int sd_uhs2_runtime_resume(struct mmc_host *host) +{ + return 0; +} + +static int sd_uhs2_shutdown(struct mmc_host *host) +{ + return 0; +} + +static int sd_uhs2_hw_reset(struct mmc_host *host) +{ + return 0; +} + +static const struct mmc_bus_ops sd_uhs2_ops = { + .remove = sd_uhs2_remove, + .alive = sd_uhs2_alive, + .detect = sd_uhs2_detect, + .suspend = sd_uhs2_suspend, + .resume = sd_uhs2_resume, + .runtime_suspend = sd_uhs2_runtime_suspend, + .runtime_resume = sd_uhs2_runtime_resume, + .shutdown = sd_uhs2_shutdown, + .hw_reset = sd_uhs2_hw_reset, +}; + +static int sd_uhs2_attach(struct mmc_host *host) +{ + int err; + + err = sd_uhs2_power_up(host); + if (err) + goto err; + + err = sd_uhs2_phy_init(host); + if (err) + goto err; + + err = sd_uhs2_init_card(host); + if (err) + goto err; + + err = sd_uhs2_legacy_init(host, host->card); + if (err) + goto err; + + mmc_attach_bus(host, &sd_uhs2_ops); + + mmc_release_host(host); + + err = mmc_add_card(host->card); + if (err) + goto remove_card; + + mmc_claim_host(host); + return 0; + +remove_card: + mmc_remove_card(host->card); + host->card = NULL; + mmc_claim_host(host); + mmc_detach_bus(host); +err: + sd_uhs2_power_off(host); + return err; +} + +int mmc_attach_sd_uhs2(struct mmc_host *host) +{ + int i, err = 0; + + if (!(host->caps2 & MMC_CAP2_SD_UHS2)) + return -EOPNOTSUPP; + + /* Turn off the legacy SD interface before trying with UHS-II. */ + mmc_power_off(host); + + /* + * Start UHS-II initialization at 52MHz and possibly make a retry at + * 26MHz according to the spec. It's required that the host driver + * validates ios->clock, to set a rate within the correct range. 
+ */ + for (i = 0; i < ARRAY_SIZE(sd_uhs2_freqs); i++) { + host->f_init = sd_uhs2_freqs[i]; + pr_debug("%s: %s: trying to init UHS-II card at %u Hz\n", + mmc_hostname(host), __func__, host->f_init); + err = sd_uhs2_attach(host); + if (!err) + break; + } + + return err; +} diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index eb67d3d5ff5b..0bb1ad1ea4dc 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -209,6 +209,11 @@ struct sd_ext_reg { #define SD_EXT_PERF_CMD_QUEUE (1<<4) }; +struct sd_uhs2_config { + u32 node_id; + /* TODO: Extend with more register configs. */ +}; + struct sdio_cccr { unsigned int sdio_vsn; unsigned int sd_vsn; @@ -320,6 +325,8 @@ struct mmc_card { struct sd_ext_reg ext_power; /* SD extension reg for PM */ struct sd_ext_reg ext_perf; /* SD extension reg for PERF */ + struct sd_uhs2_config uhs2_config; /* SD UHS-II config */ + unsigned int sdio_funcs; /* number of SDIO functions */ atomic_t sdio_funcs_probed; /* number of probed SDIO funcs */ struct sdio_cccr cccr; /* common card info */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 8fc2b328ec4d..e1d380de3aaa 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -64,6 +64,10 @@ struct mmc_ios { #define MMC_TIMING_MMC_HS400 10 #define MMC_TIMING_SD_EXP 11 #define MMC_TIMING_SD_EXP_1_2V 12 +#define MMC_TIMING_UHS2_SPEED_A 13 +#define MMC_TIMING_UHS2_SPEED_A_HD 14 +#define MMC_TIMING_UHS2_SPEED_B 15 +#define MMC_TIMING_UHS2_SPEED_B_HD 16 unsigned char signal_voltage; /* signalling voltage (1.8V or 3.3V) */ @@ -92,6 +96,14 @@ struct mmc_clk_phase_map { struct mmc_clk_phase phase[MMC_NUM_CLK_PHASES]; }; +struct sd_uhs2_caps { + /* TODO: Add UHS-II capabilities for the host. */ +}; + +enum sd_uhs2_operation { + UHS2_SET_IOS, +}; + struct mmc_host; enum mmc_err_stat { @@ -219,6 +231,14 @@ struct mmc_host_ops { /* Initialize an SD express card, mandatory for MMC_CAP2_SD_EXP. */ int (*init_sd_express)(struct mmc_host *host, struct mmc_ios *ios); + + /* + * The uhs2_control callback is used to execute SD UHS-II specific + * operations. It's mandatory to implement for hosts that supports the + * SD UHS-II interface (MMC_CAP2_SD_UHS2). Expected return values are a + * negative errno in case of a failure or zero for success. + */ + int (*uhs2_control)(struct mmc_host *host, enum sd_uhs2_operation op); }; struct mmc_cqe_ops { @@ -379,6 +399,7 @@ struct mmc_host { MMC_CAP2_HS200_1_2V_SDR) #define MMC_CAP2_SD_EXP (1 << 7) /* SD express via PCIe */ #define MMC_CAP2_SD_EXP_1_2V (1 << 8) /* SD express 1.2V */ +#define MMC_CAP2_SD_UHS2 (1 << 9) /* SD UHS-II support */ #define MMC_CAP2_CD_ACTIVE_HIGH (1 << 10) /* Card-detect signal active high */ #define MMC_CAP2_RO_ACTIVE_HIGH (1 << 11) /* Write-protect signal active high */ #define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14) /* Don't power up before scan */ @@ -405,6 +426,8 @@ struct mmc_host { #endif #define MMC_CAP2_ALT_GPT_TEGRA (1 << 28) /* Host with eMMC that has GPT entry at a non-standard location */ + struct sd_uhs2_caps uhs2_caps; /* Host UHS-II capabilities */ + int fixed_drv_type; /* fixed driver type for non-removable media */ mmc_pm_flag_t pm_caps; /* supported pm features */ -- cgit v1.2.3 From 153196d550c747367bdbec5cd545a572c5310451 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Sep 2024 18:28:17 +0800 Subject: mmc: core: Announce successful insertion of an SD UHS-II card To inform the users about SD UHS-II cards, let's extend the print at card insertion with a "UHS-II" substring. 
Within this change, it seems reasonable to convert from using "ultra high speed" into "UHS-I speed", for the UHS-I type, as it should makes it more clear. Note that, the new print for UHS-II cards doesn't include the actual selected speed mode. Instead, this is going to be added from subsequent change. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20240913102836.6144-4-victorshihgli@gmail.com --- drivers/mmc/core/bus.c | 4 +++- include/linux/mmc/host.h | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 1470f78acc6f..9283b28bc69f 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -346,7 +346,9 @@ int mmc_add_card(struct mmc_card *card) if (mmc_card_hs(card)) speed_mode = "high speed "; else if (mmc_card_uhs(card)) - speed_mode = "ultra high speed "; + speed_mode = "UHS-I speed "; + else if (mmc_card_uhs2(card->host)) + speed_mode = "UHS-II speed "; else if (mmc_card_ddr52(card)) speed_mode = "high speed DDR "; else if (mmc_card_hs200(card)) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index e1d380de3aaa..9a148280bf42 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -638,6 +638,14 @@ static inline int mmc_card_uhs(struct mmc_card *card) card->host->ios.timing <= MMC_TIMING_UHS_DDR50; } +static inline bool mmc_card_uhs2(struct mmc_host *host) +{ + return host->ios.timing == MMC_TIMING_UHS2_SPEED_A || + host->ios.timing == MMC_TIMING_UHS2_SPEED_A_HD || + host->ios.timing == MMC_TIMING_UHS2_SPEED_B || + host->ios.timing == MMC_TIMING_UHS2_SPEED_B_HD; +} + void mmc_retune_timer_stop(struct mmc_host *host); static inline void mmc_retune_needed(struct mmc_host *host) -- cgit v1.2.3 From a56ffd3a83ed2e10e0d9e0b199547bfa0d206aac Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Sep 2024 18:28:18 +0800 Subject: mmc: core: Extend support for mmc regulators with a vqmmc2 To allow an additional external regulator to be controlled by an mmc host driver, let's add support for a vqmmc2 regulator to the mmc core. For an SD UHS-II interface the vqmmc2 regulator may correspond to the so called vdd2 supply, as described by the SD spec. Initially, only 1.8V is needed, hence limit the new helper function, mmc_regulator_set_vqmmc2() to this too. Note that, to allow for flexibility mmc host drivers need to manage the enable/disable of the vqmmc2 regulator themselves, while the regulator is looked up through the common mmc_regulator_get_supply(). Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20240913102836.6144-5-victorshihgli@gmail.com --- drivers/mmc/core/regulator.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/mmc/host.h | 11 +++++++++++ 2 files changed, 45 insertions(+) (limited to 'include') diff --git a/drivers/mmc/core/regulator.c b/drivers/mmc/core/regulator.c index 01747ab1024e..3dae2e9b7978 100644 --- a/drivers/mmc/core/regulator.c +++ b/drivers/mmc/core/regulator.c @@ -226,6 +226,33 @@ int mmc_regulator_set_vqmmc(struct mmc_host *mmc, struct mmc_ios *ios) } EXPORT_SYMBOL_GPL(mmc_regulator_set_vqmmc); +/** + * mmc_regulator_set_vqmmc2 - Set vqmmc2 as per the ios->vqmmc2_voltage + * @mmc: The mmc host to regulate + * @ios: The io bus settings + * + * Sets a new voltage level for the vqmmc2 regulator, which may correspond to + * the vdd2 regulator for an SD UHS-II interface. This function is expected to + * be called by mmc host drivers. 
+ * + * Returns a negative error code on failure, zero if the voltage level was + * changed successfully or a positive value if the level didn't need to change. + */ +int mmc_regulator_set_vqmmc2(struct mmc_host *mmc, struct mmc_ios *ios) +{ + if (IS_ERR(mmc->supply.vqmmc2)) + return -EINVAL; + + switch (ios->vqmmc2_voltage) { + case MMC_VQMMC2_VOLTAGE_180: + return mmc_regulator_set_voltage_if_supported( + mmc->supply.vqmmc2, 1700000, 1800000, 1950000); + default: + return -EINVAL; + } +} +EXPORT_SYMBOL_GPL(mmc_regulator_set_vqmmc2); + #else static inline int mmc_regulator_get_ocrmask(struct regulator *supply) @@ -252,6 +279,7 @@ int mmc_regulator_get_supply(struct mmc_host *mmc) mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc"); mmc->supply.vqmmc = devm_regulator_get_optional(dev, "vqmmc"); + mmc->supply.vqmmc2 = devm_regulator_get_optional(dev, "vqmmc2"); if (IS_ERR(mmc->supply.vmmc)) { if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER) @@ -275,6 +303,12 @@ int mmc_regulator_get_supply(struct mmc_host *mmc) dev_dbg(dev, "No vqmmc regulator found\n"); } + if (IS_ERR(mmc->supply.vqmmc2)) { + if (PTR_ERR(mmc->supply.vqmmc2) == -EPROBE_DEFER) + return -EPROBE_DEFER; + dev_dbg(dev, "No vqmmc2 regulator found\n"); + } + return 0; } EXPORT_SYMBOL_GPL(mmc_regulator_get_supply); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 9a148280bf42..a7f790c75bc8 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -75,6 +75,9 @@ struct mmc_ios { #define MMC_SIGNAL_VOLTAGE_180 1 #define MMC_SIGNAL_VOLTAGE_120 2 + unsigned char vqmmc2_voltage; +#define MMC_VQMMC2_VOLTAGE_180 0 + unsigned char drv_type; /* driver type (A, B, C, D) */ #define MMC_SET_DRIVER_TYPE_B 0 @@ -308,6 +311,7 @@ struct mmc_pwrseq; struct mmc_supply { struct regulator *vmmc; /* Card power supply */ struct regulator *vqmmc; /* Optional Vccq supply */ + struct regulator *vqmmc2; /* Optional supply for phy */ }; struct mmc_ctx { @@ -590,6 +594,7 @@ int mmc_regulator_set_ocr(struct mmc_host *mmc, struct regulator *supply, unsigned short vdd_bit); int mmc_regulator_set_vqmmc(struct mmc_host *mmc, struct mmc_ios *ios); +int mmc_regulator_set_vqmmc2(struct mmc_host *mmc, struct mmc_ios *ios); #else static inline int mmc_regulator_set_ocr(struct mmc_host *mmc, struct regulator *supply, @@ -603,6 +608,12 @@ static inline int mmc_regulator_set_vqmmc(struct mmc_host *mmc, { return -EINVAL; } + +static inline int mmc_regulator_set_vqmmc2(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + return -EINVAL; +} #endif int mmc_regulator_get_supply(struct mmc_host *mmc); -- cgit v1.2.3 From a9a75f9dc23c1562dcb261c0a8f3d6fc70d246cd Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Fri, 13 Sep 2024 18:28:19 +0800 Subject: mmc: core: Add definitions for SD UHS-II cards Add UHS-II specific data structures for commands and defines for registers, as described in Part 1 UHS-II Addendum Version 1.01. UHS-II related definitions are listed below: 1. UHS-II card capability: sd_uhs2_caps{} 2. UHS-II configuration: sd_uhs2_config{} 3. 
UHS-II register I/O address and register field definitions: sd_uhs2.h Signed-off-by: Jason Lai Signed-off-by: Victor Shih Link: https://lore.kernel.org/r/20240913102836.6144-6-victorshihgli@gmail.com Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 31 +++++- include/linux/mmc/host.h | 25 ++++- include/linux/mmc/sd_uhs2.h | 240 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 294 insertions(+), 2 deletions(-) create mode 100644 include/linux/mmc/sd_uhs2.h (limited to 'include') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 0bb1ad1ea4dc..526fce581657 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -188,6 +188,12 @@ struct sd_switch_caps { #define SD_MAX_CURRENT_400 (1 << SD_SET_CURRENT_LIMIT_400) #define SD_MAX_CURRENT_600 (1 << SD_SET_CURRENT_LIMIT_600) #define SD_MAX_CURRENT_800 (1 << SD_SET_CURRENT_LIMIT_800) + +#define SD4_SET_POWER_LIMIT_0_72W 0 +#define SD4_SET_POWER_LIMIT_1_44W 1 +#define SD4_SET_POWER_LIMIT_2_16W 2 +#define SD4_SET_POWER_LIMIT_2_88W 3 +#define SD4_SET_POWER_LIMIT_1_80W 4 }; struct sd_ext_reg { @@ -211,7 +217,30 @@ struct sd_ext_reg { struct sd_uhs2_config { u32 node_id; - /* TODO: Extend with more register configs. */ + + u32 n_fcu; + u32 maxblk_len; + u8 n_lanes; + u8 dadr_len; + u8 app_type; + u8 phy_minor_rev; + u8 phy_major_rev; + u8 can_hibernate; + u8 n_lss_sync; + u8 n_lss_dir; + u8 link_minor_rev; + u8 link_major_rev; + u8 dev_type; + u8 n_data_gap; + + u32 n_fcu_set; + u32 maxblk_len_set; + u8 n_lanes_set; + u8 speed_range_set; + u8 n_lss_sync_set; + u8 n_lss_dir_set; + u8 n_data_gap_set; + u8 max_retry_set; }; struct sdio_cccr { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index a7f790c75bc8..0980d06ed419 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -17,6 +17,7 @@ #include #include #include +#include struct mmc_ios { unsigned int clock; /* clock rate */ @@ -100,7 +101,29 @@ struct mmc_clk_phase_map { }; struct sd_uhs2_caps { - /* TODO: Add UHS-II capabilities for the host. */ + u32 dap; + u32 gap; + u32 group_desc; + u32 maxblk_len; + u32 n_fcu; + u8 n_lanes; + u8 addr64; + u8 card_type; + u8 phy_rev; + u8 speed_range; + u8 n_lss_sync; + u8 n_lss_dir; + u8 link_rev; + u8 host_type; + u8 n_data_gap; + + u32 maxblk_len_set; + u32 n_fcu_set; + u8 n_lanes_set; + u8 n_lss_sync_set; + u8 n_lss_dir_set; + u8 n_data_gap_set; + u8 max_retry_set; }; enum sd_uhs2_operation { diff --git a/include/linux/mmc/sd_uhs2.h b/include/linux/mmc/sd_uhs2.h new file mode 100644 index 000000000000..7abe9bd870c7 --- /dev/null +++ b/include/linux/mmc/sd_uhs2.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Header file for UHS-II packets, Host Controller registers and I/O + * accessors. + * + * Copyright (C) 2014 Intel Corp, All Rights Reserved. + */ +#ifndef LINUX_MMC_UHS2_H +#define LINUX_MMC_UHS2_H + +/* LINK Layer definition */ +/* + * UHS2 Header: + * Refer to UHS-II Addendum Version 1.02 Figure 5-2, the format of CCMD Header is described below: + * bit [3:0] : DID(Destination ID = Node ID of UHS2 card) + * bit [6:4] : TYP(Packet Type) + * 000b: CCMD(Control command packet) + * 001b: DCMD(Data command packet) + * 010b: RES(Response packet) + * 011b: DATA(Data payload packet) + * 111b: MSG(Message packet) + * Others: Reserved + * bit [7] : NP(Native Packet) + * bit [10:8] : TID(Transaction ID) + * bit [11] : Reserved + * bit [15:12]: SID(Source ID 0: Node ID of Host) + * + * Broadcast CCMD issued by Host is represented as DID=SID=0. 
+ */ +/* + * UHS2 Argument: + * Refer to UHS-II Addendum Version 1.02 Figure 6-5, the format of CCMD Argument is described below: + * bit [3:0] : MSB of IOADR + * bit [5:4] : PLEN(Payload Length) + * 00b: 0 byte + * 01b: 4 bytes + * 10b: 8 bytes + * 11b: 16 bytes + * bit [6] : Reserved + * bit [7] : R/W(Read/Write) + * 0: Control read command + * 1: Control write command + * bit [15:8] : LSB of IOADR + * + * I/O Address specifies the address of register in UHS-II I/O space accessed by CCMD. + * The unit of I/O Address is 4 Bytes. It is transmitted in MSB first, LSB last. + */ +#define UHS2_NATIVE_PACKET_POS 7 +#define UHS2_NATIVE_PACKET (1 << UHS2_NATIVE_PACKET_POS) + +#define UHS2_PACKET_TYPE_POS 4 +#define UHS2_PACKET_TYPE_CCMD (0 << UHS2_PACKET_TYPE_POS) +#define UHS2_PACKET_TYPE_DCMD (1 << UHS2_PACKET_TYPE_POS) +#define UHS2_PACKET_TYPE_RES (2 << UHS2_PACKET_TYPE_POS) +#define UHS2_PACKET_TYPE_DATA (3 << UHS2_PACKET_TYPE_POS) +#define UHS2_PACKET_TYPE_MSG (7 << UHS2_PACKET_TYPE_POS) + +#define UHS2_DEST_ID_MASK 0x0F +#define UHS2_DEST_ID 0x1 + +#define UHS2_SRC_ID_POS 12 +#define UHS2_SRC_ID_MASK 0xF000 + +#define UHS2_TRANS_ID_POS 8 +#define UHS2_TRANS_ID_MASK 0x0700 + +/* UHS2 MSG */ +#define UHS2_MSG_CTG_POS 5 +#define UHS2_MSG_CTG_LMSG 0x00 +#define UHS2_MSG_CTG_INT 0x60 +#define UHS2_MSG_CTG_AMSG 0x80 + +#define UHS2_MSG_CTG_FCREQ 0x00 +#define UHS2_MSG_CTG_FCRDY 0x01 +#define UHS2_MSG_CTG_STAT 0x02 + +#define UHS2_MSG_CODE_POS 8 +#define UHS2_MSG_CODE_FC_UNRECOVER_ERR 0x8 +#define UHS2_MSG_CODE_STAT_UNRECOVER_ERR 0x8 +#define UHS2_MSG_CODE_STAT_RECOVER_ERR 0x1 + +/* TRANS Layer definition */ + +/* Native packets*/ +#define UHS2_NATIVE_CMD_RW_POS 7 +#define UHS2_NATIVE_CMD_WRITE (1 << UHS2_NATIVE_CMD_RW_POS) +#define UHS2_NATIVE_CMD_READ (0 << UHS2_NATIVE_CMD_RW_POS) + +#define UHS2_NATIVE_CMD_PLEN_POS 4 +#define UHS2_NATIVE_CMD_PLEN_4B (1 << UHS2_NATIVE_CMD_PLEN_POS) +#define UHS2_NATIVE_CMD_PLEN_8B (2 << UHS2_NATIVE_CMD_PLEN_POS) +#define UHS2_NATIVE_CMD_PLEN_16B (3 << UHS2_NATIVE_CMD_PLEN_POS) + +#define UHS2_NATIVE_CCMD_GET_MIOADR_MASK 0xF00 +#define UHS2_NATIVE_CCMD_MIOADR_MASK 0x0F + +#define UHS2_NATIVE_CCMD_LIOADR_POS 8 +#define UHS2_NATIVE_CCMD_GET_LIOADR_MASK 0x0FF + +#define UHS2_CCMD_DEV_INIT_COMPLETE_FLAG BIT(11) +#define UHS2_DEV_INIT_PAYLOAD_LEN 1 +#define UHS2_DEV_INIT_RESP_LEN 6 +#define UHS2_DEV_ENUM_PAYLOAD_LEN 1 +#define UHS2_DEV_ENUM_RESP_LEN 8 +#define UHS2_CFG_WRITE_PAYLOAD_LEN 2 +#define UHS2_CFG_WRITE_PHY_SET_RESP_LEN 4 +#define UHS2_CFG_WRITE_GENERIC_SET_RESP_LEN 5 +#define UHS2_GO_DORMANT_PAYLOAD_LEN 1 + +/* + * UHS2 Argument: + * Refer to UHS-II Addendum Version 1.02 Figure 6-8, the format of DCMD Argument is described below: + * bit [3:0] : Reserved + * bit [6:3] : TMODE(Transfer Mode) + * bit 3: DAM(Data Access Mode) + * bit 4: TLUM(TLEN Unit Mode) + * bit 5: LM(Length Mode) + * bit 6: DM(Duplex Mode) + * bit [7] : R/W(Read/Write) + * 0: Control read command + * 1: Control write command + * bit [15:8] : Reserved + * + * I/O Address specifies the address of register in UHS-II I/O space accessed by CCMD. + * The unit of I/O Address is 4 Bytes. It is transmitted in MSB first, LSB last. 
+ */ +#define UHS2_DCMD_DM_POS 6 +#define UHS2_DCMD_2L_HD_MODE (1 << UHS2_DCMD_DM_POS) +#define UHS2_DCMD_LM_POS 5 +#define UHS2_DCMD_LM_TLEN_EXIST (1 << UHS2_DCMD_LM_POS) +#define UHS2_DCMD_TLUM_POS 4 +#define UHS2_DCMD_TLUM_BYTE_MODE (1 << UHS2_DCMD_TLUM_POS) +#define UHS2_NATIVE_DCMD_DAM_POS 3 +#define UHS2_NATIVE_DCMD_DAM_IO (1 << UHS2_NATIVE_DCMD_DAM_POS) + +#define UHS2_RES_NACK_POS 7 +#define UHS2_RES_NACK_MASK (0x1 << UHS2_RES_NACK_POS) + +#define UHS2_RES_ECODE_POS 4 +#define UHS2_RES_ECODE_MASK 0x7 +#define UHS2_RES_ECODE_COND 1 +#define UHS2_RES_ECODE_ARG 2 +#define UHS2_RES_ECODE_GEN 3 + +/* IOADR of device registers */ +#define UHS2_IOADR_GENERIC_CAPS 0x00 +#define UHS2_IOADR_PHY_CAPS 0x02 +#define UHS2_IOADR_LINK_CAPS 0x04 +#define UHS2_IOADR_RSV_CAPS 0x06 +#define UHS2_IOADR_GENERIC_SETTINGS 0x08 +#define UHS2_IOADR_PHY_SETTINGS 0x0A +#define UHS2_IOADR_LINK_SETTINGS 0x0C +#define UHS2_IOADR_PRESET 0x40 + +/* SD application packets */ +#define UHS2_SD_CMD_INDEX_POS 8 + +#define UHS2_SD_CMD_APP_POS 14 +#define UHS2_SD_CMD_APP (1 << UHS2_SD_CMD_APP_POS) + +/* UHS-II Device Registers */ +#define UHS2_DEV_CONFIG_REG 0x000 + +/* General Caps and Settings registers */ +#define UHS2_DEV_CONFIG_GEN_CAPS (UHS2_DEV_CONFIG_REG + 0x000) +#define UHS2_DEV_CONFIG_N_LANES_POS 8 +#define UHS2_DEV_CONFIG_N_LANES_MASK 0x3F +#define UHS2_DEV_CONFIG_2L_HD_FD 0x1 +#define UHS2_DEV_CONFIG_2D1U_FD 0x2 +#define UHS2_DEV_CONFIG_1D2U_FD 0x4 +#define UHS2_DEV_CONFIG_2D2U_FD 0x8 +#define UHS2_DEV_CONFIG_DADR_POS 14 +#define UHS2_DEV_CONFIG_DADR_MASK 0x1 +#define UHS2_DEV_CONFIG_APP_POS 16 +#define UHS2_DEV_CONFIG_APP_MASK 0xFF +#define UHS2_DEV_CONFIG_APP_SD_MEM 0x1 + +#define UHS2_DEV_CONFIG_GEN_SET (UHS2_DEV_CONFIG_REG + 0x008) +#define UHS2_DEV_CONFIG_GEN_SET_N_LANES_POS 8 +#define UHS2_DEV_CONFIG_GEN_SET_2L_FD_HD 0x0 +#define UHS2_DEV_CONFIG_GEN_SET_2D1U_FD 0x2 +#define UHS2_DEV_CONFIG_GEN_SET_1D2U_FD 0x3 +#define UHS2_DEV_CONFIG_GEN_SET_2D2U_FD 0x4 +#define UHS2_DEV_CONFIG_GEN_SET_CFG_COMPLETE BIT(31) + +/* PHY Caps and Settings registers */ +#define UHS2_DEV_CONFIG_PHY_CAPS (UHS2_DEV_CONFIG_REG + 0x002) +#define UHS2_DEV_CONFIG_PHY_MINOR_MASK 0xF +#define UHS2_DEV_CONFIG_PHY_MAJOR_POS 4 +#define UHS2_DEV_CONFIG_PHY_MAJOR_MASK 0x3 +#define UHS2_DEV_CONFIG_CAN_HIBER_POS 15 +#define UHS2_DEV_CONFIG_CAN_HIBER_MASK 0x1 +#define UHS2_DEV_CONFIG_PHY_CAPS1 (UHS2_DEV_CONFIG_REG + 0x003) +#define UHS2_DEV_CONFIG_N_LSS_SYN_MASK 0xF +#define UHS2_DEV_CONFIG_N_LSS_DIR_POS 4 +#define UHS2_DEV_CONFIG_N_LSS_DIR_MASK 0xF + +#define UHS2_DEV_CONFIG_PHY_SET (UHS2_DEV_CONFIG_REG + 0x00A) +#define UHS2_DEV_CONFIG_PHY_SET_SPEED_POS 6 +#define UHS2_DEV_CONFIG_PHY_SET_SPEED_A 0x0 +#define UHS2_DEV_CONFIG_PHY_SET_SPEED_B 0x1 + +/* LINK-TRAN Caps and Settings registers */ +#define UHS2_DEV_CONFIG_LINK_TRAN_CAPS (UHS2_DEV_CONFIG_REG + 0x004) +#define UHS2_DEV_CONFIG_LT_MINOR_MASK 0xF +#define UHS2_DEV_CONFIG_LT_MAJOR_POS 4 +#define UHS2_DEV_CONFIG_LT_MAJOR_MASK 0x3 +#define UHS2_DEV_CONFIG_N_FCU_POS 8 +#define UHS2_DEV_CONFIG_N_FCU_MASK 0xFF +#define UHS2_DEV_CONFIG_DEV_TYPE_POS 16 +#define UHS2_DEV_CONFIG_DEV_TYPE_MASK 0x7 +#define UHS2_DEV_CONFIG_MAX_BLK_LEN_POS 20 +#define UHS2_DEV_CONFIG_MAX_BLK_LEN_MASK 0xFFF +#define UHS2_DEV_CONFIG_LINK_TRAN_CAPS1 (UHS2_DEV_CONFIG_REG + 0x005) +#define UHS2_DEV_CONFIG_N_DATA_GAP_MASK 0xFF + +#define UHS2_DEV_CONFIG_LINK_TRAN_SET (UHS2_DEV_CONFIG_REG + 0x00C) +#define UHS2_DEV_CONFIG_LT_SET_MAX_BLK_LEN 0x200 +#define UHS2_DEV_CONFIG_LT_SET_MAX_RETRY_POS 16 + +/* Preset register */ 
+#define UHS2_DEV_CONFIG_PRESET (UHS2_DEV_CONFIG_REG + 0x040) + +#define UHS2_DEV_INT_REG 0x100 + +#define UHS2_DEV_STATUS_REG 0x180 + +#define UHS2_DEV_CMD_REG 0x200 +#define UHS2_DEV_CMD_FULL_RESET (UHS2_DEV_CMD_REG + 0x000) +#define UHS2_DEV_CMD_GO_DORMANT_STATE (UHS2_DEV_CMD_REG + 0x001) +#define UHS2_DEV_CMD_DORMANT_HIBER BIT(7) +#define UHS2_DEV_CMD_DEVICE_INIT (UHS2_DEV_CMD_REG + 0x002) +#define UHS2_DEV_INIT_COMPLETE_FLAG BIT(11) +#define UHS2_DEV_CMD_ENUMERATE (UHS2_DEV_CMD_REG + 0x003) +#define UHS2_DEV_CMD_TRANS_ABORT (UHS2_DEV_CMD_REG + 0x004) + +#define UHS2_RCLK_MAX 52000000 +#define UHS2_RCLK_MIN 26000000 + +#endif /* LINUX_MMC_UHS2_H */ -- cgit v1.2.3 From 5bb798cfdfd00182065cf55c1ff4b2c08d3be13f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 28 Sep 2024 18:20:56 +0200 Subject: memstick: Constify struct memstick_device_id 'struct memstick_device_id' are not modified in these drivers. Constifying this structure moves some data to a read-only section, so increases overall security. Update memstick_dev_match(), memstick_bus_match() and struct memstick_driver accordingly. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 74055 3455 88 77598 12f1e drivers/memstick/core/ms_block.o After: ===== text data bss dec hex filename 74087 3423 88 77598 12f1e drivers/memstick/core/ms_block.o Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/6509d6f6ed64193f04e747a98ccea7492c976ca8.1727540434.git.christophe.jaillet@wanadoo.fr Signed-off-by: Ulf Hansson --- drivers/memstick/core/memstick.c | 4 ++-- drivers/memstick/core/ms_block.c | 2 +- drivers/memstick/core/mspro_block.c | 2 +- include/linux/memstick.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 9a3a784054cc..ae4e8b8e6eb7 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -26,7 +26,7 @@ static DEFINE_IDR(memstick_host_idr); static DEFINE_SPINLOCK(memstick_host_lock); static int memstick_dev_match(struct memstick_dev *card, - struct memstick_device_id *id) + const struct memstick_device_id *id) { if (id->match_flags & MEMSTICK_MATCH_ALL) { if ((id->type == card->id.type) @@ -44,7 +44,7 @@ static int memstick_bus_match(struct device *dev, const struct device_driver *dr dev); const struct memstick_driver *ms_drv = container_of_const(drv, struct memstick_driver, driver); - struct memstick_device_id *ids = ms_drv->id_table; + const struct memstick_device_id *ids = ms_drv->id_table; if (ids) { while (ids->match_flags) { diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index c0383959dbb2..20a2466bec23 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -2279,7 +2279,7 @@ out: #endif /* CONFIG_PM */ -static struct memstick_device_id msb_id_tbl[] = { +static const struct memstick_device_id msb_id_tbl[] = { {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE, MEMSTICK_CLASS_FLASH}, diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 49accfdc89d6..13b317c56069 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1349,7 +1349,7 @@ out_unlock: #endif /* CONFIG_PM */ -static struct memstick_device_id mspro_block_id_tbl[] = { +static const struct memstick_device_id mspro_block_id_tbl[] = { {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_PRO, MEMSTICK_CATEGORY_STORAGE_DUO, 
MEMSTICK_CLASS_DUO}, {} diff --git a/include/linux/memstick.h b/include/linux/memstick.h index ebf73d4ee969..107bdcbedf79 100644 --- a/include/linux/memstick.h +++ b/include/linux/memstick.h @@ -293,7 +293,7 @@ struct memstick_host { }; struct memstick_driver { - struct memstick_device_id *id_table; + const struct memstick_device_id *id_table; int (*probe)(struct memstick_dev *card); void (*remove)(struct memstick_dev *card); int (*suspend)(struct memstick_dev *card, -- cgit v1.2.3 From 7e019dcc470f27066c98697e43d930df8d54bd9c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 9 Oct 2024 09:50:07 -0400 Subject: sched: Improve cache locality of RSEQ concurrency IDs for intermittent workloads commit 223baf9d17f25 ("sched: Fix performance regression introduced by mm_cid") introduced a per-mm/cpu current concurrency id (mm_cid), which keeps a reference to the concurrency id allocated for each CPU. This reference expires shortly after a 100ms delay. These per-CPU references keep the per-mm-cid data cache-local in situations where threads are running at least once on each CPU within each 100ms window, thus keeping the per-cpu reference alive. However, intermittent workloads behaving in bursts spaced by more than 100ms on each CPU exhibit bad cache locality and degraded performance compared to purely per-cpu data indexing, because concurrency IDs are allocated over various CPUs and cores, therefore losing cache locality of the associated data. Introduce the following changes to improve per-mm-cid cache locality: - Add a "recent_cid" field to the per-mm/cpu mm_cid structure to keep track of which mm_cid value was last used, and use it as a hint to attempt re-allocating the same concurrency ID the next time this mm/cpu needs to allocate a concurrency ID, - Add a per-mm CPUs allowed mask, which keeps track of the union of CPUs allowed for all threads belonging to this mm. This cpumask is only set during the lifetime of the mm, never cleared, so it represents the union of all the CPUs allowed since the beginning of the mm lifetime (note that the mm_cpumask() is really arch-specific and tailored to the TLB flush needs, and is thus _not_ a viable approach for this), - Add a per-mm nr_cpus_allowed to keep track of the weight of the per-mm CPUs allowed mask (for fast access), - Add a per-mm max_nr_cid to keep track of the highest number of concurrency IDs allocated for the mm. This is used for expanding the concurrency ID allocation within the upper bound defined by: min(mm->nr_cpus_allowed, mm->mm_users) When the next unused CID value reaches this threshold, stop trying to expand the cid allocation and use the first available cid value instead. Spreading allocation to use all the cid values within the range [ 0, min(mm->nr_cpus_allowed, mm->mm_users) - 1 ] improves cache locality while preserving mm_cid compactness within the expected user limits, - In __mm_cid_try_get, only return cid values within the range [ 0, mm->nr_cpus_allowed ] rather than [ 0, nr_cpu_ids ]. This prevents allocating cids above the number of allowed cpus in rare scenarios where cid allocation races with a concurrent remote-clear of the per-mm/cpu cid. This improvement is made possible by the addition of the per-mm CPUs allowed mask, - In sched_mm_cid_migrate_to, use mm->nr_cpus_allowed rather than t->nr_cpus_allowed. This criterion was really meant to compare the number of mm->mm_users to the number of CPUs allowed for the entire mm. 
Therefore, the prior comparison worked fine when all threads shared the same CPUs allowed mask, but not so much in scenarios where those threads have different masks (e.g. each thread pinned to a single CPU). This improvement is made possible by the addition of the per-mm CPUs allowed mask. * Benchmarks Each thread increments 16kB worth of 8-bit integers in bursts, with a configurable delay between each thread's execution. Each thread run one after the other (no threads run concurrently). The order of thread execution in the sequence is random. The thread execution sequence begins again after all threads have executed. The 16kB areas are allocated with rseq_mempool and indexed by either cpu_id, mm_cid (not cache-local), or cache-local mm_cid. Each thread is pinned to its own core. Testing configurations: 8-core/1-L3: Use 8 cores within a single L3 24-core/24-L3: Use 24 cores, 1 core per L3 192-core/24-L3: Use 192 cores (all cores in the system) 384-thread/24-L3: Use 384 HW threads (all HW threads in the system) Intermittent workload delays between threads: 200ms, 10ms. Hardware: CPU(s): 384 On-line CPU(s) list: 0-383 Vendor ID: AuthenticAMD Model name: AMD EPYC 9654 96-Core Processor Thread(s) per core: 2 Core(s) per socket: 96 Socket(s): 2 Caches (sum of all): L1d: 6 MiB (192 instances) L1i: 6 MiB (192 instances) L2: 192 MiB (192 instances) L3: 768 MiB (24 instances) Each result is an average of 5 test runs. The cache-local speedup is calculated as: (cache-local mm_cid) / (mm_cid). Intermittent workload delay: 200ms per-cpu mm_cid cache-local mm_cid cache-local speedup (ns) (ns) (ns) 8-core/1-L3 1374 19289 1336 14.4x 24-core/24-L3 2423 26721 1594 16.7x 192-core/24-L3 2291 15826 2153 7.3x 384-thread/24-L3 1874 13234 1907 6.9x Intermittent workload delay: 10ms per-cpu mm_cid cache-local mm_cid cache-local speedup (ns) (ns) (ns) 8-core/1-L3 662 756 686 1.1x 24-core/24-L3 1378 3648 1035 3.5x 192-core/24-L3 1439 10833 1482 7.3x 384-thread/24-L3 1503 10570 1556 6.8x [ This deprecates the prior "sched: NUMA-aware per-memory-map concurrency IDs" patch series with a simpler and more general approach. ] [ This patch applies on top of v6.12-rc1. ] Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Acked-by: Marco Elver Link: https://lore.kernel.org/lkml/20240823185946.418340-1-mathieu.desnoyers@efficios.com/ --- fs/exec.c | 2 +- include/linux/mm_types.h | 72 ++++++++++++++++++++++++++++++++++++++++++------ kernel/fork.c | 2 +- kernel/sched/core.c | 22 +++++++++------ kernel/sched/sched.h | 48 ++++++++++++++++++++++---------- 5 files changed, 112 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index 6c53920795c2..aaa605529a75 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -990,7 +990,7 @@ static int exec_mmap(struct mm_struct *mm) active_mm = tsk->active_mm; tsk->active_mm = mm; tsk->mm = mm; - mm_init_cid(mm); + mm_init_cid(mm, tsk); /* * This prevents preemption while active_mm is being loaded and * it and mm are being updated, which could cause problems for diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6e3bdf8e38bc..381d22eba088 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -782,6 +782,7 @@ struct vm_area_struct { struct mm_cid { u64 time; int cid; + int recent_cid; }; #endif @@ -852,6 +853,27 @@ struct mm_struct { * When the next mm_cid scan is due (in jiffies). */ unsigned long mm_cid_next_scan; + /** + * @nr_cpus_allowed: Number of CPUs allowed for mm. 
+ * + * Number of CPUs allowed in the union of all mm's + * threads allowed CPUs. + */ + unsigned int nr_cpus_allowed; + /** + * @max_nr_cid: Maximum number of concurrency IDs allocated. + * + * Track the highest number of concurrency IDs allocated for the + * mm. + */ + atomic_t max_nr_cid; + /** + * @cpus_allowed_lock: Lock protecting mm cpus_allowed. + * + * Provide mutual exclusion for mm cpus_allowed and + * mm nr_cpus_allowed updates. + */ + raw_spinlock_t cpus_allowed_lock; #endif #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* size of all page tables */ @@ -1170,18 +1192,30 @@ static inline int mm_cid_clear_lazy_put(int cid) return cid & ~MM_CID_LAZY_PUT; } +/* + * mm_cpus_allowed: Union of all mm's threads allowed CPUs. + */ +static inline cpumask_t *mm_cpus_allowed(struct mm_struct *mm) +{ + unsigned long bitmap = (unsigned long)mm; + + bitmap += offsetof(struct mm_struct, cpu_bitmap); + /* Skip cpu_bitmap */ + bitmap += cpumask_size(); + return (struct cpumask *)bitmap; +} + /* Accessor for struct mm_struct's cidmask. */ static inline cpumask_t *mm_cidmask(struct mm_struct *mm) { - unsigned long cid_bitmap = (unsigned long)mm; + unsigned long cid_bitmap = (unsigned long)mm_cpus_allowed(mm); - cid_bitmap += offsetof(struct mm_struct, cpu_bitmap); - /* Skip cpu_bitmap */ + /* Skip mm_cpus_allowed */ cid_bitmap += cpumask_size(); return (struct cpumask *)cid_bitmap; } -static inline void mm_init_cid(struct mm_struct *mm) +static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { int i; @@ -1189,17 +1223,22 @@ static inline void mm_init_cid(struct mm_struct *mm) struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, i); pcpu_cid->cid = MM_CID_UNSET; + pcpu_cid->recent_cid = MM_CID_UNSET; pcpu_cid->time = 0; } + mm->nr_cpus_allowed = p->nr_cpus_allowed; + atomic_set(&mm->max_nr_cid, 0); + raw_spin_lock_init(&mm->cpus_allowed_lock); + cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask); cpumask_clear(mm_cidmask(mm)); } -static inline int mm_alloc_cid_noprof(struct mm_struct *mm) +static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *p) { mm->pcpu_cid = alloc_percpu_noprof(struct mm_cid); if (!mm->pcpu_cid) return -ENOMEM; - mm_init_cid(mm); + mm_init_cid(mm, p); return 0; } #define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__)) @@ -1212,16 +1251,31 @@ static inline void mm_destroy_cid(struct mm_struct *mm) static inline unsigned int mm_cid_size(void) { - return cpumask_size(); + return 2 * cpumask_size(); /* mm_cpus_allowed(), mm_cidmask(). */ +} + +static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) +{ + struct cpumask *mm_allowed = mm_cpus_allowed(mm); + + if (!mm) + return; + /* The mm_cpus_allowed is the union of each thread allowed CPUs masks. 
*/ + raw_spin_lock(&mm->cpus_allowed_lock); + cpumask_or(mm_allowed, mm_allowed, cpumask); + WRITE_ONCE(mm->nr_cpus_allowed, cpumask_weight(mm_allowed)); + raw_spin_unlock(&mm->cpus_allowed_lock); } #else /* CONFIG_SCHED_MM_CID */ -static inline void mm_init_cid(struct mm_struct *mm) { } -static inline int mm_alloc_cid(struct mm_struct *mm) { return 0; } +static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { } +static inline int mm_alloc_cid(struct mm_struct *mm, struct task_struct *p) { return 0; } static inline void mm_destroy_cid(struct mm_struct *mm) { } + static inline unsigned int mm_cid_size(void) { return 0; } +static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) { } #endif /* CONFIG_SCHED_MM_CID */ struct mmu_gather; diff --git a/kernel/fork.c b/kernel/fork.c index 89ceb4a68af2..7d950e93f080 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1298,7 +1298,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, if (init_new_context(p, mm)) goto fail_nocontext; - if (mm_alloc_cid(mm)) + if (mm_alloc_cid(mm, p)) goto fail_cid; if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7db711ba6d12..f5ec452e2c5e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2696,6 +2696,7 @@ __do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx) put_prev_task(rq, p); p->sched_class->set_cpus_allowed(p, ctx); + mm_set_cpus_allowed(p->mm, ctx->new_mask); if (queued) enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); @@ -10243,6 +10244,7 @@ int __sched_mm_cid_migrate_from_try_steal_cid(struct rq *src_rq, */ if (!try_cmpxchg(&src_pcpu_cid->cid, &lazy_cid, MM_CID_UNSET)) return -1; + WRITE_ONCE(src_pcpu_cid->recent_cid, MM_CID_UNSET); return src_cid; } @@ -10255,7 +10257,8 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t) { struct mm_cid *src_pcpu_cid, *dst_pcpu_cid; struct mm_struct *mm = t->mm; - int src_cid, dst_cid, src_cpu; + int src_cid, src_cpu; + bool dst_cid_is_set; struct rq *src_rq; lockdep_assert_rq_held(dst_rq); @@ -10272,9 +10275,9 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t) * allocation closest to 0 in cases where few threads migrate around * many CPUs. * - * If destination cid is already set, we may have to just clear - * the src cid to ensure compactness in frequent migrations - * scenarios. + * If destination cid or recent cid is already set, we may have + * to just clear the src cid to ensure compactness in frequent + * migrations scenarios. * * It is not useful to clear the src cid when the number of threads is * greater or equal to the number of allowed CPUs, because user-space @@ -10282,9 +10285,9 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t) * allowed CPUs. 
*/ dst_pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(dst_rq)); - dst_cid = READ_ONCE(dst_pcpu_cid->cid); - if (!mm_cid_is_unset(dst_cid) && - atomic_read(&mm->mm_users) >= t->nr_cpus_allowed) + dst_cid_is_set = !mm_cid_is_unset(READ_ONCE(dst_pcpu_cid->cid)) || + !mm_cid_is_unset(READ_ONCE(dst_pcpu_cid->recent_cid)); + if (dst_cid_is_set && atomic_read(&mm->mm_users) >= READ_ONCE(mm->nr_cpus_allowed)) return; src_pcpu_cid = per_cpu_ptr(mm->pcpu_cid, src_cpu); src_rq = cpu_rq(src_cpu); @@ -10295,13 +10298,14 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t) src_cid); if (src_cid == -1) return; - if (!mm_cid_is_unset(dst_cid)) { + if (dst_cid_is_set) { __mm_cid_put(mm, src_cid); return; } /* Move src_cid to dst cpu. */ mm_cid_snapshot_time(dst_rq, mm); WRITE_ONCE(dst_pcpu_cid->cid, src_cid); + WRITE_ONCE(dst_pcpu_cid->recent_cid, src_cid); } static void sched_mm_cid_remote_clear(struct mm_struct *mm, struct mm_cid *pcpu_cid, @@ -10540,7 +10544,7 @@ void sched_mm_cid_after_execve(struct task_struct *t) * Matches barrier in sched_mm_cid_remote_clear_old(). */ smp_mb(); - t->last_mm_cid = t->mm_cid = mm_cid_get(rq, mm); + t->last_mm_cid = t->mm_cid = mm_cid_get(rq, t, mm); } rseq_set_notify_resume(t); } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index fba524c81c63..20b6e75604ec 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3596,24 +3596,41 @@ static inline void mm_cid_put(struct mm_struct *mm) __mm_cid_put(mm, mm_cid_clear_lazy_put(cid)); } -static inline int __mm_cid_try_get(struct mm_struct *mm) +static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm) { - struct cpumask *cpumask; - int cid; + struct cpumask *cidmask = mm_cidmask(mm); + struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; + int cid = __this_cpu_read(pcpu_cid->recent_cid); - cpumask = mm_cidmask(mm); + /* Try to re-use recent cid. This improves cache locality. */ + if (!mm_cid_is_unset(cid) && !cpumask_test_and_set_cpu(cid, cidmask)) + return cid; + /* + * Expand cid allocation if the maximum number of concurrency + * IDs allocated (max_nr_cid) is below the number cpus allowed + * and number of threads. Expanding cid allocation as much as + * possible improves cache locality. + */ + cid = atomic_read(&mm->max_nr_cid); + while (cid < READ_ONCE(mm->nr_cpus_allowed) && cid < atomic_read(&mm->mm_users)) { + if (!atomic_try_cmpxchg(&mm->max_nr_cid, &cid, cid + 1)) + continue; + if (!cpumask_test_and_set_cpu(cid, cidmask)) + return cid; + } /* + * Find the first available concurrency id. * Retry finding first zero bit if the mask is temporarily * filled. This only happens during concurrent remote-clear * which owns a cid without holding a rq lock. */ for (;;) { - cid = cpumask_first_zero(cpumask); - if (cid < nr_cpu_ids) + cid = cpumask_first_zero(cidmask); + if (cid < READ_ONCE(mm->nr_cpus_allowed)) break; cpu_relax(); } - if (cpumask_test_and_set_cpu(cid, cpumask)) + if (cpumask_test_and_set_cpu(cid, cidmask)) return -1; return cid; @@ -3631,7 +3648,8 @@ static inline void mm_cid_snapshot_time(struct rq *rq, struct mm_struct *mm) WRITE_ONCE(pcpu_cid->time, rq->clock); } -static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm) +static inline int __mm_cid_get(struct rq *rq, struct task_struct *t, + struct mm_struct *mm) { int cid; @@ -3641,13 +3659,13 @@ static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm) * guarantee forward progress. 
*/ if (!READ_ONCE(use_cid_lock)) { - cid = __mm_cid_try_get(mm); + cid = __mm_cid_try_get(t, mm); if (cid >= 0) goto end; raw_spin_lock(&cid_lock); } else { raw_spin_lock(&cid_lock); - cid = __mm_cid_try_get(mm); + cid = __mm_cid_try_get(t, mm); if (cid >= 0) goto unlock; } @@ -3667,7 +3685,7 @@ static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm) * all newcoming allocations observe the use_cid_lock flag set. */ do { - cid = __mm_cid_try_get(mm); + cid = __mm_cid_try_get(t, mm); cpu_relax(); } while (cid < 0); /* @@ -3684,7 +3702,8 @@ end: return cid; } -static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm) +static inline int mm_cid_get(struct rq *rq, struct task_struct *t, + struct mm_struct *mm) { struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; struct cpumask *cpumask; @@ -3701,8 +3720,9 @@ static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm) if (try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET)) __mm_cid_put(mm, mm_cid_clear_lazy_put(cid)); } - cid = __mm_cid_get(rq, mm); + cid = __mm_cid_get(rq, t, mm); __this_cpu_write(pcpu_cid->cid, cid); + __this_cpu_write(pcpu_cid->recent_cid, cid); return cid; } @@ -3755,7 +3775,7 @@ static inline void switch_mm_cid(struct rq *rq, prev->mm_cid = -1; } if (next->mm_cid_active) - next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next->mm); + next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next, next->mm); } #else /* !CONFIG_SCHED_MM_CID: */ -- cgit v1.2.3 From c2f8fde8689272a55b9319b69dfe7e8f0e2e9dfe Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 14 Oct 2024 11:40:56 +0200 Subject: fs: add helper to use mount option as path or fd Allow filesystems to use a mount option either as a file or path. Link: https://lore.kernel.org/r/20241014-work-overlayfs-v3-1-32b3fed1286e@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/fs_parser.c | 20 ++++++++++++++++++++ include/linux/fs_parser.h | 5 ++++- 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/fs_parser.c b/fs/fs_parser.c index 24727ec34e5a..8f583b814e6e 100644 --- a/fs/fs_parser.c +++ b/fs/fs_parser.c @@ -308,6 +308,26 @@ int fs_param_is_fd(struct p_log *log, const struct fs_parameter_spec *p, } EXPORT_SYMBOL(fs_param_is_fd); +int fs_param_is_file_or_string(struct p_log *log, + const struct fs_parameter_spec *p, + struct fs_parameter *param, + struct fs_parse_result *result) +{ + switch (param->type) { + case fs_value_is_string: + return fs_param_is_string(log, p, param, result); + case fs_value_is_file: + result->uint_32 = param->dirfd; + if (result->uint_32 <= INT_MAX) + return 0; + break; + default: + break; + } + return fs_param_bad_value(log, param); +} +EXPORT_SYMBOL(fs_param_is_file_or_string); + int fs_param_is_uid(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 6cf713a7e6c6..3cef566088fc 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -28,7 +28,8 @@ typedef int fs_param_type(struct p_log *, */ fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64, fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev, - fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid; + fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid, + fs_param_is_file_or_string; /* * Specification of the type of value a parameter wants. 
@@ -133,6 +134,8 @@ static inline bool fs_validate_description(const char *name, #define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL) #define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL) #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL) +#define fsparam_file_or_string(NAME, OPT) \ + __fsparam(fs_param_is_file_or_string, NAME, OPT, 0, NULL) #define fsparam_uid(NAME, OPT) __fsparam(fs_param_is_uid, NAME, OPT, 0, NULL) #define fsparam_gid(NAME, OPT) __fsparam(fs_param_is_gid, NAME, OPT, 0, NULL) -- cgit v1.2.3 From b692bf9a7543af7ad11a59d182a3757578f0ba53 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 7 Oct 2024 14:24:53 +0200 Subject: xsk: Get rid of xdp_buff_xsk::xskb_list_node Let's bring xdp_buff_xsk back to occupying 2 cachelines by removing xskb_list_node - for the purpose of gathering the xskb frags free_list_node can be used, head of the list (xsk_buff_pool::xskb_list) stays as-is, just reuse the node ptr. It is safe to do as a single xdp_buff_xsk can never reside in two pool's lists simultaneously. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20241007122458.282590-2-maciej.fijalkowski@intel.com --- include/net/xdp_sock_drv.h | 14 +++++++------- include/net/xsk_buff_pool.h | 1 - net/xdp/xsk.c | 4 ++-- net/xdp/xsk_buff_pool.c | 1 - 4 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 0a5dca2b2b3f..360bc1244c6a 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -126,8 +126,8 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) if (likely(!xdp_buff_has_frags(xdp))) goto out; - list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) { - list_del(&pos->xskb_list_node); + list_for_each_entry_safe(pos, tmp, xskb_list, free_list_node) { + list_del(&pos->free_list_node); xp_free(pos); } @@ -140,7 +140,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp) { struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp); - list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list); + list_add_tail(&frag->free_list_node, &frag->pool->xskb_list); } static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) @@ -150,9 +150,9 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) struct xdp_buff_xsk *frag; frag = list_first_entry_or_null(&xskb->pool->xskb_list, - struct xdp_buff_xsk, xskb_list_node); + struct xdp_buff_xsk, free_list_node); if (frag) { - list_del(&frag->xskb_list_node); + list_del(&frag->free_list_node); ret = &frag->xdp; } @@ -163,7 +163,7 @@ static inline void xsk_buff_del_tail(struct xdp_buff *tail) { struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp); - list_del(&xskb->xskb_list_node); + list_del(&xskb->free_list_node); } static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) @@ -172,7 +172,7 @@ static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) struct xdp_buff_xsk *frag; frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk, - xskb_list_node); + free_list_node); return &frag->xdp; } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index bacb33f1e3e5..aa7f1d0b3a5e 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -30,7 +30,6 @@ struct xdp_buff_xsk { struct xsk_buff_pool *pool; u64 orig_addr; struct 
list_head free_list_node; - struct list_head xskb_list_node; }; #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 1140b2a120ca..9c93064349a8 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -171,14 +171,14 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) return 0; xskb_list = &xskb->pool->xskb_list; - list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) { + list_for_each_entry_safe(pos, tmp, xskb_list, free_list_node) { if (list_is_singular(xskb_list)) contd = 0; len = pos->xdp.data_end - pos->xdp.data; err = __xsk_rcv_zc(xs, pos, len, contd); if (err) goto err; - list_del(&pos->xskb_list_node); + list_del(&pos->free_list_node); } return 0; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 521a2938e50a..e5368db7d18e 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -102,7 +102,6 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, xskb->pool = pool; xskb->xdp.frame_sz = umem->chunk_size - umem->headroom; INIT_LIST_HEAD(&xskb->free_list_node); - INIT_LIST_HEAD(&xskb->xskb_list_node); if (pool->unaligned) pool->free_heads[i] = xskb; else -- cgit v1.2.3 From 30ec2c1baaead43903ad63ff8e3083949059083c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 7 Oct 2024 14:24:54 +0200 Subject: xsk: s/free_list_node/list_node/ Now that free_list_node's purpose is two-folded, make it just a 'list_node'. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20241007122458.282590-3-maciej.fijalkowski@intel.com --- include/net/xdp_sock_drv.h | 14 +++++++------- include/net/xsk_buff_pool.h | 2 +- net/xdp/xsk.c | 4 ++-- net/xdp/xsk_buff_pool.c | 14 +++++++------- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 360bc1244c6a..40085afd9160 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -126,8 +126,8 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) if (likely(!xdp_buff_has_frags(xdp))) goto out; - list_for_each_entry_safe(pos, tmp, xskb_list, free_list_node) { - list_del(&pos->free_list_node); + list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { + list_del(&pos->list_node); xp_free(pos); } @@ -140,7 +140,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp) { struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp); - list_add_tail(&frag->free_list_node, &frag->pool->xskb_list); + list_add_tail(&frag->list_node, &frag->pool->xskb_list); } static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) @@ -150,9 +150,9 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) struct xdp_buff_xsk *frag; frag = list_first_entry_or_null(&xskb->pool->xskb_list, - struct xdp_buff_xsk, free_list_node); + struct xdp_buff_xsk, list_node); if (frag) { - list_del(&frag->free_list_node); + list_del(&frag->list_node); ret = &frag->xdp; } @@ -163,7 +163,7 @@ static inline void xsk_buff_del_tail(struct xdp_buff *tail) { struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp); - list_del(&xskb->free_list_node); + list_del(&xskb->list_node); } static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) @@ -172,7 +172,7 @@ static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) struct xdp_buff_xsk *frag; frag = 
list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk, - free_list_node); + list_node); return &frag->xdp; } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index aa7f1d0b3a5e..af8b6f776f86 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -29,7 +29,7 @@ struct xdp_buff_xsk { dma_addr_t frame_dma; struct xsk_buff_pool *pool; u64 orig_addr; - struct list_head free_list_node; + struct list_head list_node; }; #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 9c93064349a8..520023405908 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -171,14 +171,14 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) return 0; xskb_list = &xskb->pool->xskb_list; - list_for_each_entry_safe(pos, tmp, xskb_list, free_list_node) { + list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { if (list_is_singular(xskb_list)) contd = 0; len = pos->xdp.data_end - pos->xdp.data; err = __xsk_rcv_zc(xs, pos, len, contd); if (err) goto err; - list_del(&pos->free_list_node); + list_del(&pos->list_node); } return 0; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index e5368db7d18e..973557d5e4f7 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -101,7 +101,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, xskb = &pool->heads[i]; xskb->pool = pool; xskb->xdp.frame_sz = umem->chunk_size - umem->headroom; - INIT_LIST_HEAD(&xskb->free_list_node); + INIT_LIST_HEAD(&xskb->list_node); if (pool->unaligned) pool->free_heads[i] = xskb; else @@ -549,8 +549,8 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) } else { pool->free_list_cnt--; xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, - free_list_node); - list_del_init(&xskb->free_list_node); + list_node); + list_del_init(&xskb->list_node); } xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; @@ -616,8 +616,8 @@ static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u3 i = nb_entries; while (i--) { - xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node); - list_del_init(&xskb->free_list_node); + xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, list_node); + list_del_init(&xskb->list_node); *xdp = &xskb->xdp; xdp++; @@ -687,11 +687,11 @@ EXPORT_SYMBOL(xp_can_alloc); void xp_free(struct xdp_buff_xsk *xskb) { - if (!list_empty(&xskb->free_list_node)) + if (!list_empty(&xskb->list_node)) return; xskb->pool->free_list_cnt++; - list_add(&xskb->free_list_node, &xskb->pool->free_list); + list_add(&xskb->list_node, &xskb->pool->free_list); } EXPORT_SYMBOL(xp_free); -- cgit v1.2.3 From bea14124bacbe5c9366381e62635eed28ac892ae Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 7 Oct 2024 14:24:55 +0200 Subject: xsk: Get rid of xdp_buff_xsk::orig_addr Continue the process of dieting xdp_buff_xsk by removing orig_addr member. It can be calculated from xdp->data_hard_start where it was previously used, so it is not anything that has to be carried around in struct used widely in hot path. This has been used for initializing xdp_buff_xsk::frame_dma during pool setup and as a shortcut in xp_get_handle() to retrieve address provided to xsk Rx queue. 
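A minimal userspace sketch (simplified stand-in structs and names, not kernel code) of the point made above: orig_addr need not be cached because it round-trips from data_hard_start, which is exactly how the patched xp_init_dma_info() recomputes it.

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  struct pool { uint8_t *addrs; uint32_t headroom; };
  struct xskb { uint8_t *data_hard_start; };

  /* mirrors xp_init_xskb_addr(): only data_hard_start is recorded */
  static void init_xskb_addr(struct xskb *x, const struct pool *p, uint64_t addr)
  {
          x->data_hard_start = p->addrs + addr + p->headroom;
  }

  /* mirrors the recomputation added to xp_init_dma_info() */
  static uint64_t recover_orig_addr(const struct xskb *x, const struct pool *p)
  {
          return (uint64_t)(x->data_hard_start - p->addrs) - p->headroom;
  }

  int main(void)
  {
          static uint8_t umem[1 << 16];
          struct pool p = { .addrs = umem, .headroom = 256 };
          struct xskb x;
          uint64_t addr = 4096;

          init_xskb_addr(&x, &p, addr);
          assert(recover_orig_addr(&x, &p) == addr);
          printf("recovered orig_addr = %llu\n",
                 (unsigned long long)recover_orig_addr(&x, &p));
          return 0;
  }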
Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20241007122458.282590-4-maciej.fijalkowski@intel.com --- include/net/xsk_buff_pool.h | 19 +++++++++++-------- net/xdp/xsk.c | 2 +- net/xdp/xsk_buff_pool.c | 4 +++- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index af8b6f776f86..468a23b1b4c5 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -28,7 +28,6 @@ struct xdp_buff_xsk { dma_addr_t dma; dma_addr_t frame_dma; struct xsk_buff_pool *pool; - u64 orig_addr; struct list_head list_node; }; @@ -119,7 +118,6 @@ void xp_free(struct xdp_buff_xsk *xskb); static inline void xp_init_xskb_addr(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool, u64 addr) { - xskb->orig_addr = addr; xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom; } @@ -221,14 +219,19 @@ static inline void xp_release(struct xdp_buff_xsk *xskb) xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb; } -static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb) +static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb, + struct xsk_buff_pool *pool) { - u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start; + u64 orig_addr = xskb->xdp.data - pool->addrs; + u64 offset; - offset += xskb->pool->headroom; - if (!xskb->pool->unaligned) - return xskb->orig_addr + offset; - return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); + if (!pool->unaligned) + return orig_addr; + + offset = xskb->xdp.data - xskb->xdp.data_hard_start; + orig_addr -= offset; + offset += pool->headroom; + return orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); } static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 520023405908..6c31c1de1619 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -141,7 +141,7 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len, u64 addr; int err; - addr = xp_get_handle(xskb); + addr = xp_get_handle(xskb, xskb->pool); err = xskq_prod_reserve_desc(xs->rx, addr, len, flags); if (err) { xs->rx_queue_full++; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 973557d5e4f7..7ecd4ccd2473 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -416,8 +416,10 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_ for (i = 0; i < pool->heads_cnt; i++) { struct xdp_buff_xsk *xskb = &pool->heads[i]; + u64 orig_addr; - xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr); + orig_addr = xskb->xdp.data_hard_start - pool->addrs - pool->headroom; + xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, orig_addr); } } -- cgit v1.2.3 From 6e126872191df946a6fe01b79273119d32d96711 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 7 Oct 2024 14:24:56 +0200 Subject: xsk: Carry a copy of xdp_zc_max_segs within xsk_buff_pool This so we avoid dereferencing struct net_device within hot path. 
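The change above follows a common hot-path pattern: copy a rarely-changing limit into the already-hot structure at bind time so the per-packet path avoids one extra pointer dereference. An illustrative-only sketch of that pattern (invented names, not kernel APIs):

  struct far_config { unsigned int max_segs; /* plus many unrelated fields */ };
  struct hot_state  { unsigned int max_segs_cached; /* hot-path fields */ };

  /* one dereference of the "far" struct, done once at bind/setup time */
  static void hot_state_bind(struct hot_state *hs, const struct far_config *cfg)
  {
          hs->max_segs_cached = cfg->max_segs;
  }

  /* per-packet check touches only the hot structure */
  static int frag_limit_reached(const struct hot_state *hs, unsigned int nr_frags)
  {
          return nr_frags == hs->max_segs_cached;
  }

  int main(void)
  {
          struct far_config cfg = { .max_segs = 18 };
          struct hot_state hs;

          hot_state_bind(&hs, &cfg);
          return !frag_limit_reached(&hs, 18); /* exits 0: limit detected */
  }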
Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20241007122458.282590-5-maciej.fijalkowski@intel.com --- include/net/xsk_buff_pool.h | 1 + net/xdp/xsk_buff_pool.c | 1 + net/xdp/xsk_queue.h | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 468a23b1b4c5..bb03cee716b3 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -76,6 +76,7 @@ struct xsk_buff_pool { u32 chunk_size; u32 chunk_shift; u32 frame_len; + u32 xdp_zc_max_segs; u8 tx_metadata_len; /* inherited from umem */ u8 cached_need_wakeup; bool uses_need_wakeup; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 7ecd4ccd2473..e946ba4a5ccf 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -229,6 +229,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool, goto err_unreg_xsk; } pool->umem->zc = true; + pool->xdp_zc_max_segs = netdev->xdp_zc_max_segs; return 0; err_unreg_xsk: diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 406b20dfee8d..46d87e961ad6 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -260,7 +260,7 @@ u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool, nr_frags = 0; } else { nr_frags++; - if (nr_frags == pool->netdev->xdp_zc_max_segs) { + if (nr_frags == pool->xdp_zc_max_segs) { nr_frags = 0; break; } -- cgit v1.2.3 From 6e3ea06240adfef7b46e2338dd824541c31de06d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Oct 2024 18:03:23 +0300 Subject: dmaengine: acpi: Drop unused devm_acpi_dma_controller_free() After introduction a few years ago the devm_acpi_dma_controller_free() was never used. Drop it. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20241007150436.2183575-2-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- Documentation/driver-api/driver-model/devres.rst | 1 - drivers/dma/acpi-dma.c | 15 --------------- include/linux/acpi_dma.h | 4 ---- 3 files changed, 20 deletions(-) (limited to 'include') diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 5f2ee8d717b1..f23dbe5d6606 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -459,7 +459,6 @@ SERDEV SLAVE DMA ENGINE devm_acpi_dma_controller_register() - devm_acpi_dma_controller_free() SPI devm_spi_alloc_master() diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c index a58a1600dd65..d5beb96ef510 100644 --- a/drivers/dma/acpi-dma.c +++ b/drivers/dma/acpi-dma.c @@ -276,21 +276,6 @@ int devm_acpi_dma_controller_register(struct device *dev, } EXPORT_SYMBOL_GPL(devm_acpi_dma_controller_register); -/** - * devm_acpi_dma_controller_free - resource managed acpi_dma_controller_free() - * @dev: device that is unregistering as DMA controller - * - * Unregister a DMA controller registered with - * devm_acpi_dma_controller_register(). Normally this function will not need to - * be called and the resource management code will ensure that the resource is - * freed. 
- */ -void devm_acpi_dma_controller_free(struct device *dev) -{ - WARN_ON(devres_release(dev, devm_acpi_dma_release, NULL, NULL)); -} -EXPORT_SYMBOL_GPL(devm_acpi_dma_controller_free); - /** * acpi_dma_update_dma_spec - prepare dma specifier to pass to translation function * @adma: struct acpi_dma of DMA controller diff --git a/include/linux/acpi_dma.h b/include/linux/acpi_dma.h index 72cedb916a9c..3ef1ec7a04cb 100644 --- a/include/linux/acpi_dma.h +++ b/include/linux/acpi_dma.h @@ -65,7 +65,6 @@ int devm_acpi_dma_controller_register(struct device *dev, struct dma_chan *(*acpi_dma_xlate) (struct acpi_dma_spec *, struct acpi_dma *), void *data); -void devm_acpi_dma_controller_free(struct device *dev); struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev, size_t index); @@ -94,9 +93,6 @@ static inline int devm_acpi_dma_controller_register(struct device *dev, { return -ENODEV; } -static inline void devm_acpi_dma_controller_free(struct device *dev) -{ -} static inline struct dma_chan *acpi_dma_request_slave_chan_by_index( struct device *dev, size_t index) -- cgit v1.2.3 From 662f045332addc961940e48eb920caa954abbf09 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Oct 2024 18:03:25 +0300 Subject: dmaengine: acpi: Clean up headers There is a few things done: - include only the headers we are direct user of - when pointer is in use, provide a forward declaration - add missing headers - sort alphabetically Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20241007150436.2183575-4-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- drivers/dma/acpi-dma.c | 13 ++++++++----- include/linux/acpi_dma.h | 5 +++-- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c index f594ea265c76..2abbe11e797e 100644 --- a/drivers/dma/acpi-dma.c +++ b/drivers/dma/acpi-dma.c @@ -9,18 +9,21 @@ * Mika Westerberg */ +#include +#include #include #include #include -#include +#include +#include +#include #include #include #include -#include -#include -#include -#include #include +#include +#include +#include static LIST_HEAD(acpi_dma_list); static DEFINE_MUTEX(acpi_dma_lock); diff --git a/include/linux/acpi_dma.h b/include/linux/acpi_dma.h index 3ef1ec7a04cb..e748b2877602 100644 --- a/include/linux/acpi_dma.h +++ b/include/linux/acpi_dma.h @@ -11,10 +11,11 @@ #ifndef __LINUX_ACPI_DMA_H #define __LINUX_ACPI_DMA_H -#include -#include #include #include +#include + +struct device; /** * struct acpi_dma_spec - slave device DMA resources -- cgit v1.2.3 From 6ba55951e70bf7a8d89c03aa5612d2e0c81ded6d Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Oct 2024 11:27:15 -0500 Subject: logic_pio: Constify fwnode_handle The fwnode_handle passed into find_io_range_by_fwnode() and logic_pio_trans_hwaddr() are not modified, so make them const. 
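A small illustration of what the const-correct prototypes buy, both here and in the memstick id-table change earlier in this range: callers that only hold a const pointer need no cast, and fully const tables can be placed in .rodata. The types below are stand-ins, not the real fwnode/logic_pio structures.

  #include <stddef.h>

  struct handle { int token; };
  struct range  { const struct handle *owner; unsigned long start, size; };

  /* post-patch style: read-only use of the handle is visible in the prototype */
  static const struct range *find_range(const struct handle *h,
                                        const struct range *tbl, int n)
  {
          for (int i = 0; i < n; i++)
                  if (tbl[i].owner == h)
                          return &tbl[i];
          return NULL;
  }

  int main(void)
  {
          static const struct handle h0 = { 1 }, h1 = { 2 };
          /* const data: with const-correct types it can live in .rodata */
          static const struct range io_ranges[] = {
                  { &h0, 0x0000, 0x100 },
                  { &h1, 0x1000, 0x200 },
          };
          const struct handle *lookup = &h1; /* caller only holds a const pointer */

          return find_range(lookup, io_ranges, 2) == &io_ranges[1] ? 0 : 1;
  }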
Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241010-dt-const-v1-2-87a51f558425@kernel.org Signed-off-by: Rob Herring (Arm) --- include/linux/logic_pio.h | 6 +++--- lib/logic_pio.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h index babf4e3c28ba..8f1a9408302f 100644 --- a/include/linux/logic_pio.h +++ b/include/linux/logic_pio.h @@ -17,7 +17,7 @@ enum { struct logic_pio_hwaddr { struct list_head list; - struct fwnode_handle *fwnode; + const struct fwnode_handle *fwnode; resource_size_t hw_start; resource_size_t io_start; resource_size_t size; /* range size populated */ @@ -110,8 +110,8 @@ void logic_outsl(unsigned long addr, const void *buffer, unsigned int count); #endif /* CONFIG_INDIRECT_PIO */ #define MMIO_UPPER_LIMIT (IO_SPACE_LIMIT - PIO_INDIRECT_SIZE) -struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode); -unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, +struct logic_pio_hwaddr *find_io_range_by_fwnode(const struct fwnode_handle *fwnode); +unsigned long logic_pio_trans_hwaddr(const struct fwnode_handle *fwnode, resource_size_t hw_addr, resource_size_t size); int logic_pio_register_range(struct logic_pio_hwaddr *newrange); void logic_pio_unregister_range(struct logic_pio_hwaddr *range); diff --git a/lib/logic_pio.c b/lib/logic_pio.c index 2ea564a40064..e29496a38d06 100644 --- a/lib/logic_pio.c +++ b/lib/logic_pio.c @@ -122,7 +122,7 @@ void logic_pio_unregister_range(struct logic_pio_hwaddr *range) * * Traverse the io_range_list to find the registered node for @fwnode. */ -struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode) +struct logic_pio_hwaddr *find_io_range_by_fwnode(const struct fwnode_handle *fwnode) { struct logic_pio_hwaddr *range, *found_range = NULL; @@ -186,7 +186,7 @@ resource_size_t logic_pio_to_hwaddr(unsigned long pio) * * Returns Logical PIO value if successful, ~0UL otherwise */ -unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, +unsigned long logic_pio_trans_hwaddr(const struct fwnode_handle *fwnode, resource_size_t addr, resource_size_t size) { struct logic_pio_hwaddr *range; -- cgit v1.2.3 From 78e2baf3d96edd21c6f26d8afc0e68d02ec2c51c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2024 17:48:13 +0000 Subject: net: add TIME_WAIT logic to sk_to_full_sk() TCP will soon attach TIME_WAIT sockets to some ACK and RST. Make sure sk_to_full_sk() detects this and does not return a non full socket. 
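A minimal userspace sketch (fake sock type, illustrative state names) of the behaviour callers can now rely on: sk_to_full_sk() either returns a full socket or NULL, so a separate sk_fullsock() check after it becomes unnecessary.

  #include <assert.h>
  #include <stddef.h>

  enum state { ESTABLISHED, LISTEN, NEW_SYN_RECV, TIME_WAIT };

  struct fake_sk {
          enum state state;
          struct fake_sk *rsk_listener; /* only meaningful for NEW_SYN_RECV */
  };

  /* mirrors the patched sk_to_full_sk(): never returns a non-full socket */
  static struct fake_sk *to_full_sk(struct fake_sk *sk)
  {
          if (sk && sk->state == NEW_SYN_RECV)
                  sk = sk->rsk_listener;
          if (sk && sk->state == TIME_WAIT)
                  sk = NULL;
          return sk;
  }

  int main(void)
  {
          struct fake_sk lsk = { .state = LISTEN };
          struct fake_sk req = { .state = NEW_SYN_RECV, .rsk_listener = &lsk };
          struct fake_sk tw  = { .state = TIME_WAIT };
          struct fake_sk est = { .state = ESTABLISHED };

          assert(to_full_sk(&req) == &lsk); /* request sock -> its listener */
          assert(to_full_sk(&tw)  == NULL); /* TIME_WAIT -> not a full sock */
          assert(to_full_sk(&est) == &est); /* full sockets pass through */
          return 0;
  }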
v3: also changed sk_const_to_full_sk() Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Martin KaFai Lau Reviewed-by: Brian Vazquez Link: https://patch.msgid.link/20241010174817.1543642-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/bpf-cgroup.h | 2 +- include/net/inet_sock.h | 8 ++++++-- net/core/filter.c | 6 +----- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index ce91d9b2acb9..f0f219271daf 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -209,7 +209,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) { \ typeof(sk) __sk = sk_to_full_sk(sk); \ - if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \ + if (__sk && __sk == skb_to_full_sk(skb) && \ cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ CGROUP_INET_EGRESS); \ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index f01dd273bea6..56d8bc5593d3 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -321,8 +321,10 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet) static inline struct sock *sk_to_full_sk(struct sock *sk) { #ifdef CONFIG_INET - if (sk && sk->sk_state == TCP_NEW_SYN_RECV) + if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV) sk = inet_reqsk(sk)->rsk_listener; + if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT) + sk = NULL; #endif return sk; } @@ -331,8 +333,10 @@ static inline struct sock *sk_to_full_sk(struct sock *sk) static inline const struct sock *sk_const_to_full_sk(const struct sock *sk) { #ifdef CONFIG_INET - if (sk && sk->sk_state == TCP_NEW_SYN_RECV) + if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV) sk = ((const struct request_sock *)sk)->rsk_listener; + if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT) + sk = NULL; #endif return sk; } diff --git a/net/core/filter.c b/net/core/filter.c index bd0d08bf76bb..202c1d386e19 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6778,8 +6778,6 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk * sock refcnt is decremented to prevent a request_sock leak. */ - if (!sk_fullsock(sk2)) - sk2 = NULL; if (sk2 != sk) { sock_gen_put(sk); /* Ensure there is no need to bump sk2 refcnt */ @@ -6826,8 +6824,6 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk * sock refcnt is decremented to prevent a request_sock leak. */ - if (!sk_fullsock(sk2)) - sk2 = NULL; if (sk2 != sk) { sock_gen_put(sk); /* Ensure there is no need to bump sk2 refcnt */ @@ -7276,7 +7272,7 @@ BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk) { sk = sk_to_full_sk(sk); - if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) + if (sk && sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) return (unsigned long)sk; return (unsigned long)NULL; -- cgit v1.2.3 From bc43a3c83cad46a27d6e3bf869acdd926bbe79ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2024 17:48:14 +0000 Subject: net_sched: sch_fq: prepare for TIME_WAIT sockets TCP stack is not attaching skb to TIME_WAIT sockets yet, but we would like to allow this in the future. 
Add sk_listener_or_tw() helper to detect the three states that FQ needs to take care. Like NEW_SYN_RECV, TIME_WAIT are not full sockets and do not contain sk->sk_pacing_status, sk->sk_pacing_rate. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Brian Vazquez Link: https://patch.msgid.link/20241010174817.1543642-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 10 ++++++++++ net/sched/sch_fq.c | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 6da420ab1ee1..2f200d91ef11 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2802,6 +2802,16 @@ static inline bool sk_listener(const struct sock *sk) return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); } +/* This helper checks if a socket is a LISTEN or NEW_SYN_RECV or TIME_WAIT + * TCP SYNACK messages can be attached to LISTEN or NEW_SYN_RECV (depending on SYNCOOKIE) + * TCP RST and ACK can be attached to TIME_WAIT. + */ +static inline bool sk_listener_or_tw(const struct sock *sk) +{ + return (1 << READ_ONCE(sk->sk_state)) & + (TCPF_LISTEN | TCPF_NEW_SYN_RECV | TCPF_TIME_WAIT); +} + void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index aeabf45c9200..a97638bef6da 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -362,8 +362,9 @@ static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb, * 3) We do not want to rate limit them (eg SYNFLOOD attack), * especially if the listener set SO_MAX_PACING_RATE * 4) We pretend they are orphaned + * TCP can also associate TIME_WAIT sockets with RST or ACK packets. */ - if (!sk || sk_listener(sk)) { + if (!sk || sk_listener_or_tw(sk)) { unsigned long hash = skb_get_hash(skb) & q->orphan_mask; /* By forcing low order bit to 1, we make sure to not -- cgit v1.2.3 From 5ced52fa8f0dc23adb067f7b8a009a5ee051efb7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2024 17:48:15 +0000 Subject: net: add skb_set_owner_edemux() helper This can be used to attach a socket to an skb, taking a reference on sk->sk_refcnt. This helper might be a NOP if sk->sk_refcnt is zero. Use it from tcp_make_synack(). 
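A standalone sketch of the "attach only if a reference can still be taken" idiom behind the new helper, using userspace stand-ins for sock/skb and a simplified refcount_inc_not_zero(); none of these names are kernel APIs.

  #include <assert.h>
  #include <stdatomic.h>
  #include <stddef.h>

  struct fake_sk  { atomic_uint refcnt; };
  struct fake_skb { struct fake_sk *sk; void (*destructor)(struct fake_skb *); };

  static void edemux_destructor(struct fake_skb *skb) { (void)skb; /* would drop the ref */ }

  /* returns non-zero only if the count was non-zero and got incremented */
  static int refcount_inc_not_zero(atomic_uint *r)
  {
          unsigned int old = atomic_load(r);

          while (old) {
                  if (atomic_compare_exchange_weak(r, &old, old + 1))
                          return 1;
          }
          return 0;
  }

  static void set_owner_edemux(struct fake_skb *skb, struct fake_sk *sk)
  {
          skb->sk = NULL;          /* simplified skb_orphan(): drop previous owner */
          skb->destructor = NULL;
          if (refcount_inc_not_zero(&sk->refcnt)) {
                  skb->sk = sk;
                  skb->destructor = edemux_destructor;
          }
  }

  int main(void)
  {
          struct fake_sk live = { 2 }, dead = { 0 };
          struct fake_skb a = { 0 }, b = { 0 };

          set_owner_edemux(&a, &live);
          set_owner_edemux(&b, &dead);
          assert(a.sk == &live);
          assert(b.sk == NULL);    /* NOP when the refcount already hit zero */
          return 0;
  }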
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Brian Vazquez Link: https://patch.msgid.link/20241010174817.1543642-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 9 +++++++++ net/core/sock.c | 9 +++------ net/ipv4/tcp_output.c | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 2f200d91ef11..bf7fa3db10ae 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1760,6 +1760,15 @@ void sock_efree(struct sk_buff *skb); #ifdef CONFIG_INET void sock_edemux(struct sk_buff *skb); void sock_pfree(struct sk_buff *skb); + +static inline void skb_set_owner_edemux(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + if (refcount_inc_not_zero(&sk->sk_refcnt)) { + skb->sk = sk; + skb->destructor = sock_edemux; + } +} #else #define sock_edemux sock_efree #endif diff --git a/net/core/sock.c b/net/core/sock.c index 083d438d8b6f..f8c0d4eda888 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2592,14 +2592,11 @@ void __sock_wfree(struct sk_buff *skb) void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) { skb_orphan(skb); - skb->sk = sk; #ifdef CONFIG_INET - if (unlikely(!sk_fullsock(sk))) { - skb->destructor = sock_edemux; - sock_hold(sk); - return; - } + if (unlikely(!sk_fullsock(sk))) + return skb_set_owner_edemux(skb, sk); #endif + skb->sk = sk; skb->destructor = sock_wfree; skb_set_hash_from_sk(skb, sk); /* diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 06200bb111f8..054244ce5117 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3728,7 +3728,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, switch (synack_type) { case TCP_SYNACK_NORMAL: - skb_set_owner_w(skb, req_to_sk(req)); + skb_set_owner_edemux(skb, req_to_sk(req)); break; case TCP_SYNACK_COOKIE: /* Under synflood, we do not attach skb to a socket, -- cgit v1.2.3 From 79636038d37e7bd4d078238f2a3f002cab4423bc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2024 17:48:17 +0000 Subject: ipv4: tcp: give socket pointer to control skbs ip_send_unicast_reply() send orphaned 'control packets'. These are RST packets and also ACK packets sent from TIME_WAIT. Some eBPF programs would prefer to have a meaningful skb->sk pointer as much as possible. This means that TCP can now attach TIME_WAIT sockets to outgoing skbs. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Brian Vazquez Link: https://patch.msgid.link/20241010174817.1543642-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 3 ++- net/ipv4/ip_output.c | 5 ++++- net/ipv4/tcp_ipv4.c | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index bab084df1567..4be0a6a603b2 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -288,7 +288,8 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? 
FLOWI_FLAG_ANYSRC : 0; } -void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, +void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk, + struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e5c55a95063d..0065b1996c94 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1596,7 +1596,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, * Generic function to send a packet as reply to another packet. * Used to send some TCP resets/acks so far. */ -void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, +void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk, + struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, @@ -1662,6 +1663,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; + if (orig_sk) + skb_set_owner_edemux(nskb, (struct sock *)orig_sk); if (transmit_time) nskb->tstamp_type = SKB_CLOCK_MONOTONIC; if (txhash) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 985028434f64..9d3dd101ea71 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -907,7 +907,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, ctl_sk->sk_mark = 0; ctl_sk->sk_priority = 0; } - ip_send_unicast_reply(ctl_sk, + ip_send_unicast_reply(ctl_sk, sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len, @@ -1021,7 +1021,7 @@ static void tcp_v4_send_ack(const struct sock *sk, ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority); transmit_time = tcp_transmit_time(sk); - ip_send_unicast_reply(ctl_sk, + ip_send_unicast_reply(ctl_sk, sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len, -- cgit v1.2.3 From f15e3b3ddb9fab1c1731b6154e2cd6573fb54c4d Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:44:56 +0000 Subject: net: napi: Make napi_defer_hard_irqs per-NAPI Add defer_hard_irqs to napi_struct in preparation for per-NAPI settings. The existing sysfs parameter is respected; writes to sysfs will write to all NAPI structs for the device and the net_device defer_hard_irq field. Reads from sysfs show the net_device field. The ability to set defer_hard_irqs on specific NAPI instances will be added in a later commit, via netdev-genl. 
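A rough sketch of the resulting semantics (illustrative only: the accessors live in the core-private net/core/dev.h header, not in a driver-facing API, and example_fan_out() is a made-up name). A device-wide write, as the sysfs handler does under RTNL, fans out to every NAPI of the device, so hot-path readers such as napi_complete_done() only touch their own napi_struct:

#include <linux/netdevice.h>
/* napi_get_defer_hard_irqs()/netdev_set_defer_hard_irqs() come from the
 * core-internal net/core/dev.h added by this patch.
 */

/* Illustration; assumes RTNL is held, as in the sysfs write path. */
static void example_fan_out(struct net_device *dev)
{
	struct napi_struct *napi;

	netdev_set_defer_hard_irqs(dev, 2);	/* device default + all NAPIs */

	list_for_each_entry(napi, &dev->napi_list, dev_list)
		WARN_ON(napi_get_defer_hard_irqs(napi) != 2);	/* per-NAPI copy */
}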
Signed-off-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20241011184527.16393-2-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- .../networking/net_cachelines/net_device.rst | 1 + include/linux/netdevice.h | 3 +- net/core/dev.c | 10 +++--- net/core/dev.h | 36 ++++++++++++++++++++++ net/core/net-sysfs.c | 2 +- 5 files changed, 45 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 1b018ac35e9a..5a7388b2ab6f 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -186,4 +186,5 @@ struct dpll_pin* dpll_pin struct hlist_head page_pools struct dim_irq_moder* irq_moder u64 max_pacing_offload_horizon +u32 napi_defer_hard_irqs =================================== =========================== =================== =================== =================================================================================== diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e6b93d01e631..2e7bc23660ec 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -373,6 +373,7 @@ struct napi_struct { unsigned int napi_id; struct hrtimer timer; struct task_struct *thread; + u32 defer_hard_irqs; /* control-path-only fields follow */ struct list_head dev_list; struct hlist_node napi_hash_node; @@ -2085,7 +2086,6 @@ struct net_device { unsigned int real_num_rx_queues; struct netdev_rx_queue *_rx; unsigned long gro_flush_timeout; - u32 napi_defer_hard_irqs; unsigned int gro_max_size; unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; @@ -2413,6 +2413,7 @@ struct net_device { struct dim_irq_moder *irq_moder; u64 max_pacing_offload_horizon; + u32 napi_defer_hard_irqs; /** * @lock: protects @net_shaper_hierarchy, feel free to use for other diff --git a/net/core/dev.c b/net/core/dev.c index b590eefce3b4..fbaa9eabf77f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6233,7 +6233,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done) if (work_done) { if (n->gro_bitmask) timeout = READ_ONCE(n->dev->gro_flush_timeout); - n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs); + n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n); } if (n->defer_hard_irqs_count > 0) { n->defer_hard_irqs_count--; @@ -6371,7 +6371,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); if (flags & NAPI_F_PREFER_BUSY_POLL) { - napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs); + napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi); timeout = READ_ONCE(napi->dev->gro_flush_timeout); if (napi->defer_hard_irqs_count && timeout) { hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); @@ -6653,6 +6653,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, INIT_HLIST_NODE(&napi->napi_hash_node); hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); napi->timer.function = napi_watchdog; + napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs)); init_gro_hash(napi); napi->skb = NULL; INIT_LIST_HEAD(&napi->rx_list); @@ -11059,7 +11060,7 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev) if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { dev->gro_flush_timeout = 20000; - dev->napi_defer_hard_irqs = 1; + netdev_set_defer_hard_irqs(dev, 
1); } } EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on); @@ -12003,7 +12004,6 @@ static void __init net_dev_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout); - CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler); @@ -12015,7 +12015,7 @@ static void __init net_dev_struct_check(void) #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress); #endif - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 100); } /* diff --git a/net/core/dev.h b/net/core/dev.h index d3ea92949ff3..0716b1048261 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -148,6 +148,42 @@ static inline void netif_set_gro_ipv4_max_size(struct net_device *dev, WRITE_ONCE(dev->gro_ipv4_max_size, size); } +/** + * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs + * @n: napi struct to get the defer_hard_irqs field from + * + * Return: the per-NAPI value of the defar_hard_irqs field. + */ +static inline u32 napi_get_defer_hard_irqs(const struct napi_struct *n) +{ + return READ_ONCE(n->defer_hard_irqs); +} + +/** + * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi + * @n: napi_struct to set the defer_hard_irqs field + * @defer: the value the field should be set to + */ +static inline void napi_set_defer_hard_irqs(struct napi_struct *n, u32 defer) +{ + WRITE_ONCE(n->defer_hard_irqs, defer); +} + +/** + * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev + * @netdev: the net_device for which all NAPIs will have defer_hard_irqs set + * @defer: the defer_hard_irqs value to set + */ +static inline void netdev_set_defer_hard_irqs(struct net_device *netdev, + u32 defer) +{ + struct napi_struct *napi; + + WRITE_ONCE(netdev->napi_defer_hard_irqs, defer); + list_for_each_entry(napi, &netdev->napi_list, dev_list) + napi_set_defer_hard_irqs(napi, defer); +} + int rps_cpumask_housekeeping(struct cpumask *mask); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 05cf5347f25e..25125f356a15 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -429,7 +429,7 @@ static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val if (val > S32_MAX) return -ERANGE; - WRITE_ONCE(dev->napi_defer_hard_irqs, val); + netdev_set_defer_hard_irqs(dev, (u32)val); return 0; } -- cgit v1.2.3 From 516010460011ae74ac3b7383cf90ed27e2711cd6 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:44:57 +0000 Subject: netdev-genl: Dump napi_defer_hard_irqs Support dumping defer_hard_irqs for a NAPI ID. 
Signed-off-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20241011184527.16393-3-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 8 ++++++++ include/uapi/linux/netdev.h | 1 + net/core/netdev-genl.c | 6 ++++++ tools/include/uapi/linux/netdev.h | 1 + 4 files changed, 16 insertions(+) (limited to 'include') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 08412c279297..585e87ec3c16 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -248,6 +248,13 @@ attribute-sets: threaded mode. If NAPI is not in threaded mode (i.e. uses normal softirq context), the attribute will be absent. type: u32 + - + name: defer-hard-irqs + doc: The number of consecutive empty polls before IRQ deferral ends + and hardware IRQs are re-enabled. + type: u32 + checks: + max: s32-max - name: queue attributes: @@ -636,6 +643,7 @@ operations: - ifindex - irq - pid + - defer-hard-irqs dump: request: attributes: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 7c308f04e7a0..13dc0b027e86 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -122,6 +122,7 @@ enum { NETDEV_A_NAPI_ID, NETDEV_A_NAPI_IRQ, NETDEV_A_NAPI_PID, + NETDEV_A_NAPI_DEFER_HARD_IRQS, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 358cba248796..f98e5d1d0d21 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -161,6 +161,7 @@ static int netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, const struct genl_info *info) { + u32 napi_defer_hard_irqs; void *hdr; pid_t pid; @@ -189,6 +190,11 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, goto nla_put_failure; } + napi_defer_hard_irqs = napi_get_defer_hard_irqs(napi); + if (nla_put_s32(rsp, NETDEV_A_NAPI_DEFER_HARD_IRQS, + napi_defer_hard_irqs)) + goto nla_put_failure; + genlmsg_end(rsp, hdr); return 0; diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 7c308f04e7a0..13dc0b027e86 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -122,6 +122,7 @@ enum { NETDEV_A_NAPI_ID, NETDEV_A_NAPI_IRQ, NETDEV_A_NAPI_PID, + NETDEV_A_NAPI_DEFER_HARD_IRQS, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) -- cgit v1.2.3 From acb8d4ed5661d05f794ef2ce34fd11e699e9ca32 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:44:58 +0000 Subject: net: napi: Make gro_flush_timeout per-NAPI Allow per-NAPI gro_flush_timeout setting. The existing sysfs parameter is respected; writes to sysfs will write to all NAPI structs for the device and the net_device gro_flush_timeout field. Reads from sysfs will read from the net_device field. The ability to set gro_flush_timeout on specific NAPI instances will be added in a later commit, via netdev-genl. 
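To illustrate what per-NAPI storage enables (a sketch only: it uses the core-internal accessors from net/core/dev.h, the queue roles and example_mixed_policy() name are made up, and the user-space knob for this arrives in a later netdev-genl patch), one device can now mix IRQ-deferral policies per queue:

/* Sketch: defer IRQs and arm the NAPI watchdog on a bulk queue while
 * leaving a latency-critical queue unaffected.  Values are examples only.
 */
static void example_mixed_policy(struct napi_struct *bulk,
				 struct napi_struct *lowlat)
{
	napi_set_gro_flush_timeout(bulk, 20000);	/* 20 us NAPI watchdog */
	napi_set_defer_hard_irqs(bulk, 2);

	napi_set_gro_flush_timeout(lowlat, 0);		/* re-enable IRQs immediately */
	napi_set_defer_hard_irqs(lowlat, 0);
}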
Signed-off-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20241011184527.16393-4-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- .../networking/net_cachelines/net_device.rst | 1 + include/linux/netdevice.h | 3 +- net/core/dev.c | 12 +++---- net/core/dev.h | 40 ++++++++++++++++++++++ net/core/net-sysfs.c | 2 +- 5 files changed, 50 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 5a7388b2ab6f..67910ea49160 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -186,5 +186,6 @@ struct dpll_pin* dpll_pin struct hlist_head page_pools struct dim_irq_moder* irq_moder u64 max_pacing_offload_horizon +unsigned_long gro_flush_timeout u32 napi_defer_hard_irqs =================================== =========================== =================== =================== =================================================================================== diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2e7bc23660ec..93241d4de437 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -373,6 +373,7 @@ struct napi_struct { unsigned int napi_id; struct hrtimer timer; struct task_struct *thread; + unsigned long gro_flush_timeout; u32 defer_hard_irqs; /* control-path-only fields follow */ struct list_head dev_list; @@ -2085,7 +2086,6 @@ struct net_device { int ifindex; unsigned int real_num_rx_queues; struct netdev_rx_queue *_rx; - unsigned long gro_flush_timeout; unsigned int gro_max_size; unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; @@ -2413,6 +2413,7 @@ struct net_device { struct dim_irq_moder *irq_moder; u64 max_pacing_offload_horizon; + unsigned long gro_flush_timeout; u32 napi_defer_hard_irqs; /** diff --git a/net/core/dev.c b/net/core/dev.c index fbaa9eabf77f..e21ace3551d5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6232,12 +6232,12 @@ bool napi_complete_done(struct napi_struct *n, int work_done) if (work_done) { if (n->gro_bitmask) - timeout = READ_ONCE(n->dev->gro_flush_timeout); + timeout = napi_get_gro_flush_timeout(n); n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n); } if (n->defer_hard_irqs_count > 0) { n->defer_hard_irqs_count--; - timeout = READ_ONCE(n->dev->gro_flush_timeout); + timeout = napi_get_gro_flush_timeout(n); if (timeout) ret = false; } @@ -6372,7 +6372,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, if (flags & NAPI_F_PREFER_BUSY_POLL) { napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi); - timeout = READ_ONCE(napi->dev->gro_flush_timeout); + timeout = napi_get_gro_flush_timeout(napi); if (napi->defer_hard_irqs_count && timeout) { hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); skip_schedule = true; @@ -6654,6 +6654,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); napi->timer.function = napi_watchdog; napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs)); + napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout)); init_gro_hash(napi); napi->skb = NULL; INIT_LIST_HEAD(&napi->rx_list); @@ -11059,7 +11060,7 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev) WARN_ON(dev->reg_state == NETREG_REGISTERED); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) 
{ - dev->gro_flush_timeout = 20000; + netdev_set_gro_flush_timeout(dev, 20000); netdev_set_defer_hard_irqs(dev, 1); } } @@ -12003,7 +12004,6 @@ static void __init net_dev_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx); - CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler); @@ -12015,7 +12015,7 @@ static void __init net_dev_struct_check(void) #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress); #endif - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 100); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 92); } /* diff --git a/net/core/dev.h b/net/core/dev.h index 0716b1048261..7d0aab7e3ef1 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -184,6 +184,46 @@ static inline void netdev_set_defer_hard_irqs(struct net_device *netdev, napi_set_defer_hard_irqs(napi, defer); } +/** + * napi_get_gro_flush_timeout - get the gro_flush_timeout + * @n: napi struct to get the gro_flush_timeout from + * + * Return: the per-NAPI value of the gro_flush_timeout field. + */ +static inline unsigned long +napi_get_gro_flush_timeout(const struct napi_struct *n) +{ + return READ_ONCE(n->gro_flush_timeout); +} + +/** + * napi_set_gro_flush_timeout - set the gro_flush_timeout for a napi + * @n: napi struct to set the gro_flush_timeout + * @timeout: timeout value to set + * + * napi_set_gro_flush_timeout sets the per-NAPI gro_flush_timeout + */ +static inline void napi_set_gro_flush_timeout(struct napi_struct *n, + unsigned long timeout) +{ + WRITE_ONCE(n->gro_flush_timeout, timeout); +} + +/** + * netdev_set_gro_flush_timeout - set gro_flush_timeout of a netdev's NAPIs + * @netdev: the net_device for which all NAPIs will have gro_flush_timeout set + * @timeout: the timeout value to set + */ +static inline void netdev_set_gro_flush_timeout(struct net_device *netdev, + unsigned long timeout) +{ + struct napi_struct *napi; + + WRITE_ONCE(netdev->gro_flush_timeout, timeout); + list_for_each_entry(napi, &netdev->napi_list, dev_list) + napi_set_gro_flush_timeout(napi, timeout); +} + int rps_cpumask_housekeeping(struct cpumask *mask); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 25125f356a15..2d9afc6e2161 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -409,7 +409,7 @@ NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec); static int change_gro_flush_timeout(struct net_device *dev, unsigned long val) { - WRITE_ONCE(dev->gro_flush_timeout, val); + netdev_set_gro_flush_timeout(dev, val); return 0; } -- cgit v1.2.3 From 0137891e74576f77a7901718dc0ce08ca074ae74 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:44:59 +0000 Subject: netdev-genl: Dump gro_flush_timeout Support dumping gro_flush_timeout for a NAPI ID. 
Signed-off-by: Joe Damato Reviewed-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241011184527.16393-5-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 9 +++++++++ include/uapi/linux/netdev.h | 1 + net/core/netdev-genl.c | 6 ++++++ tools/include/uapi/linux/netdev.h | 1 + 4 files changed, 17 insertions(+) (limited to 'include') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 585e87ec3c16..7b47454c51dd 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -255,6 +255,14 @@ attribute-sets: type: u32 checks: max: s32-max + - + name: gro-flush-timeout + doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog + timer which schedules NAPI processing. Additionally, a non-zero + value will also prevent GRO from flushing recent super-frames at + the end of a NAPI cycle. This may add receive latency in exchange + for reducing the number of frames processed by the network stack. + type: uint - name: queue attributes: @@ -644,6 +652,7 @@ operations: - irq - pid - defer-hard-irqs + - gro-flush-timeout dump: request: attributes: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 13dc0b027e86..cacd33359c76 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -123,6 +123,7 @@ enum { NETDEV_A_NAPI_IRQ, NETDEV_A_NAPI_PID, NETDEV_A_NAPI_DEFER_HARD_IRQS, + NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index f98e5d1d0d21..ac19f2e6cfbe 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -161,6 +161,7 @@ static int netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, const struct genl_info *info) { + unsigned long gro_flush_timeout; u32 napi_defer_hard_irqs; void *hdr; pid_t pid; @@ -195,6 +196,11 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, napi_defer_hard_irqs)) goto nla_put_failure; + gro_flush_timeout = napi_get_gro_flush_timeout(napi); + if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, + gro_flush_timeout)) + goto nla_put_failure; + genlmsg_end(rsp, hdr); return 0; diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 13dc0b027e86..cacd33359c76 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -123,6 +123,7 @@ enum { NETDEV_A_NAPI_IRQ, NETDEV_A_NAPI_PID, NETDEV_A_NAPI_DEFER_HARD_IRQS, + NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) -- cgit v1.2.3 From 86e25f40aa1e9e54e081e55016f65b5c92523989 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:45:00 +0000 Subject: net: napi: Add napi_config Add a persistent NAPI config area for NAPI configuration to the core. Drivers opt-in to setting the persistent config for a NAPI by passing an index when calling netif_napi_add_config. napi_config is allocated in alloc_netdev_mqs, freed in free_netdev (after the NAPIs are deleted). Drivers which call netif_napi_add_config will have persistent per-NAPI settings: NAPI IDs, gro_flush_timeout, and defer_hard_irq settings. Per-NAPI settings are saved in napi_disable and restored in napi_enable. 
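A hypothetical driver sketch of the opt-in (example_poll(), example_add_rx_napi() and the qid argument are placeholders): registering each NAPI with a stable index ties it to a napi_config slot, so its NAPI ID, defer_hard_irqs and gro_flush_timeout survive napi_disable()/napi_enable() cycles, e.g. across ring or channel reconfiguration:

#include <linux/netdevice.h>

static int example_poll(struct napi_struct *napi, int budget);

static void example_add_rx_napi(struct net_device *dev,
				struct napi_struct *napi, int qid)
{
	/* qid indexes dev->napi_config[], which is sized to max(txqs, rxqs)
	 * in alloc_netdev_mqs(); it must stay stable across reconfiguration
	 * for the saved settings to be meaningful.
	 */
	netif_napi_add_config(dev, napi, example_poll, qid);
	napi_enable(napi);
}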
Co-developed-by: Martin Karsten Signed-off-by: Martin Karsten Signed-off-by: Joe Damato Reviewed-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241011184527.16393-6-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- .../networking/net_cachelines/net_device.rst | 1 + include/linux/netdevice.h | 36 +++++++++- net/core/dev.c | 80 ++++++++++++++++++++-- net/core/dev.h | 12 ++++ 4 files changed, 119 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 67910ea49160..db6192b2bb50 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -186,6 +186,7 @@ struct dpll_pin* dpll_pin struct hlist_head page_pools struct dim_irq_moder* irq_moder u64 max_pacing_offload_horizon +struct_napi_config* napi_config unsigned_long gro_flush_timeout u32 napi_defer_hard_irqs =================================== =========================== =================== =================== =================================================================================== diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 93241d4de437..8feaca12655e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -342,6 +342,15 @@ struct gro_list { */ #define GRO_HASH_BUCKETS 8 +/* + * Structure for per-NAPI config + */ +struct napi_config { + u64 gro_flush_timeout; + u32 defer_hard_irqs; + unsigned int napi_id; +}; + /* * Structure for NAPI scheduling similar to tasklet but with weighting */ @@ -379,6 +388,8 @@ struct napi_struct { struct list_head dev_list; struct hlist_node napi_hash_node; int irq; + int index; + struct napi_config *config; }; enum { @@ -1868,9 +1879,6 @@ enum netdev_reg_state { * allocated at register_netdev() time * @real_num_rx_queues: Number of RX queues currently active in device * @xdp_prog: XDP sockets filter program pointer - * @gro_flush_timeout: timeout for GRO layer in NAPI - * @napi_defer_hard_irqs: If not zero, provides a counter that would - * allow to avoid NIC hard IRQ, on busy queues. * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one @@ -2020,6 +2028,11 @@ enum netdev_reg_state { * where the clock is recovered. * * @max_pacing_offload_horizon: max EDT offload horizon in nsec. + * @napi_config: An array of napi_config structures containing per-NAPI + * settings. + * @gro_flush_timeout: timeout for GRO layer in NAPI + * @napi_defer_hard_irqs: If not zero, provides a counter that would + * allow to avoid NIC hard IRQ, on busy queues. * * FIXME: cleanup struct net_device such that network protocol info * moves out. 
@@ -2413,6 +2426,7 @@ struct net_device { struct dim_irq_moder *irq_moder; u64 max_pacing_offload_horizon; + struct napi_config *napi_config; unsigned long gro_flush_timeout; u32 napi_defer_hard_irqs; @@ -2678,6 +2692,22 @@ netif_napi_add_tx_weight(struct net_device *dev, netif_napi_add_weight(dev, napi, poll, weight); } +/** + * netif_napi_add_config - initialize a NAPI context with persistent config + * @dev: network device + * @napi: NAPI context + * @poll: polling function + * @index: the NAPI index + */ +static inline void +netif_napi_add_config(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int index) +{ + napi->index = index; + napi->config = &dev->napi_config[index]; + netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT); +} + /** * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only * @dev: network device diff --git a/net/core/dev.c b/net/core/dev.c index e21ace3551d5..c682173a7642 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6505,6 +6505,23 @@ EXPORT_SYMBOL(napi_busy_loop); #endif /* CONFIG_NET_RX_BUSY_POLL */ +static void __napi_hash_add_with_id(struct napi_struct *napi, + unsigned int napi_id) +{ + napi->napi_id = napi_id; + hlist_add_head_rcu(&napi->napi_hash_node, + &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); +} + +static void napi_hash_add_with_id(struct napi_struct *napi, + unsigned int napi_id) +{ + spin_lock(&napi_hash_lock); + WARN_ON_ONCE(napi_by_id(napi_id)); + __napi_hash_add_with_id(napi, napi_id); + spin_unlock(&napi_hash_lock); +} + static void napi_hash_add(struct napi_struct *napi) { if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state)) @@ -6517,10 +6534,8 @@ static void napi_hash_add(struct napi_struct *napi) if (unlikely(++napi_gen_id < MIN_NAPI_ID)) napi_gen_id = MIN_NAPI_ID; } while (napi_by_id(napi_gen_id)); - napi->napi_id = napi_gen_id; - hlist_add_head_rcu(&napi->napi_hash_node, - &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + __napi_hash_add_with_id(napi, napi_gen_id); spin_unlock(&napi_hash_lock); } @@ -6643,6 +6658,28 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, } EXPORT_SYMBOL(netif_queue_set_napi); +static void napi_restore_config(struct napi_struct *n) +{ + n->defer_hard_irqs = n->config->defer_hard_irqs; + n->gro_flush_timeout = n->config->gro_flush_timeout; + /* a NAPI ID might be stored in the config, if so use it. if not, use + * napi_hash_add to generate one for us. It will be saved to the config + * in napi_disable. 
+ */ + if (n->config->napi_id) + napi_hash_add_with_id(n, n->config->napi_id); + else + napi_hash_add(n); +} + +static void napi_save_config(struct napi_struct *n) +{ + n->config->defer_hard_irqs = n->defer_hard_irqs; + n->config->gro_flush_timeout = n->gro_flush_timeout; + n->config->napi_id = n->napi_id; + napi_hash_del(n); +} + void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -6653,8 +6690,6 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, INIT_HLIST_NODE(&napi->napi_hash_node); hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); napi->timer.function = napi_watchdog; - napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs)); - napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout)); init_gro_hash(napi); napi->skb = NULL; INIT_LIST_HEAD(&napi->rx_list); @@ -6672,7 +6707,13 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, set_bit(NAPI_STATE_SCHED, &napi->state); set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); - napi_hash_add(napi); + + /* default settings from sysfs are applied to all NAPIs. any per-NAPI + * configuration will be loaded in napi_enable + */ + napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs)); + napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout)); + napi_get_frags_check(napi); /* Create kthread for this napi if dev->threaded is set. * Clear dev->threaded if kthread creation failed so that @@ -6704,6 +6745,11 @@ void napi_disable(struct napi_struct *n) hrtimer_cancel(&n->timer); + if (n->config) + napi_save_config(n); + else + napi_hash_del(n); + clear_bit(NAPI_STATE_DISABLE, &n->state); } EXPORT_SYMBOL(napi_disable); @@ -6719,6 +6765,11 @@ void napi_enable(struct napi_struct *n) { unsigned long new, val = READ_ONCE(n->state); + if (n->config) + napi_restore_config(n); + else + napi_hash_add(n); + do { BUG_ON(!test_bit(NAPI_STATE_SCHED, &val)); @@ -6748,7 +6799,11 @@ void __netif_napi_del(struct napi_struct *napi) if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state)) return; - napi_hash_del(napi); + if (napi->config) { + napi->index = -1; + napi->config = NULL; + } + list_del_rcu(&napi->dev_list); napi_free_frags(napi); @@ -11085,6 +11140,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned int txqs, unsigned int rxqs) { struct net_device *dev; + size_t napi_config_sz; + unsigned int maxqs; BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -11098,6 +11155,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, return NULL; } + maxqs = max(txqs, rxqs); + dev = kvzalloc(struct_size(dev, priv, sizeof_priv), GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!dev) @@ -11174,6 +11233,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (!dev->ethtool) goto free_all; + napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config)); + dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT); + if (!dev->napi_config) + goto free_all; + strscpy(dev->name, name); dev->name_assign_type = name_assign_type; dev->group = INIT_NETDEV_GROUP; @@ -11237,6 +11301,8 @@ void free_netdev(struct net_device *dev) list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); + kvfree(dev->napi_config); + ref_tracker_dir_exit(&dev->refcnt_tracker); #ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); diff --git a/net/core/dev.h 
b/net/core/dev.h index 7d0aab7e3ef1..7881bced70a9 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -177,11 +177,17 @@ static inline void napi_set_defer_hard_irqs(struct napi_struct *n, u32 defer) static inline void netdev_set_defer_hard_irqs(struct net_device *netdev, u32 defer) { + unsigned int count = max(netdev->num_rx_queues, + netdev->num_tx_queues); struct napi_struct *napi; + int i; WRITE_ONCE(netdev->napi_defer_hard_irqs, defer); list_for_each_entry(napi, &netdev->napi_list, dev_list) napi_set_defer_hard_irqs(napi, defer); + + for (i = 0; i < count; i++) + netdev->napi_config[i].defer_hard_irqs = defer; } /** @@ -217,11 +223,17 @@ static inline void napi_set_gro_flush_timeout(struct napi_struct *n, static inline void netdev_set_gro_flush_timeout(struct net_device *netdev, unsigned long timeout) { + unsigned int count = max(netdev->num_rx_queues, + netdev->num_tx_queues); struct napi_struct *napi; + int i; WRITE_ONCE(netdev->gro_flush_timeout, timeout); list_for_each_entry(napi, &netdev->napi_list, dev_list) napi_set_gro_flush_timeout(napi, timeout); + + for (i = 0; i < count; i++) + netdev->napi_config[i].gro_flush_timeout = timeout; } int rps_cpumask_housekeeping(struct cpumask *mask); -- cgit v1.2.3 From 1287c1ae0fc227e5acef11a539eb4e75646e31c7 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:45:01 +0000 Subject: netdev-genl: Support setting per-NAPI config values Add support to set per-NAPI defer_hard_irqs and gro_flush_timeout. Signed-off-by: Joe Damato Reviewed-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241011184527.16393-7-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 11 ++++++++ include/uapi/linux/netdev.h | 1 + net/core/netdev-genl-gen.c | 18 +++++++++++++ net/core/netdev-genl-gen.h | 1 + net/core/netdev-genl.c | 45 +++++++++++++++++++++++++++++++++ tools/include/uapi/linux/netdev.h | 1 + 6 files changed, 77 insertions(+) (limited to 'include') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 7b47454c51dd..f9cb97d6106c 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -693,6 +693,17 @@ operations: reply: attributes: - id + - + name: napi-set + doc: Set configurable NAPI instance settings. 
+ attribute-set: napi + flags: [ admin-perm ] + do: + request: + attributes: + - id + - defer-hard-irqs + - gro-flush-timeout kernel-family: headers: [ "linux/list.h"] diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index cacd33359c76..e3ebb49f60d2 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -201,6 +201,7 @@ enum { NETDEV_CMD_NAPI_GET, NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, + NETDEV_CMD_NAPI_SET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index b28424ae06d5..e197bd84997c 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -22,6 +22,10 @@ static const struct netlink_range_validation netdev_a_page_pool_ifindex_range = .max = 2147483647ULL, }; +static const struct netlink_range_validation netdev_a_napi_defer_hard_irqs_range = { + .max = 2147483647ULL, +}; + /* Common nested types */ const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = { [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range), @@ -87,6 +91,13 @@ static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1] [NETDEV_A_DMABUF_QUEUES] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy), }; +/* NETDEV_CMD_NAPI_SET - do */ +static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT + 1] = { + [NETDEV_A_NAPI_ID] = { .type = NLA_U32, }, + [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range), + [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, }, +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -171,6 +182,13 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_DMABUF_FD, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, + { + .cmd = NETDEV_CMD_NAPI_SET, + .doit = netdev_nl_napi_set_doit, + .policy = netdev_napi_set_nl_policy, + .maxattr = NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 8cda334fd042..e09dd7539ff2 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -33,6 +33,7 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index ac19f2e6cfbe..b49c3b4e5fbe 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -303,6 +303,51 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return err; } +static int +netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) +{ + u64 gro_flush_timeout = 0; + u32 defer = 0; + + if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) { + defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]); + napi_set_defer_hard_irqs(napi, defer); + } + + if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) { + gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]); + napi_set_gro_flush_timeout(napi, gro_flush_timeout); + } + + return 0; +} + +int netdev_nl_napi_set_doit(struct sk_buff *skb, 
struct genl_info *info) +{ + struct napi_struct *napi; + unsigned int napi_id; + int err; + + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID)) + return -EINVAL; + + napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]); + + rtnl_lock(); + + napi = napi_by_id(napi_id); + if (napi) { + err = netdev_nl_napi_set_config(napi, info); + } else { + NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]); + err = -ENOENT; + } + + rtnl_unlock(); + + return err; +} + static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index cacd33359c76..e3ebb49f60d2 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -201,6 +201,7 @@ enum { NETDEV_CMD_NAPI_GET, NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, + NETDEV_CMD_NAPI_SET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From 4971266e1595f76be3f844c834c1f9357a97dbde Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 10 Oct 2024 16:25:03 -0700 Subject: bpf: Add kmem_cache iterator The new "kmem_cache" iterator will traverse the list of slab caches and call attached BPF programs for each entry. It should check the argument (ctx.s) if it's NULL before using it. Now the iteration grabs the slab_mutex only if it traverse the list and releases the mutex when it runs the BPF program. The kmem_cache entry is protected by a refcount during the execution. Signed-off-by: Namhyung Kim Acked-by: Vlastimil Babka #slab Link: https://lore.kernel.org/r/20241010232505.1339892-2-namhyung@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/btf_ids.h | 1 + kernel/bpf/Makefile | 1 + kernel/bpf/kmem_cache_iter.c | 175 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 177 insertions(+) create mode 100644 kernel/bpf/kmem_cache_iter.c (limited to 'include') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index c0e3e1426a82..139bdececdcf 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -283,5 +283,6 @@ extern u32 btf_tracing_ids[]; extern u32 bpf_cgroup_btf_id[]; extern u32 bpf_local_storage_map_btf_id[]; extern u32 btf_bpf_map_id[]; +extern u32 bpf_kmem_cache_btf_id[]; #endif diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 9b9c151b5c82..105328f0b9c0 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -52,3 +52,4 @@ obj-$(CONFIG_BPF_PRELOAD) += preload/ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o +obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o diff --git a/kernel/bpf/kmem_cache_iter.c b/kernel/bpf/kmem_cache_iter.c new file mode 100644 index 000000000000..ebc101d7da51 --- /dev/null +++ b/kernel/bpf/kmem_cache_iter.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2024 Google */ +#include +#include +#include +#include +#include + +#include "../../mm/slab.h" /* kmem_cache, slab_caches and slab_mutex */ + +struct bpf_iter__kmem_cache { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct kmem_cache *, s); +}; + +static void *kmem_cache_iter_seq_start(struct seq_file *seq, loff_t *pos) +{ + loff_t cnt = 0; + bool found = false; + struct kmem_cache *s; + + mutex_lock(&slab_mutex); + + /* Find an entry at the given position in the slab_caches list instead + * of keeping a reference (of the last visited entry, if any) out of + * slab_mutex. 
It might miss something if one is deleted in the middle + * while it releases the lock. But it should be rare and there's not + * much we can do about it. + */ + list_for_each_entry(s, &slab_caches, list) { + if (cnt == *pos) { + /* Make sure this entry remains in the list by getting + * a new reference count. Note that boot_cache entries + * have a negative refcount, so don't touch them. + */ + if (s->refcount > 0) + s->refcount++; + found = true; + break; + } + cnt++; + } + mutex_unlock(&slab_mutex); + + if (!found) + return NULL; + + return s; +} + +static void kmem_cache_iter_seq_stop(struct seq_file *seq, void *v) +{ + struct bpf_iter_meta meta; + struct bpf_iter__kmem_cache ctx = { + .meta = &meta, + .s = v, + }; + struct bpf_prog *prog; + bool destroy = false; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, true); + if (prog && !ctx.s) + bpf_iter_run_prog(prog, &ctx); + + if (ctx.s == NULL) + return; + + mutex_lock(&slab_mutex); + + /* Skip kmem_cache_destroy() for active entries */ + if (ctx.s->refcount > 1) + ctx.s->refcount--; + else if (ctx.s->refcount == 1) + destroy = true; + + mutex_unlock(&slab_mutex); + + if (destroy) + kmem_cache_destroy(ctx.s); +} + +static void *kmem_cache_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct kmem_cache *s = v; + struct kmem_cache *next = NULL; + bool destroy = false; + + ++*pos; + + mutex_lock(&slab_mutex); + + if (list_last_entry(&slab_caches, struct kmem_cache, list) != s) { + next = list_next_entry(s, list); + + WARN_ON_ONCE(next->refcount == 0); + + /* boot_caches have negative refcount, don't touch them */ + if (next->refcount > 0) + next->refcount++; + } + + /* Skip kmem_cache_destroy() for active entries */ + if (s->refcount > 1) + s->refcount--; + else if (s->refcount == 1) + destroy = true; + + mutex_unlock(&slab_mutex); + + if (destroy) + kmem_cache_destroy(s); + + return next; +} + +static int kmem_cache_iter_seq_show(struct seq_file *seq, void *v) +{ + struct bpf_iter_meta meta; + struct bpf_iter__kmem_cache ctx = { + .meta = &meta, + .s = v, + }; + struct bpf_prog *prog; + int ret = 0; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, false); + if (prog) + ret = bpf_iter_run_prog(prog, &ctx); + + return ret; +} + +static const struct seq_operations kmem_cache_iter_seq_ops = { + .start = kmem_cache_iter_seq_start, + .next = kmem_cache_iter_seq_next, + .stop = kmem_cache_iter_seq_stop, + .show = kmem_cache_iter_seq_show, +}; + +BTF_ID_LIST_GLOBAL_SINGLE(bpf_kmem_cache_btf_id, struct, kmem_cache) + +static const struct bpf_iter_seq_info kmem_cache_iter_seq_info = { + .seq_ops = &kmem_cache_iter_seq_ops, +}; + +static void bpf_iter_kmem_cache_show_fdinfo(const struct bpf_iter_aux_info *aux, + struct seq_file *seq) +{ + seq_puts(seq, "kmem_cache iter\n"); +} + +DEFINE_BPF_ITER_FUNC(kmem_cache, struct bpf_iter_meta *meta, + struct kmem_cache *s) + +static struct bpf_iter_reg bpf_kmem_cache_reg_info = { + .target = "kmem_cache", + .feature = BPF_ITER_RESCHED, + .show_fdinfo = bpf_iter_kmem_cache_show_fdinfo, + .ctx_arg_info_size = 1, + .ctx_arg_info = { + { offsetof(struct bpf_iter__kmem_cache, s), + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, + }, + .seq_info = &kmem_cache_iter_seq_info, +}; + +static int __init bpf_kmem_cache_iter_init(void) +{ + bpf_kmem_cache_reg_info.ctx_arg_info[0].btf_id = bpf_kmem_cache_btf_id[0]; + return bpf_iter_reg_target(&bpf_kmem_cache_reg_info); +} + +late_initcall(bpf_kmem_cache_iter_init); -- cgit v1.2.3 From 6632863226d88383c9e2bedfeeb928ac7f8232b9 Mon Sep 17 00:00:00 2001 
From: Lu Baolu Date: Wed, 9 Oct 2024 13:18:08 +0800 Subject: iommu: Remove iommu_present() The last callsite of iommu_present() is removed by commit <45c690aea8ee> ("drm/tegra: Use iommu_paging_domain_alloc()"). Remove it to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20241009051808.29455-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 25 ------------------------- include/linux/iommu.h | 6 ------ 2 files changed, 31 deletions(-) (limited to 'include') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index ce72edd34f07..4c29a4bc368b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1840,31 +1840,6 @@ int bus_iommu_probe(const struct bus_type *bus) return 0; } -/** - * iommu_present() - make platform-specific assumptions about an IOMMU - * @bus: bus to check - * - * Do not use this function. You want device_iommu_mapped() instead. - * - * Return: true if some IOMMU is present and aware of devices on the given bus; - * in general it may not be the only IOMMU, and it may not have anything to do - * with whatever device you are ultimately interested in. - */ -bool iommu_present(const struct bus_type *bus) -{ - bool ret = false; - - for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { - if (iommu_buses[i] == bus) { - spin_lock(&iommu_device_lock); - ret = !list_empty(&iommu_device_list); - spin_unlock(&iommu_device_lock); - } - } - return ret; -} -EXPORT_SYMBOL_GPL(iommu_present); - /** * device_iommu_capable() - check for a general IOMMU capability * @dev: device to which the capability would be relevant, if available diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bd722f473635..62d1b85c80d3 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -785,7 +785,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) } extern int bus_iommu_probe(const struct bus_type *bus); -extern bool iommu_present(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_group_has_isolated_msi(struct iommu_group *group); extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus); @@ -1081,11 +1080,6 @@ struct iommu_iotlb_gather {}; struct iommu_dirty_bitmap {}; struct iommu_dirty_ops {}; -static inline bool iommu_present(const struct bus_type *bus) -{ - return false; -} - static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap) { return false; -- cgit v1.2.3 From a274465cc3bef2dfd9c9ea5100848dda0a8641e1 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Thu, 10 Oct 2024 13:54:19 +0100 Subject: net: phy: support 'active-high' property for PHY LEDs In addition to 'active-low' and 'inactive-high-impedance' also support 'active-high' property for PHY LED pin configuration. As only either 'active-high' or 'active-low' can be set at the same time, WARN and return an error in case both are set. 
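A hedged sketch of how a PHY driver's led_polarity_set() callback might consume the new PHY_LED_ACTIVE_HIGH mode (example_led_polarity_set() and example_write_led_polarity(), along with its arguments, are made up for illustration; only the enum values come from the patch):

#include <linux/bitops.h>
#include <linux/phy.h>

static int example_write_led_polarity(struct phy_device *phydev, int index,
				      bool active_low, bool high_impedance);

static int example_led_polarity_set(struct phy_device *phydev, int index,
				    unsigned long modes)
{
	bool active_low = false, high_impedance = false;
	u32 mode;

	for_each_set_bit(mode, &modes, __PHY_LED_MODES_NUM) {
		switch (mode) {
		case PHY_LED_ACTIVE_HIGH:
			active_low = false;
			break;
		case PHY_LED_ACTIVE_LOW:
			active_low = true;
			break;
		case PHY_LED_INACTIVE_HIGH_IMPEDANCE:
			high_impedance = true;
			break;
		default:
			return -EINVAL;
		}
	}

	/* example_write_led_polarity() stands in for the device-specific
	 * register update.
	 */
	return example_write_led_polarity(phydev, index, active_low,
					  high_impedance);
}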
Signed-off-by: Daniel Golle Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/91598487773d768f254d5faf06cf65b13e972f0e.1728558223.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/phy/phy_device.c | 6 ++++++ include/linux/phy.h | 5 +++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 94da999b6fde..563497a3274c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3358,11 +3358,17 @@ static int of_phy_led(struct phy_device *phydev, if (index > U8_MAX) return -EINVAL; + if (of_property_read_bool(led, "active-high")) + set_bit(PHY_LED_ACTIVE_HIGH, &modes); if (of_property_read_bool(led, "active-low")) set_bit(PHY_LED_ACTIVE_LOW, &modes); if (of_property_read_bool(led, "inactive-high-impedance")) set_bit(PHY_LED_INACTIVE_HIGH_IMPEDANCE, &modes); + if (WARN_ON(modes & BIT(PHY_LED_ACTIVE_LOW) && + modes & BIT(PHY_LED_ACTIVE_HIGH))) + return -EINVAL; + if (modes) { /* Return error if asked to set polarity modes but not supported */ if (!phydev->drv->led_polarity_set) diff --git a/include/linux/phy.h b/include/linux/phy.h index ff762a3d8270..bf0eb4e5d35c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -877,8 +877,9 @@ struct phy_plca_status { /* Modes for PHY LED configuration */ enum phy_led_modes { - PHY_LED_ACTIVE_LOW = 0, - PHY_LED_INACTIVE_HIGH_IMPEDANCE = 1, + PHY_LED_ACTIVE_HIGH = 0, + PHY_LED_ACTIVE_LOW = 1, + PHY_LED_INACTIVE_HIGH_IMPEDANCE = 2, /* keep it last */ __PHY_LED_MODES_NUM, -- cgit v1.2.3 From f68303cf1cf2fb96e20df5499c194f1fe5bab9e2 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Oct 2024 11:27:14 -0500 Subject: PCI: Constify pci_register_io_range() fwnode_handle pci_register_io_range() does not modify the passed in fwnode_handle, so make it const. Acked-by: Bjorn Helgaas Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241010-dt-const-v1-1-87a51f558425@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/pci/pci.c | 2 +- include/linux/pci.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7d85c04fbba2..4b102bd1cfea 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4163,7 +4163,7 @@ EXPORT_SYMBOL(pci_request_regions_exclusive); * Record the PCI IO range (expressed as CPU physical address + size). 
* Return a negative value if an error has occurred, zero otherwise */ -int pci_register_io_range(struct fwnode_handle *fwnode, phys_addr_t addr, +int pci_register_io_range(const struct fwnode_handle *fwnode, phys_addr_t addr, resource_size_t size) { int ret = 0; diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..733ff6570e2d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1556,7 +1556,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus, void *alignf_data); -int pci_register_io_range(struct fwnode_handle *fwnode, phys_addr_t addr, +int pci_register_io_range(const struct fwnode_handle *fwnode, phys_addr_t addr, resource_size_t size); unsigned long pci_address_to_pio(phys_addr_t addr); phys_addr_t pci_pio_to_address(unsigned long pio); @@ -2019,7 +2019,7 @@ static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; } static inline void pci_release_regions(struct pci_dev *dev) { } -static inline int pci_register_io_range(struct fwnode_handle *fwnode, +static inline int pci_register_io_range(const struct fwnode_handle *fwnode, phys_addr_t addr, resource_size_t size) { return -EINVAL; } -- cgit v1.2.3 From ec8c2329da1a8695b3fc80ba67cad9dc75d9a3ec Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Oct 2024 11:27:16 -0500 Subject: of: Constify struct device_node function arguments Functions which don't change the refcount or otherwise modify struct device_node can make struct device_node const. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241010-dt-const-v1-3-87a51f558425@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/address.c | 10 +++++----- drivers/of/base.c | 8 ++++---- drivers/of/cpu.c | 2 +- drivers/of/irq.c | 4 ++-- drivers/of/of_private.h | 2 +- drivers/of/overlay.c | 14 +++++++------- drivers/of/resolver.c | 4 ++-- include/linux/of.h | 14 +++++++------- include/linux/of_address.h | 4 ++-- include/linux/of_irq.h | 4 ++-- 10 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/of/address.c b/drivers/of/address.c index 286f0c161e33..aa1a4e381461 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -147,7 +147,7 @@ static unsigned int of_bus_pci_get_flags(const __be32 *addr) * PCI bus specific translator */ -static bool of_node_is_pcie(struct device_node *np) +static bool of_node_is_pcie(const struct device_node *np) { bool is_pcie = of_node_name_eq(np, "pcie"); @@ -230,8 +230,8 @@ static int __of_address_resource_bounds(struct resource *r, u64 start, u64 size) * To guard against that we try to register the IO range first. * If that fails we know that pci_address_to_pio() will do too. */ -int of_pci_range_to_resource(struct of_pci_range *range, - struct device_node *np, struct resource *res) +int of_pci_range_to_resource(const struct of_pci_range *range, + const struct device_node *np, struct resource *res) { u64 start; int err; @@ -399,7 +399,7 @@ static struct of_bus *of_match_bus(struct device_node *np) return NULL; } -static int of_empty_ranges_quirk(struct device_node *np) +static int of_empty_ranges_quirk(const struct device_node *np) { if (IS_ENABLED(CONFIG_PPC)) { /* To save cycles, we cache the result for global "Mac" setting */ @@ -1030,7 +1030,7 @@ EXPORT_SYMBOL_GPL(of_dma_is_coherent); * This is currently only enabled on builds that support Apple ARM devices, as * an optimization. 
*/ -static bool of_mmio_is_nonposted(struct device_node *np) +static bool of_mmio_is_nonposted(const struct device_node *np) { if (!IS_ENABLED(CONFIG_ARCH_APPLE)) return false; diff --git a/drivers/of/base.c b/drivers/of/base.c index 20603d3c9931..d1aebb979522 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -771,7 +771,7 @@ struct device_node *of_get_child_by_name(const struct device_node *node, } EXPORT_SYMBOL(of_get_child_by_name); -struct device_node *__of_find_node_by_path(struct device_node *parent, +struct device_node *__of_find_node_by_path(const struct device_node *parent, const char *path) { struct device_node *child; @@ -1840,7 +1840,7 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) * * Return: The alias id if found. */ -int of_alias_get_id(struct device_node *np, const char *stem) +int of_alias_get_id(const struct device_node *np, const char *stem) { struct alias_prop *app; int id = -ENODEV; @@ -1898,7 +1898,7 @@ EXPORT_SYMBOL_GPL(of_alias_get_highest_id); * * Return: TRUE if console successfully setup. Otherwise return FALSE. */ -bool of_console_check(struct device_node *dn, char *name, int index) +bool of_console_check(const struct device_node *dn, char *name, int index) { if (!dn || dn != of_stdout || console_set_on_cmdline) return false; @@ -1986,7 +1986,7 @@ int of_find_last_cache_level(unsigned int cpu) * * Return: 0 on success or a standard error code on failure. */ -int of_map_id(struct device_node *np, u32 id, +int of_map_id(const struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out) { diff --git a/drivers/of/cpu.c b/drivers/of/cpu.c index d17b2f851082..5214dc3d05ae 100644 --- a/drivers/of/cpu.c +++ b/drivers/of/cpu.c @@ -188,7 +188,7 @@ EXPORT_SYMBOL(of_cpu_node_to_id); * Return: An idle state node if found at @index. The refcount is incremented * for it, so call of_node_put() on it when done. Returns NULL if not found. */ -struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, +struct device_node *of_get_cpu_state_node(const struct device_node *cpu_node, int index) { struct of_phandle_args args; diff --git a/drivers/of/irq.c b/drivers/of/irq.c index a494f56a0d0e..67fc0ceaa5f5 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -720,7 +720,7 @@ struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 id, * Returns: the MSI domain for this device (or NULL on failure). 
*/ struct irq_domain *of_msi_get_domain(struct device *dev, - struct device_node *np, + const struct device_node *np, enum irq_domain_bus_token token) { struct of_phandle_iterator it; @@ -742,7 +742,7 @@ EXPORT_SYMBOL_GPL(of_msi_get_domain); * @dev: device structure to associate with an MSI irq domain * @np: device node for that device */ -void of_msi_configure(struct device *dev, struct device_node *np) +void of_msi_configure(struct device *dev, const struct device_node *np) { dev_set_msi_domain(dev, of_msi_get_domain(dev, np, DOMAIN_BUS_PLATFORM_MSI)); diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index 04aa2a91f851..d957cc6ce437 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -127,7 +127,7 @@ void __of_prop_free(struct property *prop); struct device_node *__of_node_dup(const struct device_node *np, const char *full_name); -struct device_node *__of_find_node_by_path(struct device_node *parent, +struct device_node *__of_find_node_by_path(const struct device_node *parent, const char *path); struct device_node *__of_find_node_by_full_path(struct device_node *node, const char *path); diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index cbdecccca097..19aaa96ee817 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -398,7 +398,7 @@ static int add_changeset_property(struct overlay_changeset *ovcs, * invalid @overlay. */ static int add_changeset_node(struct overlay_changeset *ovcs, - struct target *target, struct device_node *node) + struct target *target, const struct device_node *node) { const char *node_kbasename; const __be32 *phandle; @@ -675,8 +675,8 @@ static int build_changeset(struct overlay_changeset *ovcs) * 1) "target" property containing the phandle of the target * 2) "target-path" property containing the path of the target */ -static struct device_node *find_target(struct device_node *info_node, - struct device_node *target_base) +static struct device_node *find_target(const struct device_node *info_node, + const struct device_node *target_base) { struct device_node *node; char *target_path; @@ -735,7 +735,7 @@ static struct device_node *find_target(struct device_node *info_node, * init_overlay_changeset() must call free_overlay_changeset(). 
*/ static int init_overlay_changeset(struct overlay_changeset *ovcs, - struct device_node *target_base) + const struct device_node *target_base) { struct device_node *node, *overlay_node; struct fragment *fragment; @@ -910,7 +910,7 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs) */ static int of_overlay_apply(struct overlay_changeset *ovcs, - struct device_node *base) + const struct device_node *base) { int ret = 0, ret_revert, ret_tmp; @@ -978,7 +978,7 @@ out: */ int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, - int *ret_ovcs_id, struct device_node *base) + int *ret_ovcs_id, const struct device_node *base) { void *new_fdt; void *new_fdt_align; @@ -1074,7 +1074,7 @@ EXPORT_SYMBOL_GPL(of_overlay_fdt_apply); * * Returns 1 if @np is @tree or is contained in @tree, else 0 */ -static int find_node(struct device_node *tree, struct device_node *np) +static int find_node(const struct device_node *tree, struct device_node *np) { if (tree == np) return 1; diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c index 5cf96776dd7d..ee7769525bb2 100644 --- a/drivers/of/resolver.c +++ b/drivers/of/resolver.c @@ -147,8 +147,8 @@ static int node_name_cmp(const struct device_node *dn1, * of offsets of the phandle reference(s) within the respective property * value(s). The values at these offsets will be fixed up. */ -static int adjust_local_phandle_references(struct device_node *local_fixups, - struct device_node *overlay, int phandle_delta) +static int adjust_local_phandle_references(const struct device_node *local_fixups, + const struct device_node *overlay, int phandle_delta) { struct device_node *overlay_child; struct property *prop_fix, *prop; diff --git a/include/linux/of.h b/include/linux/of.h index 85b60ac9eec5..7875b308f13c 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -357,7 +357,7 @@ extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); extern struct device_node *of_cpu_device_node_get(int cpu); extern int of_cpu_node_to_id(struct device_node *np); extern struct device_node *of_get_next_cpu_node(struct device_node *prev); -extern struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, +extern struct device_node *of_get_cpu_state_node(const struct device_node *cpu_node, int index); extern u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread); @@ -395,7 +395,7 @@ extern int of_phandle_iterator_args(struct of_phandle_iterator *it, int size); extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); -extern int of_alias_get_id(struct device_node *np, const char *stem); +extern int of_alias_get_id(const struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); bool of_machine_compatible_match(const char *const *compats); @@ -446,9 +446,9 @@ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, */ const char *of_prop_next_string(struct property *prop, const char *cur); -bool of_console_check(struct device_node *dn, char *name, int index); +bool of_console_check(const struct device_node *dn, char *name, int index); -int of_map_id(struct device_node *np, u32 id, +int of_map_id(const struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out); @@ -871,7 +871,7 @@ static inline void of_property_clear_flag(struct property *p, unsigned long flag { } -static inline int of_map_id(struct device_node *np, u32 id, +static inline int of_map_id(const struct device_node *np, 
u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out) { @@ -1734,7 +1734,7 @@ struct of_overlay_notify_data { #ifdef CONFIG_OF_OVERLAY int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, - int *ovcs_id, struct device_node *target_base); + int *ovcs_id, const struct device_node *target_base); int of_overlay_remove(int *ovcs_id); int of_overlay_remove_all(void); @@ -1744,7 +1744,7 @@ int of_overlay_notifier_unregister(struct notifier_block *nb); #else static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, - int *ovcs_id, struct device_node *target_base) + int *ovcs_id, const struct device_node *target_base) { return -ENOTSUPP; } diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 26a19daf0d09..bd46dbcc6e88 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -83,8 +83,8 @@ extern struct of_pci_range *of_pci_range_parser_one( struct of_pci_range *range); extern int of_pci_address_to_resource(struct device_node *dev, int bar, struct resource *r); -extern int of_pci_range_to_resource(struct of_pci_range *range, - struct device_node *np, +extern int of_pci_range_to_resource(const struct of_pci_range *range, + const struct device_node *np, struct resource *res); extern int of_range_to_resource(struct device_node *np, int index, struct resource *res); diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index d6d3eae2f145..6337ad4e5fe8 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -48,12 +48,12 @@ extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); extern struct device_node *of_irq_find_parent(struct device_node *child); extern struct irq_domain *of_msi_get_domain(struct device *dev, - struct device_node *np, + const struct device_node *np, enum irq_domain_bus_token token); extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 id, u32 bus_token); -extern void of_msi_configure(struct device *dev, struct device_node *np); +extern void of_msi_configure(struct device *dev, const struct device_node *np); u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in); #else static inline void of_irq_init(const struct of_device_id *matches) -- cgit v1.2.3 From 9c63fea9acd077701fd67bbc21b1e77d6b98b7e0 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Oct 2024 11:27:17 -0500 Subject: of: Constify struct property pointers Most accesses to struct property do not modify it, so constify struct property pointers where ever possible in the DT core code. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241010-dt-const-v1-4-87a51f558425@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/base.c | 12 ++++++------ drivers/of/kobj.c | 6 +++--- drivers/of/of_private.h | 10 +++++----- drivers/of/overlay.c | 5 +++-- drivers/of/property.c | 10 +++++----- drivers/of/resolver.c | 8 ++++---- include/linux/of.h | 14 +++++++------- 7 files changed, 33 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/of/base.c b/drivers/of/base.c index d1aebb979522..d94efee4a7fc 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -270,7 +270,7 @@ EXPORT_SYMBOL(of_find_all_nodes); const void *__of_get_property(const struct device_node *np, const char *name, int *lenp) { - struct property *pp = __of_find_property(np, name, lenp); + const struct property *pp = __of_find_property(np, name, lenp); return pp ? 
pp->value : NULL; } @@ -282,7 +282,7 @@ const void *__of_get_property(const struct device_node *np, const void *of_get_property(const struct device_node *np, const char *name, int *lenp) { - struct property *pp = of_find_property(np, name, lenp); + const struct property *pp = of_find_property(np, name, lenp); return pp ? pp->value : NULL; } @@ -321,7 +321,7 @@ EXPORT_SYMBOL(of_get_property); static int __of_device_is_compatible(const struct device_node *device, const char *compat, const char *type, const char *name) { - struct property *prop; + const struct property *prop; const char *cp; int index = 0, score = 0; @@ -828,7 +828,7 @@ struct device_node *__of_find_node_by_full_path(struct device_node *node, struct device_node *of_find_node_opts_by_path(const char *path, const char **opts) { struct device_node *np = NULL; - struct property *pp; + const struct property *pp; unsigned long flags; const char *separator = strchr(path, ':'); @@ -974,7 +974,7 @@ struct device_node *of_find_node_with_property(struct device_node *from, const char *prop_name) { struct device_node *np; - struct property *pp; + const struct property *pp; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); @@ -1769,7 +1769,7 @@ static void of_alias_add(struct alias_prop *ap, struct device_node *np, */ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) { - struct property *pp; + const struct property *pp; of_aliases = of_find_node_by_path("/aliases"); of_chosen = of_find_node_by_path("/chosen"); diff --git a/drivers/of/kobj.c b/drivers/of/kobj.c index 3dbce1e6f184..aeb1709d4e85 100644 --- a/drivers/of/kobj.c +++ b/drivers/of/kobj.c @@ -84,7 +84,7 @@ int __of_add_property_sysfs(struct device_node *np, struct property *pp) return rc; } -void __of_sysfs_remove_bin_file(struct device_node *np, struct property *prop) +void __of_sysfs_remove_bin_file(struct device_node *np, const struct property *prop) { if (!IS_ENABLED(CONFIG_SYSFS)) return; @@ -93,7 +93,7 @@ void __of_sysfs_remove_bin_file(struct device_node *np, struct property *prop) kfree(prop->attr.attr.name); } -void __of_remove_property_sysfs(struct device_node *np, struct property *prop) +void __of_remove_property_sysfs(struct device_node *np, const struct property *prop) { /* at early boot, bail here and defer setup to of_init() */ if (of_kset && of_node_is_attached(np)) @@ -101,7 +101,7 @@ void __of_remove_property_sysfs(struct device_node *np, struct property *prop) } void __of_update_property_sysfs(struct device_node *np, struct property *newprop, - struct property *oldprop) + const struct property *oldprop) { /* At early boot, bail out and defer setup to of_init() */ if (!of_kset) diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index d957cc6ce437..53a4a5be9997 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -69,9 +69,9 @@ static inline void of_platform_register_reconfig_notifier(void) { } #if defined(CONFIG_OF_KOBJ) int of_node_is_attached(const struct device_node *node); int __of_add_property_sysfs(struct device_node *np, struct property *pp); -void __of_remove_property_sysfs(struct device_node *np, struct property *prop); +void __of_remove_property_sysfs(struct device_node *np, const struct property *prop); void __of_update_property_sysfs(struct device_node *np, struct property *newprop, - struct property *oldprop); + const struct property *oldprop); int __of_attach_node_sysfs(struct device_node *np); void __of_detach_node_sysfs(struct device_node *np); #else @@ -79,9 +79,9 @@ static inline int 
__of_add_property_sysfs(struct device_node *np, struct propert { return 0; } -static inline void __of_remove_property_sysfs(struct device_node *np, struct property *prop) {} +static inline void __of_remove_property_sysfs(struct device_node *np, const struct property *prop) {} static inline void __of_update_property_sysfs(struct device_node *np, - struct property *newprop, struct property *oldprop) {} + struct property *newprop, const struct property *oldprop) {} static inline int __of_attach_node_sysfs(struct device_node *np) { return 0; @@ -142,7 +142,7 @@ extern int __of_update_property(struct device_node *np, extern void __of_detach_node(struct device_node *np); extern void __of_sysfs_remove_bin_file(struct device_node *np, - struct property *prop); + const struct property *prop); /* illegal phandle value (set when unresolved) */ #define OF_PHANDLE_ILLEGAL 0xdeadbeef diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 19aaa96ee817..434f6dd6a86c 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -296,10 +296,11 @@ err_free_target_path: * invalid @overlay. */ static int add_changeset_property(struct overlay_changeset *ovcs, - struct target *target, struct property *overlay_prop, + struct target *target, const struct property *overlay_prop, bool is_symbols_prop) { - struct property *new_prop = NULL, *prop; + struct property *new_prop = NULL; + const struct property *prop; int ret = 0; if (target->in_livetree) diff --git a/drivers/of/property.c b/drivers/of/property.c index 11b922fde7af..33dd72cad4ab 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -68,7 +68,7 @@ EXPORT_SYMBOL(of_graph_is_present); int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size) { - struct property *prop = of_find_property(np, propname, NULL); + const struct property *prop = of_find_property(np, propname, NULL); if (!prop) return -EINVAL; @@ -104,7 +104,7 @@ EXPORT_SYMBOL_GPL(of_property_count_elems_of_size); static void *of_find_property_value_of_size(const struct device_node *np, const char *propname, u32 min, u32 max, size_t *len) { - struct property *prop = of_find_property(np, propname, NULL); + const struct property *prop = of_find_property(np, propname, NULL); if (!prop) return ERR_PTR(-EINVAL); @@ -530,7 +530,7 @@ int of_property_read_string_helper(const struct device_node *np, } EXPORT_SYMBOL_GPL(of_property_read_string_helper); -const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, +const __be32 *of_prop_next_u32(const struct property *prop, const __be32 *cur, u32 *pu) { const void *curv = cur; @@ -553,7 +553,7 @@ out_val: } EXPORT_SYMBOL_GPL(of_prop_next_u32); -const char *of_prop_next_string(struct property *prop, const char *cur) +const char *of_prop_next_string(const struct property *prop, const char *cur) { const void *curv = cur; @@ -1466,7 +1466,7 @@ static int of_fwnode_irq_get(const struct fwnode_handle *fwnode, static int of_fwnode_add_links(struct fwnode_handle *fwnode) { - struct property *p; + const struct property *p; struct device_node *con_np = to_of_node(fwnode); if (IS_ENABLED(CONFIG_X86)) diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c index ee7769525bb2..779db058c42f 100644 --- a/drivers/of/resolver.c +++ b/drivers/of/resolver.c @@ -42,7 +42,7 @@ static void adjust_overlay_phandles(struct device_node *overlay, int phandle_delta) { struct device_node *child; - struct property *prop; + const struct property *prop; phandle phandle; /* adjust node's phandle in node */ @@ 
-71,10 +71,10 @@ static void adjust_overlay_phandles(struct device_node *overlay, } static int update_usages_of_a_phandle_reference(struct device_node *overlay, - struct property *prop_fixup, phandle phandle) + const struct property *prop_fixup, phandle phandle) { struct device_node *refnode; - struct property *prop; + const struct property *prop; char *value __free(kfree) = kmemdup(prop_fixup->value, prop_fixup->length, GFP_KERNEL); char *cur, *end, *node_path, *prop_name, *s; int offset, len; @@ -151,7 +151,7 @@ static int adjust_local_phandle_references(const struct device_node *local_fixup const struct device_node *overlay, int phandle_delta) { struct device_node *overlay_child; - struct property *prop_fix, *prop; + const struct property *prop_fix, *prop; int err, i, count; unsigned int off; diff --git a/include/linux/of.h b/include/linux/of.h index 7875b308f13c..086a60f3b8a6 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -435,7 +435,7 @@ extern int of_detach_node(struct device_node *); * of_property_for_each_u32(np, "propname", u) * printk("U32 value: %x\n", u); */ -const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, +const __be32 *of_prop_next_u32(const struct property *prop, const __be32 *cur, u32 *pu); /* * struct property *prop; @@ -444,7 +444,7 @@ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, * of_property_for_each_string(np, "propname", prop, s) * printk("String value: %s\n", s); */ -const char *of_prop_next_string(struct property *prop, const char *cur); +const char *of_prop_next_string(const struct property *prop, const char *cur); bool of_console_check(const struct device_node *dn, char *name, int index); @@ -826,13 +826,13 @@ static inline bool of_console_check(const struct device_node *dn, const char *na return false; } -static inline const __be32 *of_prop_next_u32(struct property *prop, +static inline const __be32 *of_prop_next_u32(const struct property *prop, const __be32 *cur, u32 *pu) { return NULL; } -static inline const char *of_prop_next_string(struct property *prop, +static inline const char *of_prop_next_string(const struct property *prop, const char *cur) { return NULL; @@ -899,7 +899,7 @@ static inline const void *of_device_get_match_data(const struct device *dev) #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) #endif -static inline int of_prop_val_eq(struct property *p1, struct property *p2) +static inline int of_prop_val_eq(const struct property *p1, const struct property *p2) { return p1->length == p2->length && !memcmp(p1->value, p2->value, (size_t)p1->length); @@ -1252,7 +1252,7 @@ static inline int of_property_read_string_index(const struct device_node *np, static inline bool of_property_read_bool(const struct device_node *np, const char *propname) { - struct property *prop = of_find_property(np, propname, NULL); + const struct property *prop = of_find_property(np, propname, NULL); return prop ? 
true : false; } @@ -1430,7 +1430,7 @@ static inline int of_property_read_s32(const struct device_node *np, err = of_phandle_iterator_next(it)) #define of_property_for_each_u32(np, propname, u) \ - for (struct {struct property *prop; const __be32 *item; } _it = \ + for (struct {const struct property *prop; const __be32 *item; } _it = \ {of_find_property(np, propname, NULL), \ of_prop_next_u32(_it.prop, NULL, &u)}; \ _it.item; \ -- cgit v1.2.3 From d79616b04f0e08178ceb716a5d2ef60ab723d532 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 10 Oct 2024 11:27:20 -0500 Subject: of/address: Constify of_busses[] array and pointers The of_busses array is fixed, so it and all struct of_bus pointers can be const. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241010-dt-const-v1-7-87a51f558425@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/address.c | 12 ++++++------ include/linux/of_address.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/of/address.c b/drivers/of/address.c index aa1a4e381461..824bb449e007 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -340,7 +340,7 @@ static int of_bus_default_flags_match(struct device_node *np) * Array of bus specific translators */ -static struct of_bus of_busses[] = { +static const struct of_bus of_busses[] = { #ifdef CONFIG_PCI /* PCI */ { @@ -388,7 +388,7 @@ static struct of_bus of_busses[] = { }, }; -static struct of_bus *of_match_bus(struct device_node *np) +static const struct of_bus *of_match_bus(struct device_node *np) { int i; @@ -419,8 +419,8 @@ static int of_empty_ranges_quirk(const struct device_node *np) return false; } -static int of_translate_one(struct device_node *parent, struct of_bus *bus, - struct of_bus *pbus, __be32 *addr, +static int of_translate_one(const struct device_node *parent, const struct of_bus *bus, + const struct of_bus *pbus, __be32 *addr, int na, int ns, int pna, const char *rprop) { const __be32 *ranges; @@ -505,7 +505,7 @@ static u64 __of_translate_address(struct device_node *node, { struct device_node *dev __free(device_node) = of_node_get(node); struct device_node *parent __free(device_node) = get_parent(dev); - struct of_bus *bus, *pbus; + const struct of_bus *bus, *pbus; __be32 addr[OF_MAX_ADDR_CELLS]; int na, ns, pna, pns; @@ -690,7 +690,7 @@ const __be32 *__of_get_address(struct device_node *dev, int index, int bar_no, const __be32 *prop; unsigned int psize; struct device_node *parent __free(device_node) = of_get_parent(dev); - struct of_bus *bus; + const struct of_bus *bus; int onesize, i, na, ns; if (parent == NULL) diff --git a/include/linux/of_address.h b/include/linux/of_address.h index bd46dbcc6e88..9e034363788a 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -10,7 +10,7 @@ struct of_bus; struct of_pci_range_parser { struct device_node *node; - struct of_bus *bus; + const struct of_bus *bus; const __be32 *range; const __be32 *end; int na; -- cgit v1.2.3 From 74fe1ad4132234f04fcc75e16600449496a67b5b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 7 Oct 2024 18:50:14 +0200 Subject: debugobjects: Prepare for batching Move the debug_obj::object pointer into a union and add a pointer to the last node in a batch. That allows to implement batch processing efficiently by utilizing the stack property of hlist: When the first object of a batch is added to the list, then the batch pointer is set to the hlist node of the object itself. 
Any subsequent add retrieves the pointer to the last node from the first object in the list and uses that for storing the last node pointer in the newly added object. Add the pointer to the data structure and ensure that all relevant pool sizes are strictly batch sized. The actual batching implementation follows in subsequent changes. Signed-off-by: Thomas Gleixner Reviewed-by: Zhen Lei Link: https://lore.kernel.org/all/20241007164914.139204961@linutronix.de --- include/linux/debugobjects.h | 12 ++++++++---- lib/debugobjects.c | 10 +++++++--- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h index 32444686b6ff..8b95545e7924 100644 --- a/include/linux/debugobjects.h +++ b/include/linux/debugobjects.h @@ -23,13 +23,17 @@ struct debug_obj_descr; * @state: tracked object state * @astate: current active state * @object: pointer to the real object + * @batch_last: pointer to the last hlist node in a batch * @descr: pointer to an object type specific debug description structure */ struct debug_obj { - struct hlist_node node; - enum debug_obj_state state; - unsigned int astate; - void *object; + struct hlist_node node; + enum debug_obj_state state; + unsigned int astate; + union { + void *object; + struct hlist_node *batch_last; + }; const struct debug_obj_descr *descr; }; diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 65cce4cd9643..2fed7b863827 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -21,11 +21,15 @@ #define ODEBUG_HASH_BITS 14 #define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS) -#define ODEBUG_POOL_SIZE 1024 -#define ODEBUG_POOL_MIN_LEVEL 256 -#define ODEBUG_POOL_PERCPU_SIZE 64 +/* Must be power of two */ #define ODEBUG_BATCH_SIZE 16 +/* Initial values. Must all be a multiple of batch size */ +#define ODEBUG_POOL_SIZE (64 * ODEBUG_BATCH_SIZE) +#define ODEBUG_POOL_MIN_LEVEL (ODEBUG_POOL_SIZE / 4) + +#define ODEBUG_POOL_PERCPU_SIZE (4 * ODEBUG_BATCH_SIZE) + #define ODEBUG_CHUNK_SHIFT PAGE_SHIFT #define ODEBUG_CHUNK_SIZE (1 << ODEBUG_CHUNK_SHIFT) #define ODEBUG_CHUNK_MASK (~(ODEBUG_CHUNK_SIZE - 1)) -- cgit v1.2.3 From e4cf33ca48128d580e25ebe779b7ba7b4b4cf733 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Oct 2024 20:21:14 -0400 Subject: ftrace: Consolidate ftrace_regs accessor functions for archs using pt_regs Most architectures use pt_regs within ftrace_regs making a lot of the accessor functions just calls to the pt_regs internally. Instead of duplication this effort, use a HAVE_ARCH_FTRACE_REGS for architectures that have their own ftrace_regs that is not based on pt_regs and will define all the accessor functions, and for the architectures that just use pt_regs, it will leave it undefined, and the default accessor functions will be used. Note, this will also make it easier to add new accessor functions to ftrace_regs as it will mean having to touch less architectures. 
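To make the new split concrete, here is a hedged sketch (not taken from the patch; the register layout and field names are invented) of what an architecture whose ftrace_regs does not wrap pt_regs now provides in its asm/ftrace.h. Architectures that do wrap pt_regs simply include <linux/ftrace_regs.h> and inherit the default struct and accessor macros instead.

/* Hypothetical asm/ftrace.h fragment for a non-pt_regs architecture */
#define HAVE_ARCH_FTRACE_REGS

struct __arch_ftrace_regs {
        unsigned long pc;
        unsigned long sp;
        unsigned long args[6];          /* made-up layout, for illustration only */
};

#define arch_ftrace_regs(fregs)        ((struct __arch_ftrace_regs *)(fregs))

/* ...plus every accessor the generic header would otherwise provide: */
#define ftrace_regs_get_instruction_pointer(fregs)  (arch_ftrace_regs(fregs)->pc)
#define ftrace_regs_get_stack_pointer(fregs)        (arch_ftrace_regs(fregs)->sp)
#define ftrace_regs_get_argument(fregs, n)          (arch_ftrace_regs(fregs)->args[n])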
Cc: Cc: "x86@kernel.org" Cc: Mathieu Desnoyers Cc: Mark Rutland Cc: Will Deacon Cc: Huacai Chen Cc: WANG Xuerui Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Naveen N Rao Cc: Madhavan Srinivasan Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Link: https://lore.kernel.org/20241010202114.2289f6fd@gandalf.local.home Acked-by: Masami Hiramatsu (Google) Acked-by: Heiko Carstens # s390 Acked-by: Catalin Marinas Acked-by: Michael Ellerman # powerpc Suggested-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- arch/arm64/include/asm/ftrace.h | 1 + arch/loongarch/include/asm/ftrace.h | 25 +------------------------ arch/powerpc/include/asm/ftrace.h | 26 +------------------------- arch/riscv/include/asm/ftrace.h | 1 + arch/s390/include/asm/ftrace.h | 26 +------------------------- arch/x86/include/asm/ftrace.h | 21 +-------------------- include/linux/ftrace.h | 32 +++++++------------------------- include/linux/ftrace_regs.h | 36 ++++++++++++++++++++++++++++++++++++ 8 files changed, 49 insertions(+), 119 deletions(-) create mode 100644 include/linux/ftrace_regs.h (limited to 'include') diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index bbb69c7751b9..5ccff4de7f09 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -54,6 +54,7 @@ extern void return_to_handler(void); unsigned long ftrace_call_adjust(unsigned long addr); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +#define HAVE_ARCH_FTRACE_REGS struct dyn_ftrace; struct ftrace_ops; struct ftrace_regs; diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index 0e15d36ce251..8f13eaeaa325 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -43,43 +43,20 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent); #ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS struct ftrace_ops; -struct ftrace_regs; -#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) -struct __arch_ftrace_regs { - struct pt_regs regs; -}; +#include static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { return &arch_ftrace_regs(fregs)->regs; } -static __always_inline unsigned long -ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs) -{ - return instruction_pointer(&arch_ftrace_regs(fregs)->regs); -} - static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) { instruction_pointer_set(&arch_ftrace_regs(fregs)->regs, ip); } -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) - #define ftrace_graph_func ftrace_graph_func void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); diff --git 
a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index e299fd47d201..0edfb874eb02 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -32,12 +32,7 @@ struct dyn_arch_ftrace { int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop -struct ftrace_regs; -#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) - -struct __arch_ftrace_regs { - struct pt_regs regs; -}; +#include static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { @@ -52,25 +47,6 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, regs_set_return_ip(&arch_ftrace_regs(fregs)->regs, ip); } -static __always_inline unsigned long -ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs) -{ - return instruction_pointer(&arch_ftrace_regs(fregs)->regs); -} - -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) - struct ftrace_ops; #define ftrace_graph_func ftrace_graph_func diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index c6bcdff105b5..3d66437a1029 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -125,6 +125,7 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS #define arch_ftrace_get_regs(regs) NULL +#define HAVE_ARCH_FTRACE_REGS struct ftrace_ops; struct ftrace_regs; #define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 1498d0a9c762..fc97d75dc752 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -51,12 +51,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } -struct ftrace_regs; -#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) - -struct __arch_ftrace_regs { - struct pt_regs regs; -}; +#include static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { @@ -84,12 +79,6 @@ static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -static __always_inline unsigned long -ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) -{ - return arch_ftrace_regs(fregs)->regs.psw.addr; -} - static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) @@ -97,19 +86,6 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, arch_ftrace_regs(fregs)->regs.psw.addr = ip; } -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_set_return_value(fregs, ret) \ - 
regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) - #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS /* * When an ftrace registered caller is tracing a function that is diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 87943f7a299b..8f02d28c571a 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -33,12 +33,8 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) } #ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS -struct ftrace_regs; -#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) -struct __arch_ftrace_regs { - struct pt_regs regs; -}; +#include static __always_inline struct pt_regs * arch_ftrace_get_regs(struct ftrace_regs *fregs) @@ -52,21 +48,6 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs) #define ftrace_regs_set_instruction_pointer(fregs, _ip) \ do { arch_ftrace_regs(fregs)->regs.ip = (_ip); } while (0) -#define ftrace_regs_get_instruction_pointer(fregs) \ - arch_ftrace_regs(fregs)->regs.ip) - -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) struct ftrace_ops; #define ftrace_graph_func ftrace_graph_func diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 66f10291a0b2..aa9ddd1e4bb6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -113,6 +113,8 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val #ifdef CONFIG_FUNCTION_TRACER +#include + extern int ftrace_enabled; /** @@ -150,14 +152,11 @@ struct ftrace_regs { #define ftrace_regs_size() sizeof(struct __arch_ftrace_regs) #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS - -struct __arch_ftrace_regs { - struct pt_regs regs; -}; - -struct ftrace_regs; -#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) - +/* + * Architectures that define HAVE_DYNAMIC_FTRACE_WITH_ARGS must define their own + * arch_ftrace_get_regs() where it only returns pt_regs *if* it is fully + * populated. It should return NULL otherwise. 
+ */ static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { return &arch_ftrace_regs(fregs)->regs; @@ -191,23 +190,6 @@ static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs) return ftrace_get_regs(fregs) != NULL; } -#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS -#define ftrace_regs_get_instruction_pointer(fregs) \ - instruction_pointer(ftrace_get_regs(fregs)) -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(ftrace_get_regs(fregs), n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(ftrace_get_regs(fregs)) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(ftrace_get_regs(fregs)) -#define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(ftrace_get_regs(fregs), ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(ftrace_get_regs(fregs)) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) -#endif - typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h new file mode 100644 index 000000000000..dea6a0851b74 --- /dev/null +++ b/include/linux/ftrace_regs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_FTRACE_REGS_H +#define _LINUX_FTRACE_REGS_H + +/* + * For archs that just copy pt_regs in ftrace regs, it can use this default. + * If an architecture does not use pt_regs, it must define all the below + * accessor functions. + */ +#ifndef HAVE_ARCH_FTRACE_REGS +struct __arch_ftrace_regs { + struct pt_regs regs; +}; + +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +struct ftrace_regs; + +#define ftrace_regs_get_instruction_pointer(fregs) \ + instruction_pointer(arch_ftrace_get_regs(fregs)) +#define ftrace_regs_get_argument(fregs, n) \ + regs_get_kernel_argument(arch_ftrace_get_regs(fregs), n) +#define ftrace_regs_get_stack_pointer(fregs) \ + kernel_stack_pointer(arch_ftrace_get_regs(fregs)) +#define ftrace_regs_return_value(fregs) \ + regs_return_value(arch_ftrace_get_regs(fregs)) +#define ftrace_regs_set_return_value(fregs, ret) \ + regs_set_return_value(arch_ftrace_get_regs(fregs), ret) +#define ftrace_override_function_with_return(fregs) \ + override_function_with_return(arch_ftrace_get_regs(fregs)) +#define ftrace_regs_query_register_offset(name) \ + regs_query_register_offset(name) + +#endif /* HAVE_ARCH_FTRACE_REGS */ + +#endif /* _LINUX_FTRACE_REGS_H */ -- cgit v1.2.3 From 39c089a01a7e431383710a566864644cbbc0f8fe Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 10 Oct 2024 17:44:44 +0200 Subject: vdso: Remove timekeeper argument of __arch_update_vsyscall() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No implementation of this hook uses the passed in timekeeper anymore. This avoids including a non-VDSO header while building the VDSO, which can lead to compilation errors. 
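As an illustration of the new hook shape (the architecture name is hypothetical and the body mirrors the arm64 fixup touched by this patch), an override now receives only the vdso_data pointer:

#include <vdso/datapage.h>

static __always_inline void __myarch_update_vsyscall(struct vdso_data *vdata)
{
        /*
         * Arch-specific touch-up of the freshly written data;
         * VDSO_PRECISION_MASK stands in for an arch-provided constant.
         */
        vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
        vdata[CS_RAW].mask = VDSO_PRECISION_MASK;
}
#define __arch_update_vsyscall __myarch_update_vsyscall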
Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Will Deacon Link: https://lore.kernel.org/all/20241010-vdso-generic-arch_update_vsyscall-v1-1-7fe5a3ea4382@linutronix.de --- arch/arm64/include/asm/vdso/vsyscall.h | 3 +-- include/asm-generic/vdso/vsyscall.h | 3 +-- kernel/time/vsyscall.c | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index 5b6d0dd3cef5..eea51946d45a 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -6,7 +6,6 @@ #ifndef __ASSEMBLY__ -#include #include enum vvar_pages { @@ -37,7 +36,7 @@ struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void) #define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data static __always_inline -void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) +void __arm64_update_vsyscall(struct vdso_data *vdata) { vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; vdata[CS_RAW].mask = VDSO_PRECISION_MASK; diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index c835607f78ae..01dafd604188 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -12,8 +12,7 @@ static __always_inline struct vdso_data *__arch_get_k_vdso_data(void) #endif /* __arch_get_k_vdso_data */ #ifndef __arch_update_vsyscall -static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata, - struct timekeeper *tk) +static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata) { } #endif /* __arch_update_vsyscall */ diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 9193d6133e5d..28706a13c222 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -119,7 +119,7 @@ void update_vsyscall(struct timekeeper *tk) if (clock_mode != VDSO_CLOCKMODE_NONE) update_vdso_data(vdata, tk); - __arch_update_vsyscall(vdata, tk); + __arch_update_vsyscall(vdata); vdso_write_end(vdata); -- cgit v1.2.3 From a066397e8ed1036e8b959050ab6e830ee90d9f58 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Fri, 13 Sep 2024 19:19:53 -0400 Subject: tpm: fix unsigned/signed mismatch errors related to __calc_tpm2_event_size __calc_tpm2_event_size returns 0 or a positive length, but return values are often interpreted as ints. Convert everything over to u32 to avoid signed/unsigned logic errors. 
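A standalone illustration of the trap being closed (plain C, not kernel code): once a signed length meets an unsigned count, the usual arithmetic conversions turn the signed operand unsigned, and range checks stop meaning what they appear to say. Keeping every event size u32 avoids the mixing altogether.

#include <stdio.h>

int main(void)
{
        unsigned int remaining = 5;     /* bytes left in the event log */
        int event_size = -10;           /* a bogus length that landed in a signed int */

        /* -10 converts to 4294967286u, so the "smaller" branch is not taken */
        if (event_size < remaining)
                puts("event_size treated as smaller than remaining");
        else
                puts("surprise: -10 compares as larger than 5");

        return 0;
}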
Signed-off-by: Gregory Price Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/tpm.c | 6 +++--- drivers/firmware/efi/tpm.c | 2 +- include/linux/tpm_eventlog.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c index 1fd6823248ab..d31ea3f351d8 100644 --- a/drivers/firmware/efi/libstub/tpm.c +++ b/drivers/firmware/efi/libstub/tpm.c @@ -57,7 +57,7 @@ static void efi_retrieve_tcg2_eventlog(int version, efi_physical_addr_t log_loca struct linux_efi_tpm_eventlog *log_tbl = NULL; unsigned long first_entry_addr, last_entry_addr; size_t log_size, last_entry_size; - int final_events_size = 0; + u32 final_events_size = 0; first_entry_addr = (unsigned long) log_location; @@ -110,9 +110,9 @@ static void efi_retrieve_tcg2_eventlog(int version, efi_physical_addr_t log_loca */ if (final_events_table && final_events_table->nr_events) { struct tcg_pcr_event2_head *header; - int offset; + u32 offset; void *data; - int event_size; + u32 event_size; int i = final_events_table->nr_events; data = (void *)final_events_table; diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c index b0cc2cc11d7e..cdd431027065 100644 --- a/drivers/firmware/efi/tpm.c +++ b/drivers/firmware/efi/tpm.c @@ -19,7 +19,7 @@ EXPORT_SYMBOL(efi_tpm_final_log_size); static int __init tpm2_calc_event_log_size(void *data, int count, void *size_info) { struct tcg_pcr_event2_head *header; - int event_size, size = 0; + u32 event_size, size = 0; while (count > 0) { header = data + size; diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 7d68a5cc5881..891368e82558 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -157,7 +157,7 @@ struct tcg_algorithm_info { * Return: size of the event on success, 0 on failure */ -static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, +static __always_inline u32 __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, struct tcg_pcr_event *event_header, bool do_mapping) { -- cgit v1.2.3 From 58797abed49d6b78c7af99b03b037f20c7ffb203 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 5 Oct 2024 12:04:17 +0200 Subject: power: supply: core: constify power_supply_battery_info::resist_table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The power supply core never modifies the resist table. Reflect this in the API, so drivers can mark their static tables as const. 
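A hedged driver-side sketch of what the const qualifier enables (driver name, temperatures and percentages are invented; the helper signature is the one changed above): the table can now live in rodata and still be handed to the core helper.

#include <linux/kernel.h>
#include <linux/power_supply.h>

/* internal resistance in percent of nominal, ordered from warm to cold */
static const struct power_supply_resistance_temp_table acme_resist_table[] = {
        { .temp = 40,  .resistance =  95 },
        { .temp = 20,  .resistance = 100 },
        { .temp = 0,   .resistance = 120 },
        { .temp = -10, .resistance = 135 },
};

static int acme_resistance_percent(int temp_c)
{
        return power_supply_temp2resist_simple(acme_resist_table,
                                               ARRAY_SIZE(acme_resist_table),
                                               temp_c);
}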
Signed-off-by: Thomas Weißschuh Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20241005-power-supply-battery-const-v1-1-c1f721927048@weissschuh.net Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 4 ++-- include/linux/power_supply.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 49534458a9f7..a01703fa83c0 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -798,7 +798,7 @@ int power_supply_get_battery_info(struct power_supply *psy, goto out_ret_pointer; info->resist_table_size = len / (2 * sizeof(__be32)); - resist_table = info->resist_table = devm_kcalloc(&psy->dev, + info->resist_table = resist_table = devm_kcalloc(&psy->dev, info->resist_table_size, sizeof(*resist_table), GFP_KERNEL); @@ -982,7 +982,7 @@ EXPORT_SYMBOL_GPL(power_supply_battery_info_get_prop); * * Return: the battery internal resistance percent */ -int power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table, +int power_supply_temp2resist_simple(const struct power_supply_resistance_temp_table *table, int table_len, int temp) { int i, high, low; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 910d407ebe63..9253411c105f 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -752,7 +752,7 @@ struct power_supply_battery_info { int temp_max; struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX]; int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX]; - struct power_supply_resistance_temp_table *resist_table; + const struct power_supply_resistance_temp_table *resist_table; int resist_table_size; const struct power_supply_vbat_ri_table *vbat2ri_discharging; int vbat2ri_discharging_size; @@ -805,7 +805,7 @@ power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, extern int power_supply_batinfo_ocv2cap(struct power_supply_battery_info *info, int ocv, int temp); extern int -power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table, +power_supply_temp2resist_simple(const struct power_supply_resistance_temp_table *table, int table_len, int temp); extern int power_supply_vbat2ri(struct power_supply_battery_info *info, int vbat_uv, bool charging); -- cgit v1.2.3 From ce20d5b9e37099a035ab34d4d3f59e1744756385 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 5 Oct 2024 12:04:21 +0200 Subject: power: supply: core: constify power_supply_battery_info::ocv_table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The power supply core never modifies the ocv table. Reflect this in the API, so drivers can mark their static tables as const. 
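And a similar hypothetical sketch for the OCV case (names and values invented), pairing a const open-circuit-voltage table with the existing lookup helper:

#include <linux/kernel.h>
#include <linux/power_supply.h>

/* OCV in microvolts mapped to capacity percent, highest voltage first */
static const struct power_supply_battery_ocv_table acme_ocv_table[] = {
        { .ocv = 4200000, .capacity = 100 },
        { .ocv = 4000000, .capacity =  80 },
        { .ocv = 3800000, .capacity =  50 },
        { .ocv = 3600000, .capacity =  20 },
        { .ocv = 3400000, .capacity =   0 },
};

static int acme_capacity_from_ocv(int ocv_uv)
{
        return power_supply_ocv2cap_simple(acme_ocv_table,
                                           ARRAY_SIZE(acme_ocv_table), ocv_uv);
}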
Signed-off-by: Thomas Weißschuh Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20241005-power-supply-battery-const-v1-5-c1f721927048@weissschuh.net Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 8 ++++---- include/linux/power_supply.h | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index a01703fa83c0..5aefba2ddcda 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -777,7 +777,7 @@ int power_supply_get_battery_info(struct power_supply *psy, tab_len = size / (2 * sizeof(__be32)); info->ocv_table_size[index] = tab_len; - table = info->ocv_table[index] = + info->ocv_table[index] = table = devm_kcalloc(&psy->dev, tab_len, sizeof(*table), GFP_KERNEL); if (!info->ocv_table[index]) { power_supply_put_battery_info(psy, info); @@ -1093,7 +1093,7 @@ EXPORT_SYMBOL_GPL(power_supply_get_maintenance_charging_setting); * * Return: the battery capacity. */ -int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table, +int power_supply_ocv2cap_simple(const struct power_supply_battery_ocv_table *table, int table_len, int ocv) { int i, high, low; @@ -1118,7 +1118,7 @@ int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table, } EXPORT_SYMBOL_GPL(power_supply_ocv2cap_simple); -struct power_supply_battery_ocv_table * +const struct power_supply_battery_ocv_table * power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, int temp, int *table_len) { @@ -1149,7 +1149,7 @@ EXPORT_SYMBOL_GPL(power_supply_find_ocv2cap_table); int power_supply_batinfo_ocv2cap(struct power_supply_battery_info *info, int ocv, int temp) { - struct power_supply_battery_ocv_table *table; + const struct power_supply_battery_ocv_table *table; int table_len; table = power_supply_find_ocv2cap_table(info, temp, &table_len); diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 9253411c105f..4e29ec39c18f 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -750,7 +750,7 @@ struct power_supply_battery_info { int temp_alert_max; int temp_min; int temp_max; - struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX]; + const struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX]; int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX]; const struct power_supply_resistance_temp_table *resist_table; int resist_table_size; @@ -797,9 +797,9 @@ extern bool power_supply_battery_info_has_prop(struct power_supply_battery_info extern int power_supply_battery_info_get_prop(struct power_supply_battery_info *info, enum power_supply_property psp, union power_supply_propval *val); -extern int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table, +extern int power_supply_ocv2cap_simple(const struct power_supply_battery_ocv_table *table, int table_len, int ocv); -extern struct power_supply_battery_ocv_table * +extern const struct power_supply_battery_ocv_table * power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, int temp, int *table_len); extern int power_supply_batinfo_ocv2cap(struct power_supply_battery_info *info, -- cgit v1.2.3 From 49000fee9e639f62ba1f965ed2ae4c5ad18d19e2 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 5 Oct 2024 12:05:03 +0200 Subject: power: supply: core: add wakeup source inhibit by power_supply_config MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To inhibit wakeup users currently have to use dedicated functions to register the power supply: {,devm_}power_supply_register_no_ws(). This is inconsistent to other runtime settings which can be configured through struct power_supply_config. It's also not obvious what _no_ws() is meant to mean. Extend power_supply_config to also be able to inhibit the wakeup source. Signed-off-by: Thomas Weißschuh Reviewed-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20241005-power-supply-no-wakeup-source-v1-1-1d62bf9bcb1d@weissschuh.net Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 3 +++ include/linux/power_supply.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 5aefba2ddcda..a2005e3c6f38 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -1410,6 +1410,9 @@ __power_supply_register(struct device *parent, if (rc) goto device_add_failed; + if (cfg && cfg->no_wakeup_source) + ws = false; + rc = device_init_wakeup(dev, ws); if (rc) goto wakeup_init_failed; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 4e29ec39c18f..9a64043798bd 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -236,6 +236,8 @@ struct power_supply_config { char **supplied_to; size_t num_supplicants; + + bool no_wakeup_source; }; /* Description of power supply */ -- cgit v1.2.3 From 85d319e14f301e1c68131b74c1dceabae73d1e81 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 5 Oct 2024 12:05:10 +0200 Subject: power: supply: core: remove {,devm_}power_supply_register_no_ws() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The same functionality is available through power_supply_config::no_wakeup_source, which is more idiomatic. All users of the old API have been converted. Also remove the argument "ws" from __power_supply_register(), as it is now always "true". Signed-off-by: Thomas Weißschuh Reviewed-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20241005-power-supply-no-wakeup-source-v1-8-1d62bf9bcb1d@weissschuh.net Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 71 ++------------------------------ include/linux/power_supply.h | 8 ---- 2 files changed, 4 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index a2005e3c6f38..f20b0fde9847 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -1342,8 +1342,7 @@ static void psy_unregister_thermal(struct power_supply *psy) static struct power_supply *__must_check __power_supply_register(struct device *parent, const struct power_supply_desc *desc, - const struct power_supply_config *cfg, - bool ws) + const struct power_supply_config *cfg) { struct device *dev; struct power_supply *psy; @@ -1410,10 +1409,7 @@ __power_supply_register(struct device *parent, if (rc) goto device_add_failed; - if (cfg && cfg->no_wakeup_source) - ws = false; - - rc = device_init_wakeup(dev, ws); + rc = device_init_wakeup(dev, cfg ? 
!cfg->no_wakeup_source : true); if (rc) goto wakeup_init_failed; @@ -1479,33 +1475,10 @@ struct power_supply *__must_check power_supply_register(struct device *parent, const struct power_supply_desc *desc, const struct power_supply_config *cfg) { - return __power_supply_register(parent, desc, cfg, true); + return __power_supply_register(parent, desc, cfg); } EXPORT_SYMBOL_GPL(power_supply_register); -/** - * power_supply_register_no_ws() - Register new non-waking-source power supply - * @parent: Device to be a parent of power supply's device, usually - * the device which probe function calls this - * @desc: Description of power supply, must be valid through whole - * lifetime of this power supply - * @cfg: Run-time specific configuration accessed during registering, - * may be NULL - * - * Return: A pointer to newly allocated power_supply on success - * or ERR_PTR otherwise. - * Use power_supply_unregister() on returned power_supply pointer to release - * resources. - */ -struct power_supply *__must_check -power_supply_register_no_ws(struct device *parent, - const struct power_supply_desc *desc, - const struct power_supply_config *cfg) -{ - return __power_supply_register(parent, desc, cfg, false); -} -EXPORT_SYMBOL_GPL(power_supply_register_no_ws); - static void devm_power_supply_release(struct device *dev, void *res) { struct power_supply **psy = res; @@ -1538,7 +1511,7 @@ devm_power_supply_register(struct device *parent, if (!ptr) return ERR_PTR(-ENOMEM); - psy = __power_supply_register(parent, desc, cfg, true); + psy = __power_supply_register(parent, desc, cfg); if (IS_ERR(psy)) { devres_free(ptr); } else { @@ -1549,42 +1522,6 @@ devm_power_supply_register(struct device *parent, } EXPORT_SYMBOL_GPL(devm_power_supply_register); -/** - * devm_power_supply_register_no_ws() - Register managed non-waking-source power supply - * @parent: Device to be a parent of power supply's device, usually - * the device which probe function calls this - * @desc: Description of power supply, must be valid through whole - * lifetime of this power supply - * @cfg: Run-time specific configuration accessed during registering, - * may be NULL - * - * Return: A pointer to newly allocated power_supply on success - * or ERR_PTR otherwise. - * The returned power_supply pointer will be automatically unregistered - * on driver detach. 
- */ -struct power_supply *__must_check -devm_power_supply_register_no_ws(struct device *parent, - const struct power_supply_desc *desc, - const struct power_supply_config *cfg) -{ - struct power_supply **ptr, *psy; - - ptr = devres_alloc(devm_power_supply_release, sizeof(*ptr), GFP_KERNEL); - - if (!ptr) - return ERR_PTR(-ENOMEM); - psy = __power_supply_register(parent, desc, cfg, false); - if (IS_ERR(psy)) { - devres_free(ptr); - } else { - *ptr = psy; - devres_add(parent, ptr); - } - return psy; -} -EXPORT_SYMBOL_GPL(devm_power_supply_register_no_ws); - /** * power_supply_unregister() - Remove this power supply from system * @psy: Pointer to power supply to unregister diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 9a64043798bd..14eedefb3ac8 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -874,17 +874,9 @@ power_supply_register(struct device *parent, const struct power_supply_desc *desc, const struct power_supply_config *cfg); extern struct power_supply *__must_check -power_supply_register_no_ws(struct device *parent, - const struct power_supply_desc *desc, - const struct power_supply_config *cfg); -extern struct power_supply *__must_check devm_power_supply_register(struct device *parent, const struct power_supply_desc *desc, const struct power_supply_config *cfg); -extern struct power_supply *__must_check -devm_power_supply_register_no_ws(struct device *parent, - const struct power_supply_desc *desc, - const struct power_supply_config *cfg); extern void power_supply_unregister(struct power_supply *psy); extern int power_supply_powers(struct power_supply *psy, struct device *dev); -- cgit v1.2.3 From 0b582611a8f4270fa357a22a546909b2dd5fc5fe Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 15 Oct 2024 10:28:43 +0900 Subject: ftrace: Use arch_ftrace_regs() for ftrace_regs_*() macros Since the arch_ftrace_get_regs(fregs) is only valid when the FL_SAVE_REGS is set, we need to use `&arch_ftrace_regs()->regs` for ftrace_regs_*() APIs because those APIs are for ftrace_regs, not complete pt_regs. 
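A hedged consumer-side example (the callback name and message are hypothetical; the ftrace_func_t signature is the one used throughout this series): because the accessors now operate on ftrace_regs itself, a callback on a DYNAMIC_FTRACE_WITH_ARGS architecture can read values even though ftrace_get_regs(fregs) would return NULL without FTRACE_OPS_FL_SAVE_REGS.

#include <linux/ftrace.h>
#include <linux/printk.h>

static void acme_trace_func(unsigned long ip, unsigned long parent_ip,
                            struct ftrace_ops *op, struct ftrace_regs *fregs)
{
        /* works on the (possibly partial) ftrace_regs, no full pt_regs needed */
        unsigned long pc   = ftrace_regs_get_instruction_pointer(fregs);
        unsigned long arg0 = ftrace_regs_get_argument(fregs, 0);

        pr_debug("hit %ps, first argument %#lx\n", (void *)pc, arg0);
}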
Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Alan Maguire Cc: Mark Rutland Link: https://lore.kernel.org/172895572290.107311.16057631001860177198.stgit@devnote2 Fixes: e4cf33ca4812 ("ftrace: Consolidate ftrace_regs accessor functions for archs using pt_regs") Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace_regs.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h index dea6a0851b74..b78a0a60515b 100644 --- a/include/linux/ftrace_regs.h +++ b/include/linux/ftrace_regs.h @@ -17,17 +17,17 @@ struct __arch_ftrace_regs { struct ftrace_regs; #define ftrace_regs_get_instruction_pointer(fregs) \ - instruction_pointer(arch_ftrace_get_regs(fregs)) + instruction_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(arch_ftrace_get_regs(fregs), n) + regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(arch_ftrace_get_regs(fregs)) + kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_return_value(fregs) \ - regs_return_value(arch_ftrace_get_regs(fregs)) + regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(arch_ftrace_get_regs(fregs), ret) + regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) #define ftrace_override_function_with_return(fregs) \ - override_function_with_return(arch_ftrace_get_regs(fregs)) + override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) -- cgit v1.2.3 From 2d17932da44fdc1ba835ad05110ab996d2912dbf Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 15 Oct 2024 10:28:53 +0900 Subject: ftrace: Rename ftrace_regs_return_value to ftrace_regs_get_return_value Rename ftrace_regs_return_value to ftrace_regs_get_return_value as same as other ftrace_regs_get/set_* APIs. arm64 and riscv are already using this new name. Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Alan Maguire Link: https://lore.kernel.org/172895573350.107311.7564634260652361511.stgit@devnote2 Acked-by: Mark Rutland Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h index b78a0a60515b..be1ed0c891d0 100644 --- a/include/linux/ftrace_regs.h +++ b/include/linux/ftrace_regs.h @@ -22,7 +22,7 @@ struct ftrace_regs; regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_return_value(fregs) \ +#define ftrace_regs_get_return_value(fregs) \ regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) -- cgit v1.2.3 From efe8419ae78d65e83edc31aad74b605c12e7d60c Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 14 Oct 2024 16:13:39 +0100 Subject: vdso: Introduce vdso/page.h The VDSO implementation includes headers from outside of the vdso/ namespace. 
Introduce vdso/page.h to make sure that the generic library uses only the allowed namespace. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Reviewed-by: Arnd Bergmann Acked-by: Geert Uytterhoeven # m68k Link: https://lore.kernel.org/all/20241014151340.1639555-3-vincenzo.frascino@arm.com --- arch/alpha/include/asm/page.h | 6 +----- arch/arc/include/uapi/asm/page.h | 7 +++---- arch/arm/include/asm/page.h | 5 +---- arch/arm64/include/asm/page-def.h | 5 +---- arch/csky/include/asm/page.h | 8 ++------ arch/hexagon/include/asm/page.h | 4 +--- arch/loongarch/include/asm/page.h | 7 +------ arch/m68k/include/asm/page.h | 6 ++---- arch/microblaze/include/asm/page.h | 5 +---- arch/mips/include/asm/page.h | 7 +------ arch/nios2/include/asm/page.h | 7 +------ arch/openrisc/include/asm/page.h | 11 +---------- arch/parisc/include/asm/page.h | 4 +--- arch/powerpc/include/asm/page.h | 10 +--------- arch/riscv/include/asm/page.h | 4 +--- arch/s390/include/asm/page.h | 13 +++++-------- arch/sh/include/asm/page.h | 6 ++---- arch/sparc/include/asm/page_32.h | 4 +--- arch/sparc/include/asm/page_64.h | 4 +--- arch/um/include/asm/page.h | 5 +---- arch/x86/include/asm/page_types.h | 5 +---- arch/xtensa/include/asm/page.h | 8 +------- include/vdso/page.h | 30 ++++++++++++++++++++++++++++++ 23 files changed, 61 insertions(+), 110 deletions(-) create mode 100644 include/vdso/page.h (limited to 'include') diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h index 70419e6be1a3..261af54fd601 100644 --- a/arch/alpha/include/asm/page.h +++ b/arch/alpha/include/asm/page.h @@ -4,11 +4,7 @@ #include #include - -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include #ifndef __ASSEMBLY__ diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h index 7fd9e741b527..4606a326af5c 100644 --- a/arch/arc/include/uapi/asm/page.h +++ b/arch/arc/include/uapi/asm/page.h @@ -14,7 +14,7 @@ /* PAGE_SHIFT determines the page size */ #ifdef __KERNEL__ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT +#include #else /* * Default 8k @@ -24,11 +24,10 @@ * not available */ #define PAGE_SHIFT 13 +#define PAGE_SIZE _BITUL(PAGE_SHIFT) /* Default 8K */ +#define PAGE_MASK (~(PAGE_SIZE-1)) #endif -#define PAGE_SIZE _BITUL(PAGE_SHIFT) /* Default 8K */ #define PAGE_OFFSET _AC(0x80000000, UL) /* Kernel starts at 2G onwrds */ -#define PAGE_MASK (~(PAGE_SIZE-1)) - #endif /* _UAPI__ASM_ARC_PAGE_H */ diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index 62af9f7f9e96..ef11b721230e 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -7,10 +7,7 @@ #ifndef _ASMARM_PAGE_H #define _ASMARM_PAGE_H -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) +#include #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h index 792e9fe881dc..d402e08442ee 100644 --- a/arch/arm64/include/asm/page-def.h +++ b/arch/arm64/include/asm/page-def.h @@ -10,9 +10,6 @@ #include -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include #endif /* __ASM_PAGE_DEF_H */ diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h index 0ca6c408c07f..f8beae295afb 100644 --- 
a/arch/csky/include/asm/page.h +++ b/arch/csky/include/asm/page.h @@ -7,12 +7,8 @@ #include #include -/* - * PAGE_SHIFT determines the page size: 4KB - */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE - 1)) +#include + #define THREAD_SIZE (PAGE_SIZE * 2) #define THREAD_MASK (~(THREAD_SIZE - 1)) #define THREAD_SHIFT (PAGE_SHIFT + 1) diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h index 8a6af57274c2..b01f8df69dd4 100644 --- a/arch/hexagon/include/asm/page.h +++ b/arch/hexagon/include/asm/page.h @@ -45,9 +45,7 @@ #define HVM_HUGEPAGE_SIZE 0x5 #endif -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (1UL << PAGE_SHIFT) -#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) +#include #ifdef __KERNEL__ #ifndef __ASSEMBLY__ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index e85df33f11c7..83f3533e31a4 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -8,12 +8,7 @@ #include #include -/* - * PAGE_SHIFT determines the page size - */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE - 1)) +#include #define HPAGE_SHIFT (PAGE_SHIFT + PAGE_SHIFT - 3) #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h index 8cfb84b49975..b173ba27d36f 100644 --- a/arch/m68k/include/asm/page.h +++ b/arch/m68k/include/asm/page.h @@ -6,10 +6,8 @@ #include #include -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include + #define PAGE_OFFSET (PAGE_OFFSET_RAW) #ifndef __ASSEMBLY__ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index 8810f4f1c3b0..d1ec3806edab 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -19,10 +19,7 @@ #ifdef __KERNEL__ -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include #define LOAD_OFFSET ASM_CONST((CONFIG_KERNEL_START-CONFIG_KERNEL_BASE_ADDR)) diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index 4609cb0326cf..bc3e3484c1bf 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -14,12 +14,7 @@ #include #include -/* - * PAGE_SHIFT determines the page size - */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) +#include /* * This is used for calculating the real page sizes diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h index 0722f88e63cc..2897ec1b74f6 100644 --- a/arch/nios2/include/asm/page.h +++ b/arch/nios2/include/asm/page.h @@ -18,12 +18,7 @@ #include #include -/* - * PAGE_SHIFT determines the page size - */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE - 1)) +#include /* * PAGE_OFFSET -- the first address of the first page of memory. 
diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h index 1d5913f67c31..124a2db4b160 100644 --- a/arch/openrisc/include/asm/page.h +++ b/arch/openrisc/include/asm/page.h @@ -15,16 +15,7 @@ #ifndef __ASM_OPENRISC_PAGE_H #define __ASM_OPENRISC_PAGE_H - -/* PAGE_SHIFT determines the page size */ - -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#ifdef __ASSEMBLY__ -#define PAGE_SIZE (1 << PAGE_SHIFT) -#else -#define PAGE_SIZE (1UL << PAGE_SHIFT) -#endif -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include #define PAGE_OFFSET 0xc0000000 #define KERNELBASE PAGE_OFFSET diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index 4bea2e95798f..6c4836fb5407 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -4,9 +4,7 @@ #include -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 83d0a4fc5f75..af9a2628d1df 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -21,8 +21,7 @@ * page size. When using 64K pages however, whether we are really supporting * 64K pages in HW or not is irrelevant to those definitions. */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) +#include #ifndef __ASSEMBLY__ #ifndef CONFIG_HUGETLB_PAGE @@ -41,13 +40,6 @@ extern unsigned int hpage_shift; #define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1) #endif -/* - * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we - * assign PAGE_MASK to a larger type it gets extended the way we want - * (i.e. with 1s in the high bits) - */ -#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) - /* * KERNELBASE is the virtual address of the start of the kernel, it's often * the same as PAGE_OFFSET, but _might not be_. 
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 32d308a3355f..9875399827c7 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -12,9 +12,7 @@ #include #include -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE - 1)) +#include #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 73e1e03317b4..dbc25dc5fa0a 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -11,14 +11,11 @@ #include #include -#define _PAGE_SHIFT CONFIG_PAGE_SHIFT -#define _PAGE_SIZE (_AC(1, UL) << _PAGE_SHIFT) -#define _PAGE_MASK (~(_PAGE_SIZE - 1)) - -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT _PAGE_SHIFT -#define PAGE_SIZE _PAGE_SIZE -#define PAGE_MASK _PAGE_MASK +#include + +#define _PAGE_SHIFT PAGE_SHIFT +#define _PAGE_SIZE PAGE_SIZE +#define _PAGE_MASK PAGE_MASK #define PAGE_DEFAULT_ACC _AC(0, UL) /* storage-protection override */ #define PAGE_SPO_ACC 9 diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h index f780b467e75d..fc39b8171bfb 100644 --- a/arch/sh/include/asm/page.h +++ b/arch/sh/include/asm/page.h @@ -8,10 +8,8 @@ #include -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include + #define PTE_MASK PAGE_MASK #if defined(CONFIG_HUGETLB_PAGE_SIZE_64K) diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h index 9977c77374cd..9954254ea569 100644 --- a/arch/sparc/include/asm/page_32.h +++ b/arch/sparc/include/asm/page_32.h @@ -11,9 +11,7 @@ #include -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index e9bd24821c93..2a68ff5b6eab 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -4,9 +4,7 @@ #include -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include /* Flushing for D-cache alias handling is only needed if * the page size is smaller than 16K. 
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 9ef9a8aedfa6..834313ecd3d6 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -9,10 +9,7 @@ #include -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 52f1b4ff0cc1..974688973cf6 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -6,10 +6,7 @@ #include #include -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h index 4db56ef052d2..595c1037b738 100644 --- a/arch/xtensa/include/asm/page.h +++ b/arch/xtensa/include/asm/page.h @@ -18,13 +18,7 @@ #include #include -/* - * PAGE_SHIFT determines the page size - */ - -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (__XTENSA_UL_CONST(1) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#include #ifdef CONFIG_MMU #define PAGE_OFFSET XCHAL_KSEG_CACHED_VADDR diff --git a/include/vdso/page.h b/include/vdso/page.h new file mode 100644 index 000000000000..4ada1ba6bd1f --- /dev/null +++ b/include/vdso/page.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_PAGE_H +#define __VDSO_PAGE_H + +#include + +/* + * PAGE_SHIFT determines the page size. + * + * Note: This definition is required because PAGE_SHIFT is used + * in several places throuout the codebase. + */ +#define PAGE_SHIFT CONFIG_PAGE_SHIFT + +#define PAGE_SIZE (_AC(1,UL) << CONFIG_PAGE_SHIFT) + +#if defined(CONFIG_PHYS_ADDR_T_64BIT) && !defined(CONFIG_64BIT) +/* + * Applies only to 32-bit architectures with a 64-bit phys_addr_t. + * + * Subtle: (1 << CONFIG_PAGE_SHIFT) is an int, not an unsigned long. + * So if we assign PAGE_MASK to a larger type it gets extended the + * way we want (i.e. with 1s in the high bits) + */ +#define PAGE_MASK (~((1 << CONFIG_PAGE_SHIFT) - 1)) +#else +#define PAGE_MASK (~(PAGE_SIZE - 1)) +#endif + +#endif /* __VDSO_PAGE_H */ -- cgit v1.2.3 From 102f085d84607462234ac60f6027973b45a9bde2 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 14 Oct 2024 10:22:21 +0200 Subject: timers: Rename usleep_idle_range() to usleep_range_idle() usleep_idle_range() is a variant of usleep_range(). Both are using usleep_range_state() as a base. To be able to find all the related functions in one go, rename it usleep_idle_range() to usleep_range_idle(). No functional change. 
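As a usage illustration (not part of this patch), a sketch of a caller under the new name; the kthread and its work are hypothetical:

#include <linux/delay.h>
#include <linux/kthread.h>

static int my_poll_thread(void *data)
{
	while (!kthread_should_stop()) {
		/* ... periodic housekeeping ... */

		/*
		 * Sleep with TASK_IDLE so the wait does not contribute
		 * to the load average, unlike plain usleep_range().
		 */
		usleep_range_idle(1000, 2000);
	}
	return 0;
}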
Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Reviewed-by: SeongJae Park Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-4-dc8b907cb62f@linutronix.de --- include/linux/delay.h | 2 +- mm/damon/core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/delay.h b/include/linux/delay.h index ff9cda975e30..2bc586aa2068 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -68,7 +68,7 @@ static inline void usleep_range(unsigned long min, unsigned long max) usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); } -static inline void usleep_idle_range(unsigned long min, unsigned long max) +static inline void usleep_range_idle(unsigned long min, unsigned long max) { usleep_range_state(min, max, TASK_IDLE); } diff --git a/mm/damon/core.c b/mm/damon/core.c index a83f3b736d51..c725c78b43f0 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1896,7 +1896,7 @@ static void kdamond_usleep(unsigned long usecs) if (usecs > 20 * USEC_PER_MSEC) schedule_timeout_idle(usecs_to_jiffies(usecs)); else - usleep_idle_range(usecs, usecs + 1); + usleep_range_idle(usecs, usecs + 1); } /* Returns negative error code if it's not activated but should return */ -- cgit v1.2.3 From f36eb171410839325fff9cd9b7b7400f7e606962 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 14 Oct 2024 10:22:22 +0200 Subject: timers: Update function descriptions of sleep/delay related functions A lot of commonly used functions for inserting a sleep or delay lack a proper function description. Add function descriptions to all of them to have important information in a central place close to the code. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-5-dc8b907cb62f@linutronix.de --- include/asm-generic/delay.h | 41 +++++++++++++++++++++++++++++++---- include/linux/delay.h | 48 ++++++++++++++++++++++++++++++---------- kernel/time/sleep_timeout.c | 53 ++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 120 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h index e448ac61430c..a8cee41cc51b 100644 --- a/include/asm-generic/delay.h +++ b/include/asm-generic/delay.h @@ -12,11 +12,39 @@ extern void __const_udelay(unsigned long xloops); extern void __delay(unsigned long loops); /* - * The weird n/20000 thing suppresses a "comparison is always false due to - * limited range of data type" warning with non-const 8-bit arguments. + * Implementation details: + * + * * The weird n/20000 thing suppresses a "comparison is always false due to + * limited range of data type" warning with non-const 8-bit arguments. + * * 0x10c7 is 2**32 / 1000000 (rounded up) -> udelay + * * 0x5 is 2**32 / 1000000000 (rounded up) -> ndelay */ -/* 0x10c7 is 2**32 / 1000000 (rounded up) */ +/** + * udelay - Inserting a delay based on microseconds with busy waiting + * @usec: requested delay in microseconds + * + * When delaying in an atomic context ndelay(), udelay() and mdelay() are the + * only valid variants of delaying/sleeping to go with. + * + * When inserting delays in non atomic context which are shorter than the time + * which is required to queue e.g. an hrtimer and to enter then the scheduler, + * it is also valuable to use udelay(). 
But it is not simple to specify a + * generic threshold for this which will fit for all systems. An approximation + * is a threshold for all delays up to 10 microseconds. + * + * When having a delay which is larger than the architecture specific + * %MAX_UDELAY_MS value, please make sure mdelay() is used. Otherwise a overflow + * risk is given. + * + * Please note that ndelay(), udelay() and mdelay() may return early for several + * reasons (https://lists.openwall.net/linux-kernel/2011/01/09/56): + * + * #. computed loops_per_jiffy too low (due to the time taken to execute the + * timer interrupt.) + * #. cache behaviour affecting the time it takes to execute the loop function. + * #. CPU clock rate changes. + */ #define udelay(n) \ ({ \ if (__builtin_constant_p(n)) { \ @@ -29,7 +57,12 @@ extern void __delay(unsigned long loops); } \ }) -/* 0x5 is 2**32 / 1000000000 (rounded up) */ +/** + * ndelay - Inserting a delay based on nanoseconds with busy waiting + * @nsec: requested delay in nanoseconds + * + * See udelay() for basic information about ndelay() and it's variants. + */ #define ndelay(n) \ ({ \ if (__builtin_constant_p(n)) { \ diff --git a/include/linux/delay.h b/include/linux/delay.h index 2bc586aa2068..2de509e4adce 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -6,17 +6,7 @@ * Copyright (C) 1993 Linus Torvalds * * Delay routines, using a pre-computed "loops_per_jiffy" value. - * - * Please note that ndelay(), udelay() and mdelay() may return early for - * several reasons: - * 1. computed loops_per_jiffy too low (due to the time taken to - * execute the timer interrupt.) - * 2. cache behaviour affecting the time it takes to execute the - * loop function. - * 3. CPU clock rate changes. - * - * Please see this thread: - * https://lists.openwall.net/linux-kernel/2011/01/09/56 + * Sleep routines using timer list timers or hrtimers. */ #include @@ -35,12 +25,21 @@ extern unsigned long loops_per_jiffy; * The 2nd mdelay() definition ensures GCC will optimize away the * while loop for the common cases where n <= MAX_UDELAY_MS -- Paul G. */ - #ifndef MAX_UDELAY_MS #define MAX_UDELAY_MS 5 #endif #ifndef mdelay +/** + * mdelay - Inserting a delay based on milliseconds with busy waiting + * @n: requested delay in milliseconds + * + * See udelay() for basic information about mdelay() and it's variants. + * + * Please double check, whether mdelay() is the right way to go or whether a + * refactoring of the code is the better variant to be able to use msleep() + * instead. + */ #define mdelay(n) (\ (__builtin_constant_p(n) && (n)<=MAX_UDELAY_MS) ? udelay((n)*1000) : \ ({unsigned long __ms=(n); while (__ms--) udelay(1000);})) @@ -63,16 +62,41 @@ unsigned long msleep_interruptible(unsigned int msecs); void usleep_range_state(unsigned long min, unsigned long max, unsigned int state); +/** + * usleep_range - Sleep for an approximate time + * @min: Minimum time in microseconds to sleep + * @max: Maximum time in microseconds to sleep + * + * For basic information please refere to usleep_range_state(). + * + * The task will be in the state TASK_UNINTERRUPTIBLE during the sleep. + */ static inline void usleep_range(unsigned long min, unsigned long max) { usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); } +/** + * usleep_range_idle - Sleep for an approximate time with idle time accounting + * @min: Minimum time in microseconds to sleep + * @max: Maximum time in microseconds to sleep + * + * For basic information please refere to usleep_range_state(). 
+ * + * The sleeping task has the state TASK_IDLE during the sleep to prevent + * contribution to the load avarage. + */ static inline void usleep_range_idle(unsigned long min, unsigned long max) { usleep_range_state(min, max, TASK_IDLE); } +/** + * ssleep - wrapper for seconds around msleep + * @seconds: Requested sleep duration in seconds + * + * Please refere to msleep() for detailed information. + */ static inline void ssleep(unsigned int seconds) { msleep(seconds * 1000); diff --git a/kernel/time/sleep_timeout.c b/kernel/time/sleep_timeout.c index 560d17c30aa5..f3f246e4c8d1 100644 --- a/kernel/time/sleep_timeout.c +++ b/kernel/time/sleep_timeout.c @@ -281,7 +281,34 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout); /** * msleep - sleep safely even with waitqueue interruptions - * @msecs: Time in milliseconds to sleep for + * @msecs: Requested sleep duration in milliseconds + * + * msleep() uses jiffy based timeouts for the sleep duration. Because of the + * design of the timer wheel, the maximum additional percentage delay (slack) is + * 12.5%. This is only valid for timers which will end up in level 1 or a higher + * level of the timer wheel. For explanation of those 12.5% please check the + * detailed description about the basics of the timer wheel. + * + * The slack of timers which will end up in level 0 depends on sleep duration + * (msecs) and HZ configuration and can be calculated in the following way (with + * the timer wheel design restriction that the slack is not less than 12.5%): + * + * ``slack = MSECS_PER_TICK / msecs`` + * + * When the allowed slack of the callsite is known, the calculation could be + * turned around to find the minimal allowed sleep duration to meet the + * constraints. For example: + * + * * ``HZ=1000`` with ``slack=25%``: ``MSECS_PER_TICK / slack = 1 / (1/4) = 4``: + * all sleep durations greater or equal 4ms will meet the constraints. + * * ``HZ=1000`` with ``slack=12.5%``: ``MSECS_PER_TICK / slack = 1 / (1/8) = 8``: + * all sleep durations greater or equal 8ms will meet the constraints. + * * ``HZ=250`` with ``slack=25%``: ``MSECS_PER_TICK / slack = 4 / (1/4) = 16``: + * all sleep durations greater or equal 16ms will meet the constraints. + * * ``HZ=250`` with ``slack=12.5%``: ``MSECS_PER_TICK / slack = 4 / (1/8) = 32``: + * all sleep durations greater or equal 32ms will meet the constraints. + * + * See also the signal aware variant msleep_interruptible(). */ void msleep(unsigned int msecs) { @@ -294,7 +321,15 @@ EXPORT_SYMBOL(msleep); /** * msleep_interruptible - sleep waiting for signals - * @msecs: Time in milliseconds to sleep for + * @msecs: Requested sleep duration in milliseconds + * + * See msleep() for some basic information. + * + * The difference between msleep() and msleep_interruptible() is that the sleep + * could be interrupted by a signal delivery and then returns early. + * + * Returns: The remaining time of the sleep duration transformed to msecs (see + * schedule_timeout() for details). */ unsigned long msleep_interruptible(unsigned int msecs) { @@ -312,11 +347,17 @@ EXPORT_SYMBOL(msleep_interruptible); * @max: Maximum time in usecs to sleep * @state: State of the current task that will be while sleeping * + * usleep_range_state() sleeps at least for the minimum specified time but not + * longer than the maximum specified amount of time. The range might reduce + * power usage by allowing hrtimers to coalesce an already scheduled interrupt + * with this hrtimer. In the worst case, an interrupt is scheduled for the upper + * bound. 
+ * + * The sleeping task is set to the specified state before starting the sleep. + * * In non-atomic context where the exact wakeup time is flexible, use - * usleep_range_state() instead of udelay(). The sleep improves responsiveness - * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces - * power usage by allowing hrtimers to take advantage of an already- - * scheduled interrupt instead of scheduling a new one just for this sleep. + * usleep_range() or its variants instead of udelay(). The sleep improves + * responsiveness by avoiding the CPU-hogging busy-wait of udelay(). */ void __sched usleep_range_state(unsigned long min, unsigned long max, unsigned int state) { -- cgit v1.2.3 From 19e2d91d8cb1f333adf04731f2788ff6ca06cebd Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 14 Oct 2024 10:22:23 +0200 Subject: delay: Rework udelay and ndelay udelay() as well as ndelay() are defines and no functions and are using constants to be able to transform a sleep time into loops and to prevent too long udelays/ndelays. There was a compiler error with non-const 8 bit arguments which was fixed by commit a87e553fabe8 ("asm-generic: delay.h fix udelay and ndelay for 8 bit args"). When using a function, the non-const 8 bit argument is type casted and the problem would be gone. Transform udelay() and ndelay() into proper functions, remove the no longer and confusing division, add defines for the magic values and add some explanations as well. Suggested-by: Thomas Gleixner Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-6-dc8b907cb62f@linutronix.de --- include/asm-generic/delay.h | 65 ++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h index a8cee41cc51b..76cf237b6e4c 100644 --- a/include/asm-generic/delay.h +++ b/include/asm-generic/delay.h @@ -2,6 +2,9 @@ #ifndef __ASM_GENERIC_DELAY_H #define __ASM_GENERIC_DELAY_H +#include +#include + /* Undefined functions to get compile-time errors */ extern void __bad_udelay(void); extern void __bad_ndelay(void); @@ -12,13 +15,18 @@ extern void __const_udelay(unsigned long xloops); extern void __delay(unsigned long loops); /* - * Implementation details: - * - * * The weird n/20000 thing suppresses a "comparison is always false due to - * limited range of data type" warning with non-const 8-bit arguments. - * * 0x10c7 is 2**32 / 1000000 (rounded up) -> udelay - * * 0x5 is 2**32 / 1000000000 (rounded up) -> ndelay + * The microseconds/nanosecond delay multiplicators are used to convert a + * constant microseconds/nanoseconds value to a value which can be used by the + * architectures specific implementation to transform it into loops. + */ +#define UDELAY_CONST_MULT ((unsigned long)DIV_ROUND_UP(1ULL << 32, USEC_PER_SEC)) +#define NDELAY_CONST_MULT ((unsigned long)DIV_ROUND_UP(1ULL << 32, NSEC_PER_SEC)) + +/* + * The maximum constant udelay/ndelay value picked out of thin air to prevent + * too long constant udelays/ndelays. */ +#define DELAY_CONST_MAX 20000 /** * udelay - Inserting a delay based on microseconds with busy waiting @@ -45,17 +53,17 @@ extern void __delay(unsigned long loops); * #. cache behaviour affecting the time it takes to execute the loop function. * #. CPU clock rate changes. 
*/ -#define udelay(n) \ - ({ \ - if (__builtin_constant_p(n)) { \ - if ((n) / 20000 >= 1) \ - __bad_udelay(); \ - else \ - __const_udelay((n) * 0x10c7ul); \ - } else { \ - __udelay(n); \ - } \ - }) +static __always_inline void udelay(unsigned long usec) +{ + if (__builtin_constant_p(usec)) { + if (usec >= DELAY_CONST_MAX) + __bad_udelay(); + else + __const_udelay(usec * UDELAY_CONST_MULT); + } else { + __udelay(usec); + } +} /** * ndelay - Inserting a delay based on nanoseconds with busy waiting @@ -63,16 +71,17 @@ extern void __delay(unsigned long loops); * * See udelay() for basic information about ndelay() and it's variants. */ -#define ndelay(n) \ - ({ \ - if (__builtin_constant_p(n)) { \ - if ((n) / 20000 >= 1) \ - __bad_ndelay(); \ - else \ - __const_udelay((n) * 5ul); \ - } else { \ - __ndelay(n); \ - } \ - }) +static __always_inline void ndelay(unsigned long nsec) +{ + if (__builtin_constant_p(nsec)) { + if (nsec >= DELAY_CONST_MAX) + __bad_udelay(); + else + __const_udelay(nsec * NDELAY_CONST_MULT); + } else { + __udelay(nsec); + } +} +#define ndelay(x) ndelay(x) #endif /* __ASM_GENERIC_DELAY_H */ -- cgit v1.2.3 From 82e11e47c1880362e05c065bef7dbe28a749555c Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 14 Oct 2024 10:22:24 +0200 Subject: timers: Adjust flseep() to reflect reality fsleep() simply implements the recommendations of the outdated documentation in "Documentation/timers/timers-howto.rst". This should be a user friendly interface to choose always the best timeout function approach: - udelay() for very short sleep durations shorter than 10 microseconds - usleep_range() for sleep durations until 20 milliseconds - msleep() for the others The actual implementation has several problems: - It does not take into account that HZ resolution also has an impact on granularity of jiffies and has also an impact on the granularity of the buckets of timer wheel levels. This means that accuracy for the timeout does not have an upper limit. When executing fsleep(20000) on a HZ=100 system, the possible additional slack will be 50% as the granularity of the buckets in the lowest level is 10 milliseconds. - The upper limit of usleep_range() is twice the requested timeout. When no other interrupts occur in this range, the maximum value is used. This means that the requested sleep length has then an additional delay of 100%. Change the thresholds for the decisions in fsleep() to make sure the maximum slack which is added to the sleep duration is 25%. Note: Outdated documentation will be updated in a followup patch. 
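To make the new threshold concrete, a small userspace sketch (illustrative only; TICK_NSEC is approximated as NSEC_PER_SEC / HZ and max_slack_shift is 2, as in the diff below):

#include <stdio.h>

/*
 * Recompute fsleep()'s new usleep_range()/msleep() cut-over,
 * USLEEP_RANGE_UPPER_BOUND = (TICK_NSEC << max_slack_shift) / NSEC_PER_USEC,
 * for a few HZ configurations.
 */
int main(void)
{
	const unsigned long hz[] = { 100, 250, 1000 };

	for (int i = 0; i < 3; i++) {
		unsigned long tick_nsec = 1000000000UL / hz[i];
		unsigned long upper_us = (tick_nsec << 2) / 1000;

		/* Below the cut-over: usleep_range(u, u + (u >> 2)), i.e. at most 25% slack. */
		printf("HZ=%-4lu cut-over at %6lu us\n", hz[i], upper_us);
	}
	return 0;
}

With HZ=1000 the cut-over is 4 ms, with HZ=250 it is 16 ms and with HZ=100 it is 40 ms; for longer durations msleep() is used, whose timer wheel granularity keeps the added slack within the same 25% bound.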
Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-7-dc8b907cb62f@linutronix.de --- include/linux/delay.h | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/delay.h b/include/linux/delay.h index 2de509e4adce..89866bab100d 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -11,6 +11,7 @@ #include #include +#include extern unsigned long loops_per_jiffy; @@ -102,15 +103,35 @@ static inline void ssleep(unsigned int seconds) msleep(seconds * 1000); } -/* see Documentation/timers/timers-howto.rst for the thresholds */ +static const unsigned int max_slack_shift = 2; +#define USLEEP_RANGE_UPPER_BOUND ((TICK_NSEC << max_slack_shift) / NSEC_PER_USEC) + +/** + * fsleep - flexible sleep which autoselects the best mechanism + * @usecs: requested sleep duration in microseconds + * + * flseep() selects the best mechanism that will provide maximum 25% slack + * to the requested sleep duration. Therefore it uses: + * + * * udelay() loop for sleep durations <= 10 microseconds to avoid hrtimer + * overhead for really short sleep durations. + * * usleep_range() for sleep durations which would lead with the usage of + * msleep() to a slack larger than 25%. This depends on the granularity of + * jiffies. + * * msleep() for all other sleep durations. + * + * Note: When %CONFIG_HIGH_RES_TIMERS is not set, all sleeps are processed with + * the granularity of jiffies and the slack might exceed 25% especially for + * short sleep durations. + */ static inline void fsleep(unsigned long usecs) { if (usecs <= 10) udelay(usecs); - else if (usecs <= 20000) - usleep_range(usecs, 2 * usecs); + else if (usecs < USLEEP_RANGE_UPPER_BOUND) + usleep_range(usecs, usecs + (usecs >> max_slack_shift)); else - msleep(DIV_ROUND_UP(usecs, 1000)); + msleep(DIV_ROUND_UP(usecs, USEC_PER_MSEC)); } #endif /* defined(_LINUX_DELAY_H) */ -- cgit v1.2.3 From 89124747f096fc0fe44be0162c7b4fb3271739e8 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 14 Oct 2024 10:22:29 +0200 Subject: iopoll/regmap/phy/snd: Fix comment referencing outdated timer documentation Function descriptions in iopoll.h, regmap.h, phy.h and sound/soc/sof/ops.h copied all the same outdated documentation about sleep/delay function limitations. In those comments, the generic (and still outdated) timer documentation file is referenced. As proper function descriptions for used delay and sleep functions are in place, simply update the descriptions to reference to them. While at it fix missing colon after "Returns" in function description and move return value description to the end of the function description. 
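For reference (not part of this patch), a typical caller of the poll helpers whose kernel-doc is touched here; the device, register offset and READY bit are made up:

#include <linux/iopoll.h>
#include <linux/io.h>
#include <linux/bits.h>

#define MYDEV_STATUS	0x04
#define MYDEV_READY	BIT(0)

static int mydev_wait_ready(void __iomem *base)
{
	u32 val;

	/*
	 * Sleep up to 10 us between reads (usleep_range() underneath, hence
	 * the pointer to its description), and time out after 10 ms.
	 */
	return readl_poll_timeout(base + MYDEV_STATUS, val,
				  val & MYDEV_READY, 10, 10000);
}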
Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Andrew Lunn # for phy.h Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20241014-devel-anna-maria-b4-timers-flseep-v3-12-dc8b907cb62f@linutronix.de --- include/linux/iopoll.h | 52 +++++++++++++++++++++++++------------------------- include/linux/phy.h | 9 +++++---- include/linux/regmap.h | 38 ++++++++++++++++++------------------ sound/soc/sof/ops.h | 8 ++++---- 4 files changed, 54 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 19a7b00baff4..91324c331a4b 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -19,19 +19,19 @@ * @op: accessor function (takes @args as its arguments) * @val: Variable to read the value into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * @sleep_before_read: if it is true, sleep @sleep_us before read. * @args: arguments for @op poll * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @args is stored in @val. Must not - * be called from atomic context if sleep_us or timeout_us are used. - * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @args is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. */ #define read_poll_timeout(op, val, cond, sleep_us, timeout_us, \ sleep_before_read, args...) \ @@ -64,22 +64,22 @@ * @op: accessor function (takes @args as its arguments) * @val: Variable to read the value into * @cond: Break condition (usually involving @val) - * @delay_us: Time to udelay between reads in us (0 tight-loops). Should - * be less than ~10us since udelay is used (see - * Documentation/timers/timers-howto.rst). + * @delay_us: Time to udelay between reads in us (0 tight-loops). Please + * read udelay() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * @delay_before_read: if it is true, delay @delay_us before read. * @args: arguments for @op poll * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @args is stored in @val. - * * This macro does not rely on timekeeping. Hence it is safe to call even when * timekeeping is suspended, at the expense of an underestimation of wall clock * time, which is rather minimal with a non-zero delay_us. * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @args is stored in @val. */ #define read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, \ delay_before_read, args...) \ @@ -119,17 +119,17 @@ * @addr: Address to poll * @val: Variable to read the value into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). 
Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @addr is stored in @val. Must not - * be called from atomic context if sleep_us or timeout_us are used. - * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @addr is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. */ #define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \ read_poll_timeout(op, val, cond, sleep_us, timeout_us, false, addr) @@ -140,16 +140,16 @@ * @addr: Address to poll * @val: Variable to read the value into * @cond: Break condition (usually involving @val) - * @delay_us: Time to udelay between reads in us (0 tight-loops). Should - * be less than ~10us since udelay is used (see - * Documentation/timers/timers-howto.rst). + * @delay_us: Time to udelay between reads in us (0 tight-loops). Please + * read udelay() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @addr is stored in @val. - * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @addr is stored in @val. */ #define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \ read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, false, addr) diff --git a/include/linux/phy.h b/include/linux/phy.h index a98bc91a0cde..504766d4b2d5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1378,12 +1378,13 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); * @regnum: The register on the MMD to read * @val: Variable to read the register into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * @sleep_before_read: if it is true, sleep @sleep_us before read. - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either * case, the last read value at @args is stored in @val. Must not * be called from atomic context if sleep_us or timeout_us are used. 
*/ diff --git a/include/linux/regmap.h b/include/linux/regmap.h index f9ccad32fc5c..75f162b60ba1 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -106,17 +106,17 @@ struct reg_sequence { * @addr: Address to poll * @val: Unsigned integer variable to read the value into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_read + * This is modelled after the readx_poll_timeout macros in linux/iopoll.h. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_read * error return value in case of a error read. In the two former cases, * the last read value at @addr is stored in @val. Must not be called * from atomic context if sleep_us or timeout_us are used. - * - * This is modelled after the readx_poll_timeout macros in linux/iopoll.h. */ #define regmap_read_poll_timeout(map, addr, val, cond, sleep_us, timeout_us) \ ({ \ @@ -133,20 +133,20 @@ struct reg_sequence { * @addr: Address to poll * @val: Unsigned integer variable to read the value into * @cond: Break condition (usually involving @val) - * @delay_us: Time to udelay between reads in us (0 tight-loops). - * Should be less than ~10us since udelay is used - * (see Documentation/timers/timers-howto.rst). + * @delay_us: Time to udelay between reads in us (0 tight-loops). Please + * read udelay() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_read - * error return value in case of a error read. In the two former cases, - * the last read value at @addr is stored in @val. - * * This is modelled after the readx_poll_timeout_atomic macros in linux/iopoll.h. * * Note: In general regmap cannot be used in atomic context. If you want to use * this macro then first setup your regmap for atomic use (flat or no cache * and MMIO regmap). + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_read + * error return value in case of a error read. In the two former cases, + * the last read value at @addr is stored in @val. */ #define regmap_read_poll_timeout_atomic(map, addr, val, cond, delay_us, timeout_us) \ ({ \ @@ -177,17 +177,17 @@ struct reg_sequence { * @field: Regmap field to read from * @val: Unsigned integer variable to read the value into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_field_read + * This is modelled after the readx_poll_timeout macros in linux/iopoll.h. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_field_read * error return value in case of a error read. 
In the two former cases, * the last read value at @addr is stored in @val. Must not be called * from atomic context if sleep_us or timeout_us are used. - * - * This is modelled after the readx_poll_timeout macros in linux/iopoll.h. */ #define regmap_field_read_poll_timeout(field, val, cond, sleep_us, timeout_us) \ ({ \ diff --git a/sound/soc/sof/ops.h b/sound/soc/sof/ops.h index 2584621c3b2d..d73644e85b6e 100644 --- a/sound/soc/sof/ops.h +++ b/sound/soc/sof/ops.h @@ -597,12 +597,12 @@ snd_sof_is_chain_dma_supported(struct snd_sof_dev *sdev, u32 dai_type) * @addr: Address to poll * @val: Variable to read the value into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either * case, the last read value at @addr is stored in @val. Must not * be called from atomic context if sleep_us or timeout_us are used. * -- cgit v1.2.3 From 95b3120a485f77a9bb8060bf3398311e3dcb6c65 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 16:52:16 -0700 Subject: neighbour: Remove NEIGH_DN_TABLE. Since commit 1202cdd66531 ("Remove DECnet support from kernel"), NEIGH_DN_TABLE is no longer used. MPLS has implicit dependency on it in nla_put_via(), but nla_get_via() does not support DECnet. Let's remove NEIGH_DN_TABLE. Now, neigh_tables[] has only 2 elements and no extra iteration for DECnet in many places. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241014235216.10785-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 1 - net/mpls/af_mpls.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index a44f262a7384..3887ed9e5026 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -239,7 +239,6 @@ struct neigh_table { enum { NEIGH_ARP_TABLE = 0, NEIGH_ND_TABLE = 1, - NEIGH_DN_TABLE = 2, NEIGH_NR_TABLES, NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */ }; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index df62638b6498..a0573847bc55 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1664,7 +1664,7 @@ static int nla_put_via(struct sk_buff *skb, u8 table, const void *addr, int alen) { static const int table_to_family[NEIGH_NR_TABLES + 1] = { - AF_INET, AF_INET6, AF_DECnet, AF_PACKET, + AF_INET, AF_INET6, AF_PACKET, }; struct nlattr *nla; struct rtvia *via; -- cgit v1.2.3 From e1c6c383123ab1caadbfe39b3362ce0cc09dd766 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:28 -0700 Subject: rtnetlink: Remove rtnl_register() and rtnl_register_module(). No one uses rtnl_register() and rtnl_register_module(). Let's remove them. 
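For illustration (not part of this patch), how a hypothetical protocol would register its handlers with the remaining rtnl_register_many() interface; all myproto_* names are made up:

#include <linux/module.h>
#include <net/rtnetlink.h>

static int myproto_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
			   struct netlink_ext_ack *extack);
static int myproto_dump(struct sk_buff *skb, struct netlink_callback *cb);

static const struct rtnl_msg_handler myproto_rtnl_msg_handlers[] = {
	{.owner = THIS_MODULE, .protocol = PF_UNSPEC,
	 .msgtype = RTM_NEWLINK, .doit = myproto_newlink},
	{.owner = THIS_MODULE, .protocol = PF_UNSPEC,
	 .msgtype = RTM_GETLINK, .dumpit = myproto_dump},
};

static int __init myproto_init(void)
{
	/* For modules, earlier registrations are unwound on failure. */
	return rtnl_register_many(myproto_rtnl_msg_handlers);
}

static void __exit myproto_exit(void)
{
	rtnl_unregister_many(myproto_rtnl_msg_handlers);
}

module_init(myproto_init);
module_exit(myproto_exit);
MODULE_LICENSE("GPL");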
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014201828.91221-12-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/rtnetlink.h | 15 ++++++---- net/core/rtnetlink.c | 74 ++++++++++++++----------------------------------- 2 files changed, 31 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 2d3eb7cb4dff..bb49c5708ce7 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -29,6 +29,16 @@ static inline enum rtnl_kinds rtnl_msgtype_kind(int msgtype) return msgtype & RTNL_KIND_MASK; } +/** + * struct rtnl_msg_handler - rtnetlink message type and handlers + * + * @owner: NULL for built-in, THIS_MODULE for module + * @protocol: Protocol family or PF_UNSPEC + * @msgtype: rtnetlink message type + * @doit: Function pointer called for each request message + * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message + * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions + */ struct rtnl_msg_handler { struct module *owner; int protocol; @@ -38,11 +48,6 @@ struct rtnl_msg_handler { int flags; }; -void rtnl_register(int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); -int rtnl_register_module(struct module *owner, int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); -int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0fbbfeb2cb50..a9c92392fb1d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -338,57 +338,6 @@ unlock: return ret; } -/** - * rtnl_register_module - Register a rtnetlink message type - * - * @owner: module registering the hook (THIS_MODULE) - * @protocol: Protocol family or PF_UNSPEC - * @msgtype: rtnetlink message type - * @doit: Function pointer called for each request message - * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions - * - * Like rtnl_register, but for use by removable modules. - */ -int rtnl_register_module(struct module *owner, - int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit, - unsigned int flags) -{ - return rtnl_register_internal(owner, protocol, msgtype, - doit, dumpit, flags); -} -EXPORT_SYMBOL_GPL(rtnl_register_module); - -/** - * rtnl_register - Register a rtnetlink message type - * @protocol: Protocol family or PF_UNSPEC - * @msgtype: rtnetlink message type - * @doit: Function pointer called for each request message - * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions - * - * Registers the specified function pointers (at least one of them has - * to be non-NULL) to be called whenever a request message for the - * specified protocol family and message type is received. - * - * The special protocol family PF_UNSPEC may be used to define fallback - * function pointers for the case when no entry for the specific protocol - * family exists. 
- */ -void rtnl_register(int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit, - unsigned int flags) -{ - int err; - - err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit, - flags); - if (err) - pr_err("Unable to register rtnetlink message handler, " - "protocol = %d, message type = %d\n", protocol, msgtype); -} - /** * rtnl_unregister - Unregister a rtnetlink message type * @protocol: Protocol family or PF_UNSPEC @@ -396,7 +345,7 @@ void rtnl_register(int protocol, int msgtype, * * Returns 0 on success or a negative error code. */ -int rtnl_unregister(int protocol, int msgtype) +static int rtnl_unregister(int protocol, int msgtype) { struct rtnl_link __rcu **tab; struct rtnl_link *link; @@ -419,7 +368,6 @@ int rtnl_unregister(int protocol, int msgtype) return 0; } -EXPORT_SYMBOL_GPL(rtnl_unregister); /** * rtnl_unregister_all - Unregister all rtnetlink message type of a protocol @@ -454,6 +402,26 @@ void rtnl_unregister_all(int protocol) } EXPORT_SYMBOL_GPL(rtnl_unregister_all); +/** + * __rtnl_register_many - Register rtnetlink message types + * @handlers: Array of struct rtnl_msg_handlers + * @n: The length of @handlers + * + * Registers the specified function pointers (at least one of them has + * to be non-NULL) to be called whenever a request message for the + * specified protocol family and message type is received. + * + * The special protocol family PF_UNSPEC may be used to define fallback + * function pointers for the case when no entry for the specific protocol + * family exists. + * + * When one element of @handlers fails to register, + * 1) built-in: panics. + * 2) modules : the previous successful registrations are unwinded + * and an error is returned. + * + * Use rtnl_register_many(). + */ int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n) { const struct rtnl_msg_handler *handler; -- cgit v1.2.3 From 6390834c6f9b2c5e33f52f34579efa0d0df073db Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 3 Oct 2024 13:31:10 +0300 Subject: media: uapi: Add meta formats for PiSP FE config and stats Add two meta formats for PiSP FE: V4L2_META_FMT_RPI_FE_CFG and V4L2_META_FMT_RPI_FE_STATS. The former is used to provide configuration for the FE and the latter is used to read the statistics from the FE. Signed-off-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- .../userspace-api/media/v4l/meta-formats.rst | 1 + .../userspace-api/media/v4l/metafmt-pisp-fe.rst | 39 ++++++++++++++++++++++ drivers/media/v4l2-core/v4l2-ioctl.c | 2 ++ include/uapi/linux/videodev2.h | 2 ++ 4 files changed, 44 insertions(+) create mode 100644 Documentation/userspace-api/media/v4l/metafmt-pisp-fe.rst (limited to 'include') diff --git a/Documentation/userspace-api/media/v4l/meta-formats.rst b/Documentation/userspace-api/media/v4l/meta-formats.rst index c6e56b5888bc..86ffb3bc8ade 100644 --- a/Documentation/userspace-api/media/v4l/meta-formats.rst +++ b/Documentation/userspace-api/media/v4l/meta-formats.rst @@ -16,6 +16,7 @@ These formats are used for the :ref:`metadata` interface only. metafmt-generic metafmt-intel-ipu3 metafmt-pisp-be + metafmt-pisp-fe metafmt-rkisp1 metafmt-uvc metafmt-vivid diff --git a/Documentation/userspace-api/media/v4l/metafmt-pisp-fe.rst b/Documentation/userspace-api/media/v4l/metafmt-pisp-fe.rst new file mode 100644 index 000000000000..fddeada83e4a --- /dev/null +++ b/Documentation/userspace-api/media/v4l/metafmt-pisp-fe.rst @@ -0,0 +1,39 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. 
_v4l2-meta-fmt-rpi-fe-cfg: + +************************ +V4L2_META_FMT_RPI_FE_CFG +************************ + +Raspberry Pi PiSP Front End configuration format +================================================ + +The Raspberry Pi PiSP Front End image signal processor is configured by +userspace by providing a buffer of configuration parameters to the +`rp1-cfe-fe-config` output video device node using the +:c:type:`v4l2_meta_format` interface. + +The `Raspberry Pi PiSP technical specification +`_ +provide detailed description of the Front End configuration and programming +model. + +.. _v4l2-meta-fmt-rpi-fe-stats: + +************************** +V4L2_META_FMT_RPI_FE_STATS +************************** + +Raspberry Pi PiSP Front End statistics format +============================================= + +The Raspberry Pi PiSP Front End image signal processor provides statistics data +by writing to a buffer provided via the `rp1-cfe-fe-stats` capture video device +node using the +:c:type:`v4l2_meta_format` interface. + +The `Raspberry Pi PiSP technical specification +`_ +provide detailed description of the Front End configuration and programming +model. diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index b9a3c6b20282..0304daa8471d 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1468,6 +1468,8 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt) case V4L2_PIX_FMT_Y212: descr = "12-bit YUYV Packed"; break; case V4L2_PIX_FMT_Y216: descr = "16-bit YUYV Packed"; break; case V4L2_META_FMT_RPI_BE_CFG: descr = "RPi PiSP BE Config format"; break; + case V4L2_META_FMT_RPI_FE_CFG: descr = "RPi PiSP FE Config format"; break; + case V4L2_META_FMT_RPI_FE_STATS: descr = "RPi PiSP FE Statistics format"; break; case V4L2_META_FMT_GENERIC_8: descr = "8-bit Generic Metadata"; break; case V4L2_META_FMT_GENERIC_CSI2_10: descr = "8-bit Generic Meta, 10b CSI-2"; break; case V4L2_META_FMT_GENERIC_CSI2_12: descr = "8-bit Generic Meta, 12b CSI-2"; break; diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index ded023edac70..e7c4dce39007 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -860,6 +860,8 @@ struct v4l2_pix_format { /* Vendor specific - used for RaspberryPi PiSP */ #define V4L2_META_FMT_RPI_BE_CFG v4l2_fourcc('R', 'P', 'B', 'C') /* PiSP BE configuration */ +#define V4L2_META_FMT_RPI_FE_CFG v4l2_fourcc('R', 'P', 'F', 'C') /* PiSP FE configuration */ +#define V4L2_META_FMT_RPI_FE_STATS v4l2_fourcc('R', 'P', 'F', 'S') /* PiSP FE stats */ #ifdef __KERNEL__ /* -- cgit v1.2.3 From 6edb685abb2af445773876a326292b989dcb3c9f Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 3 Oct 2024 13:31:12 +0300 Subject: media: raspberrypi: Add support for RP1-CFE Add support for Raspberry Pi CFE. The CFE is a hardware block that contains: - MIPI D-PHY - MIPI CSI-2 receiver - Front End ISP (FE) The driver has been upported from the Raspberry Pi kernel commit 88a681df9623 ("ARM: dts: bcm2712-rpi: Add i2c_pins labels"). 
Co-developed-by: Naushir Patuck Signed-off-by: Naushir Patuck Signed-off-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- MAINTAINERS | 7 + drivers/media/platform/raspberrypi/Kconfig | 1 + drivers/media/platform/raspberrypi/Makefile | 1 + drivers/media/platform/raspberrypi/rp1-cfe/Kconfig | 15 + .../media/platform/raspberrypi/rp1-cfe/Makefile | 6 + .../media/platform/raspberrypi/rp1-cfe/cfe-fmts.h | 332 +++ .../media/platform/raspberrypi/rp1-cfe/cfe-trace.h | 202 ++ drivers/media/platform/raspberrypi/rp1-cfe/cfe.c | 2504 ++++++++++++++++++++ drivers/media/platform/raspberrypi/rp1-cfe/cfe.h | 43 + drivers/media/platform/raspberrypi/rp1-cfe/csi2.c | 586 +++++ drivers/media/platform/raspberrypi/rp1-cfe/csi2.h | 89 + drivers/media/platform/raspberrypi/rp1-cfe/dphy.c | 181 ++ drivers/media/platform/raspberrypi/rp1-cfe/dphy.h | 27 + .../media/platform/raspberrypi/rp1-cfe/pisp-fe.c | 605 +++++ .../media/platform/raspberrypi/rp1-cfe/pisp-fe.h | 53 + .../uapi/linux/media/raspberrypi/pisp_fe_config.h | 273 +++ .../linux/media/raspberrypi/pisp_fe_statistics.h | 64 + 17 files changed, 4989 insertions(+) create mode 100644 drivers/media/platform/raspberrypi/rp1-cfe/Kconfig create mode 100644 drivers/media/platform/raspberrypi/rp1-cfe/Makefile create mode 100644 drivers/media/platform/raspberrypi/rp1-cfe/cfe-fmts.h create mode 100644 drivers/media/platform/raspberrypi/rp1-cfe/cfe-trace.h create mode 100644 drivers/media/platform/raspberrypi/rp1-cfe/cfe.c create mode 100644 drivers/media/platform/raspberrypi/rp1-cfe/cfe.h create mode 100644 drivers/media/platform/raspberrypi/rp1-cfe/csi2.c create mode 100644 drivers/media/platform/raspberrypi/rp1-cfe/csi2.h create mode 100644 drivers/media/platform/raspberrypi/rp1-cfe/dphy.c create mode 100644 drivers/media/platform/raspberrypi/rp1-cfe/dphy.h create mode 100644 drivers/media/platform/raspberrypi/rp1-cfe/pisp-fe.c create mode 100644 drivers/media/platform/raspberrypi/rp1-cfe/pisp-fe.h create mode 100644 include/uapi/linux/media/raspberrypi/pisp_fe_config.h create mode 100644 include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index a08759706711..7a14891a8fa9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19372,6 +19372,13 @@ F: Documentation/devicetree/bindings/media/raspberrypi,pispbe.yaml F: drivers/media/platform/raspberrypi/pisp_be/ F: include/uapi/linux/media/raspberrypi/ +RASPBERRY PI PISP CAMERA FRONT END +M: Tomi Valkeinen +M: Raspberry Pi Kernel Maintenance +S: Maintained +F: Documentation/devicetree/bindings/media/raspberrypi,rp1-cfe.yaml +F: drivers/media/platform/raspberrypi/rp1-cfe/ + RC-CORE / LIRC FRAMEWORK M: Sean Young L: linux-media@vger.kernel.org diff --git a/drivers/media/platform/raspberrypi/Kconfig b/drivers/media/platform/raspberrypi/Kconfig index e928f979019e..bd5101ffefb5 100644 --- a/drivers/media/platform/raspberrypi/Kconfig +++ b/drivers/media/platform/raspberrypi/Kconfig @@ -3,3 +3,4 @@ comment "Raspberry Pi media platform drivers" source "drivers/media/platform/raspberrypi/pisp_be/Kconfig" +source "drivers/media/platform/raspberrypi/rp1-cfe/Kconfig" diff --git a/drivers/media/platform/raspberrypi/Makefile b/drivers/media/platform/raspberrypi/Makefile index c0d1a2dab486..af7fde84eefe 100644 --- a/drivers/media/platform/raspberrypi/Makefile +++ b/drivers/media/platform/raspberrypi/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += pisp_be/ +obj-y += rp1-cfe/ diff --git 
a/drivers/media/platform/raspberrypi/rp1-cfe/Kconfig b/drivers/media/platform/raspberrypi/rp1-cfe/Kconfig new file mode 100644 index 000000000000..327b61f1134b --- /dev/null +++ b/drivers/media/platform/raspberrypi/rp1-cfe/Kconfig @@ -0,0 +1,15 @@ +# RP1 V4L2 camera support + +config VIDEO_RP1_CFE + tristate "Raspberry Pi RP1 Camera Front End (CFE) video capture driver" + depends on VIDEO_DEV + depends on PM + select VIDEO_V4L2_SUBDEV_API + select MEDIA_CONTROLLER + select VIDEOBUF2_DMA_CONTIG + select V4L2_FWNODE + help + Say Y here to enable support for the Raspberry Pi RP1 Camera Front End. + + To compile this driver as a module, choose M here. The module will be + called rp1-cfe. diff --git a/drivers/media/platform/raspberrypi/rp1-cfe/Makefile b/drivers/media/platform/raspberrypi/rp1-cfe/Makefile new file mode 100644 index 000000000000..3f0d0fc8570e --- /dev/null +++ b/drivers/media/platform/raspberrypi/rp1-cfe/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for RP1 Camera Front End driver +# +rp1-cfe-objs := cfe.o csi2.o pisp-fe.o dphy.o +obj-$(CONFIG_VIDEO_RP1_CFE) += rp1-cfe.o diff --git a/drivers/media/platform/raspberrypi/rp1-cfe/cfe-fmts.h b/drivers/media/platform/raspberrypi/rp1-cfe/cfe-fmts.h new file mode 100644 index 000000000000..7aecf7f83733 --- /dev/null +++ b/drivers/media/platform/raspberrypi/rp1-cfe/cfe-fmts.h @@ -0,0 +1,332 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * RP1 Camera Front End formats definition + * + * Copyright (C) 2021-2024 - Raspberry Pi Ltd. + */ +#ifndef _CFE_FMTS_H_ +#define _CFE_FMTS_H_ + +#include "cfe.h" +#include + +static const struct cfe_fmt formats[] = { + /* YUV Formats */ + { + .fourcc = V4L2_PIX_FMT_YUYV, + .code = MEDIA_BUS_FMT_YUYV8_1X16, + .depth = 16, + .csi_dt = MIPI_CSI2_DT_YUV422_8B, + }, + { + .fourcc = V4L2_PIX_FMT_UYVY, + .code = MEDIA_BUS_FMT_UYVY8_1X16, + .depth = 16, + .csi_dt = MIPI_CSI2_DT_YUV422_8B, + }, + { + .fourcc = V4L2_PIX_FMT_YVYU, + .code = MEDIA_BUS_FMT_YVYU8_1X16, + .depth = 16, + .csi_dt = MIPI_CSI2_DT_YUV422_8B, + }, + { + .fourcc = V4L2_PIX_FMT_VYUY, + .code = MEDIA_BUS_FMT_VYUY8_1X16, + .depth = 16, + .csi_dt = MIPI_CSI2_DT_YUV422_8B, + }, + { + /* RGB Formats */ + .fourcc = V4L2_PIX_FMT_RGB565, /* gggbbbbb rrrrrggg */ + .code = MEDIA_BUS_FMT_RGB565_2X8_LE, + .depth = 16, + .csi_dt = MIPI_CSI2_DT_RGB565, + }, + { .fourcc = V4L2_PIX_FMT_RGB565X, /* rrrrrggg gggbbbbb */ + .code = MEDIA_BUS_FMT_RGB565_2X8_BE, + .depth = 16, + .csi_dt = MIPI_CSI2_DT_RGB565, + }, + { + .fourcc = V4L2_PIX_FMT_RGB555, /* gggbbbbb arrrrrgg */ + .code = MEDIA_BUS_FMT_RGB555_2X8_PADHI_LE, + .depth = 16, + .csi_dt = MIPI_CSI2_DT_RGB555, + }, + { + .fourcc = V4L2_PIX_FMT_RGB555X, /* arrrrrgg gggbbbbb */ + .code = MEDIA_BUS_FMT_RGB555_2X8_PADHI_BE, + .depth = 16, + .csi_dt = MIPI_CSI2_DT_RGB555, + }, + { + .fourcc = V4L2_PIX_FMT_RGB24, /* rgb */ + .code = MEDIA_BUS_FMT_RGB888_1X24, + .depth = 24, + .csi_dt = MIPI_CSI2_DT_RGB888, + }, + { + .fourcc = V4L2_PIX_FMT_BGR24, /* bgr */ + .code = MEDIA_BUS_FMT_BGR888_1X24, + .depth = 24, + .csi_dt = MIPI_CSI2_DT_RGB888, + }, + { + .fourcc = V4L2_PIX_FMT_RGB32, /* argb */ + .code = MEDIA_BUS_FMT_ARGB8888_1X32, + .depth = 32, + .csi_dt = 0x0, + }, + + /* Bayer Formats */ + { + .fourcc = V4L2_PIX_FMT_SBGGR8, + .code = MEDIA_BUS_FMT_SBGGR8_1X8, + .depth = 8, + .csi_dt = MIPI_CSI2_DT_RAW8, + .remap = { V4L2_PIX_FMT_SBGGR16, V4L2_PIX_FMT_PISP_COMP1_BGGR }, + }, + { + .fourcc = V4L2_PIX_FMT_SGBRG8, + .code = MEDIA_BUS_FMT_SGBRG8_1X8, + .depth = 8, + .csi_dt = 
MIPI_CSI2_DT_RAW8, + .remap = { V4L2_PIX_FMT_SGBRG16, V4L2_PIX_FMT_PISP_COMP1_GBRG }, + }, + { + .fourcc = V4L2_PIX_FMT_SGRBG8, + .code = MEDIA_BUS_FMT_SGRBG8_1X8, + .depth = 8, + .csi_dt = MIPI_CSI2_DT_RAW8, + .remap = { V4L2_PIX_FMT_SGRBG16, V4L2_PIX_FMT_PISP_COMP1_GRBG }, + }, + { + .fourcc = V4L2_PIX_FMT_SRGGB8, + .code = MEDIA_BUS_FMT_SRGGB8_1X8, + .depth = 8, + .csi_dt = MIPI_CSI2_DT_RAW8, + .remap = { V4L2_PIX_FMT_SRGGB16, V4L2_PIX_FMT_PISP_COMP1_RGGB }, + }, + { + .fourcc = V4L2_PIX_FMT_SBGGR10P, + .code = MEDIA_BUS_FMT_SBGGR10_1X10, + .depth = 10, + .csi_dt = MIPI_CSI2_DT_RAW10, + .remap = { V4L2_PIX_FMT_SBGGR16, V4L2_PIX_FMT_PISP_COMP1_BGGR }, + }, + { + .fourcc = V4L2_PIX_FMT_SGBRG10P, + .code = MEDIA_BUS_FMT_SGBRG10_1X10, + .depth = 10, + .csi_dt = MIPI_CSI2_DT_RAW10, + .remap = { V4L2_PIX_FMT_SGBRG16, V4L2_PIX_FMT_PISP_COMP1_GBRG }, + }, + { + .fourcc = V4L2_PIX_FMT_SGRBG10P, + .code = MEDIA_BUS_FMT_SGRBG10_1X10, + .depth = 10, + .csi_dt = MIPI_CSI2_DT_RAW10, + .remap = { V4L2_PIX_FMT_SGRBG16, V4L2_PIX_FMT_PISP_COMP1_GRBG }, + }, + { + .fourcc = V4L2_PIX_FMT_SRGGB10P, + .code = MEDIA_BUS_FMT_SRGGB10_1X10, + .depth = 10, + .csi_dt = MIPI_CSI2_DT_RAW10, + .remap = { V4L2_PIX_FMT_SRGGB16, V4L2_PIX_FMT_PISP_COMP1_RGGB }, + }, + { + .fourcc = V4L2_PIX_FMT_SBGGR12P, + .code = MEDIA_BUS_FMT_SBGGR12_1X12, + .depth = 12, + .csi_dt = MIPI_CSI2_DT_RAW12, + .remap = { V4L2_PIX_FMT_SBGGR16, V4L2_PIX_FMT_PISP_COMP1_BGGR }, + }, + { + .fourcc = V4L2_PIX_FMT_SGBRG12P, + .code = MEDIA_BUS_FMT_SGBRG12_1X12, + .depth = 12, + .csi_dt = MIPI_CSI2_DT_RAW12, + .remap = { V4L2_PIX_FMT_SGBRG16, V4L2_PIX_FMT_PISP_COMP1_GBRG }, + }, + { + .fourcc = V4L2_PIX_FMT_SGRBG12P, + .code = MEDIA_BUS_FMT_SGRBG12_1X12, + .depth = 12, + .csi_dt = MIPI_CSI2_DT_RAW12, + .remap = { V4L2_PIX_FMT_SGRBG16, V4L2_PIX_FMT_PISP_COMP1_GRBG }, + }, + { + .fourcc = V4L2_PIX_FMT_SRGGB12P, + .code = MEDIA_BUS_FMT_SRGGB12_1X12, + .depth = 12, + .csi_dt = MIPI_CSI2_DT_RAW12, + .remap = { V4L2_PIX_FMT_SRGGB16, V4L2_PIX_FMT_PISP_COMP1_RGGB }, + }, + { + .fourcc = V4L2_PIX_FMT_SBGGR14P, + .code = MEDIA_BUS_FMT_SBGGR14_1X14, + .depth = 14, + .csi_dt = MIPI_CSI2_DT_RAW14, + .remap = { V4L2_PIX_FMT_SBGGR16, V4L2_PIX_FMT_PISP_COMP1_BGGR }, + }, + { + .fourcc = V4L2_PIX_FMT_SGBRG14P, + .code = MEDIA_BUS_FMT_SGBRG14_1X14, + .depth = 14, + .csi_dt = MIPI_CSI2_DT_RAW14, + .remap = { V4L2_PIX_FMT_SGBRG16, V4L2_PIX_FMT_PISP_COMP1_GBRG }, + }, + { + .fourcc = V4L2_PIX_FMT_SGRBG14P, + .code = MEDIA_BUS_FMT_SGRBG14_1X14, + .depth = 14, + .csi_dt = MIPI_CSI2_DT_RAW14, + .remap = { V4L2_PIX_FMT_SGRBG16, V4L2_PIX_FMT_PISP_COMP1_GRBG }, + }, + { + .fourcc = V4L2_PIX_FMT_SRGGB14P, + .code = MEDIA_BUS_FMT_SRGGB14_1X14, + .depth = 14, + .csi_dt = MIPI_CSI2_DT_RAW14, + .remap = { V4L2_PIX_FMT_SRGGB16, V4L2_PIX_FMT_PISP_COMP1_RGGB }, + }, + { + .fourcc = V4L2_PIX_FMT_SBGGR16, + .code = MEDIA_BUS_FMT_SBGGR16_1X16, + .depth = 16, + .csi_dt = MIPI_CSI2_DT_RAW16, + .flags = CFE_FORMAT_FLAG_FE_OUT, + .remap = { V4L2_PIX_FMT_SBGGR16, V4L2_PIX_FMT_PISP_COMP1_BGGR }, + }, + { + .fourcc = V4L2_PIX_FMT_SGBRG16, + .code = MEDIA_BUS_FMT_SGBRG16_1X16, + .depth = 16, + .csi_dt = MIPI_CSI2_DT_RAW16, + .flags = CFE_FORMAT_FLAG_FE_OUT, + .remap = { V4L2_PIX_FMT_SGBRG16, V4L2_PIX_FMT_PISP_COMP1_GBRG }, + }, + { + .fourcc = V4L2_PIX_FMT_SGRBG16, + .code = MEDIA_BUS_FMT_SGRBG16_1X16, + .depth = 16, + .csi_dt = MIPI_CSI2_DT_RAW16, + .flags = CFE_FORMAT_FLAG_FE_OUT, + .remap = { V4L2_PIX_FMT_SGRBG16, V4L2_PIX_FMT_PISP_COMP1_GRBG }, + }, + { + .fourcc = V4L2_PIX_FMT_SRGGB16, + 
.code = MEDIA_BUS_FMT_SRGGB16_1X16, + .depth = 16, + .csi_dt = MIPI_CSI2_DT_RAW16, + .flags = CFE_FORMAT_FLAG_FE_OUT, + .remap = { V4L2_PIX_FMT_SRGGB16, V4L2_PIX_FMT_PISP_COMP1_RGGB }, + }, + /* PiSP Compressed Mode 1 */ + { + .fourcc = V4L2_PIX_FMT_PISP_COMP1_RGGB, + .code = MEDIA_BUS_FMT_SRGGB16_1X16, + .depth = 8, + .flags = CFE_FORMAT_FLAG_FE_OUT, + }, + { + .fourcc = V4L2_PIX_FMT_PISP_COMP1_BGGR, + .code = MEDIA_BUS_FMT_SBGGR16_1X16, + .depth = 8, + .flags = CFE_FORMAT_FLAG_FE_OUT, + }, + { + .fourcc = V4L2_PIX_FMT_PISP_COMP1_GBRG, + .code = MEDIA_BUS_FMT_SGBRG16_1X16, + .depth = 8, + .flags = CFE_FORMAT_FLAG_FE_OUT, + }, + { + .fourcc = V4L2_PIX_FMT_PISP_COMP1_GRBG, + .code = MEDIA_BUS_FMT_SGRBG16_1X16, + .depth = 8, + .flags = CFE_FORMAT_FLAG_FE_OUT, + }, + /* Greyscale format */ + { + .fourcc = V4L2_PIX_FMT_GREY, + .code = MEDIA_BUS_FMT_Y8_1X8, + .depth = 8, + .csi_dt = MIPI_CSI2_DT_RAW8, + .remap = { V4L2_PIX_FMT_Y16, V4L2_PIX_FMT_PISP_COMP1_MONO }, + }, + { + .fourcc = V4L2_PIX_FMT_Y10P, + .code = MEDIA_BUS_FMT_Y10_1X10, + .depth = 10, + .csi_dt = MIPI_CSI2_DT_RAW10, + .remap = { V4L2_PIX_FMT_Y16, V4L2_PIX_FMT_PISP_COMP1_MONO }, + }, + { + .fourcc = V4L2_PIX_FMT_Y12P, + .code = MEDIA_BUS_FMT_Y12_1X12, + .depth = 12, + .csi_dt = MIPI_CSI2_DT_RAW12, + .remap = { V4L2_PIX_FMT_Y16, V4L2_PIX_FMT_PISP_COMP1_MONO }, + }, + { + .fourcc = V4L2_PIX_FMT_Y14P, + .code = MEDIA_BUS_FMT_Y14_1X14, + .depth = 14, + .csi_dt = MIPI_CSI2_DT_RAW14, + .remap = { V4L2_PIX_FMT_Y16, V4L2_PIX_FMT_PISP_COMP1_MONO }, + }, + { + .fourcc = V4L2_PIX_FMT_Y16, + .code = MEDIA_BUS_FMT_Y16_1X16, + .depth = 16, + .csi_dt = MIPI_CSI2_DT_RAW16, + .flags = CFE_FORMAT_FLAG_FE_OUT, + .remap = { V4L2_PIX_FMT_Y16, V4L2_PIX_FMT_PISP_COMP1_MONO }, + }, + { + .fourcc = V4L2_PIX_FMT_PISP_COMP1_MONO, + .code = MEDIA_BUS_FMT_Y16_1X16, + .depth = 8, + .flags = CFE_FORMAT_FLAG_FE_OUT, + }, + + /* Embedded data formats */ + { + .fourcc = V4L2_META_FMT_GENERIC_8, + .code = MEDIA_BUS_FMT_META_8, + .depth = 8, + .csi_dt = MIPI_CSI2_DT_EMBEDDED_8B, + .flags = CFE_FORMAT_FLAG_META_CAP, + }, + { + .fourcc = V4L2_META_FMT_GENERIC_CSI2_10, + .code = MEDIA_BUS_FMT_META_10, + .depth = 10, + .csi_dt = MIPI_CSI2_DT_EMBEDDED_8B, + .flags = CFE_FORMAT_FLAG_META_CAP, + }, + { + .fourcc = V4L2_META_FMT_GENERIC_CSI2_12, + .code = MEDIA_BUS_FMT_META_12, + .depth = 12, + .csi_dt = MIPI_CSI2_DT_EMBEDDED_8B, + .flags = CFE_FORMAT_FLAG_META_CAP, + }, + + /* Frontend formats */ + { + .fourcc = V4L2_META_FMT_RPI_FE_CFG, + .code = MEDIA_BUS_FMT_FIXED, + .flags = CFE_FORMAT_FLAG_META_OUT, + }, + { + .fourcc = V4L2_META_FMT_RPI_FE_STATS, + .code = MEDIA_BUS_FMT_FIXED, + .flags = CFE_FORMAT_FLAG_META_CAP, + }, +}; + +#endif /* _CFE_FMTS_H_ */ diff --git a/drivers/media/platform/raspberrypi/rp1-cfe/cfe-trace.h b/drivers/media/platform/raspberrypi/rp1-cfe/cfe-trace.h new file mode 100644 index 000000000000..1a36259f51b7 --- /dev/null +++ b/drivers/media/platform/raspberrypi/rp1-cfe/cfe-trace.h @@ -0,0 +1,202 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024 Raspberry Pi Ltd. 
+ * Copyright (c) 2024 Ideas on Board Oy + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cfe + +#if !defined(_CFE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _CFE_TRACE_H + +#include +#include + +TRACE_EVENT(cfe_return_buffer, + TP_PROTO(u32 node_id, u32 buf_idx, u32 queue_id), + TP_ARGS(node_id, buf_idx, queue_id), + TP_STRUCT__entry( + __field(u32, node_id) + __field(u32, buf_idx) + __field(u32, queue_id) + ), + TP_fast_assign( + __entry->node_id = node_id; + __entry->buf_idx = buf_idx; + __entry->queue_id = queue_id; + ), + TP_printk("node=%u buf=%u, queue=%u", __entry->node_id, + __entry->buf_idx, __entry->queue_id) +); + +DECLARE_EVENT_CLASS(cfe_buffer_template, + TP_PROTO(u32 node_id, struct vb2_buffer *buf), + TP_ARGS(node_id, buf), + TP_STRUCT__entry( + __field(u32, node_id) + __field(u32, buf_idx) + ), + TP_fast_assign( + __entry->node_id = node_id; + __entry->buf_idx = buf->index; + ), + TP_printk("node=%u buf=%u", __entry->node_id, __entry->buf_idx) +); + +DEFINE_EVENT(cfe_buffer_template, cfe_buffer_prepare, + TP_PROTO(u32 node_id, struct vb2_buffer *buf), + TP_ARGS(node_id, buf)); + +TRACE_EVENT(cfe_buffer_queue, + TP_PROTO(u32 node_id, struct vb2_buffer *buf, bool schedule_now), + TP_ARGS(node_id, buf, schedule_now), + TP_STRUCT__entry( + __field(u32, node_id) + __field(u32, buf_idx) + __field(bool, schedule_now) + ), + TP_fast_assign( + __entry->node_id = node_id; + __entry->buf_idx = buf->index; + __entry->schedule_now = schedule_now; + ), + TP_printk("node=%u buf=%u%s", __entry->node_id, __entry->buf_idx, + __entry->schedule_now ? " schedule immediately" : "") +); + +DEFINE_EVENT(cfe_buffer_template, cfe_csi2_schedule, + TP_PROTO(u32 node_id, struct vb2_buffer *buf), + TP_ARGS(node_id, buf)); + +DEFINE_EVENT(cfe_buffer_template, cfe_fe_schedule, + TP_PROTO(u32 node_id, struct vb2_buffer *buf), + TP_ARGS(node_id, buf)); + +TRACE_EVENT(cfe_buffer_complete, + TP_PROTO(u32 node_id, struct vb2_v4l2_buffer *buf), + TP_ARGS(node_id, buf), + TP_STRUCT__entry( + __field(u32, node_id) + __field(u32, buf_idx) + __field(u32, seq) + __field(u64, ts) + ), + TP_fast_assign( + __entry->node_id = node_id; + __entry->buf_idx = buf->vb2_buf.index; + __entry->seq = buf->sequence; + __entry->ts = buf->vb2_buf.timestamp; + ), + TP_printk("node=%u buf=%u seq=%u ts=%llu", __entry->node_id, + __entry->buf_idx, __entry->seq, __entry->ts) +); + +TRACE_EVENT(cfe_frame_start, + TP_PROTO(u32 node_id, u32 fs_count), + TP_ARGS(node_id, fs_count), + TP_STRUCT__entry( + __field(u32, node_id) + __field(u32, fs_count) + ), + TP_fast_assign( + __entry->node_id = node_id; + __entry->fs_count = fs_count; + ), + TP_printk("node=%u fs_count=%u", __entry->node_id, __entry->fs_count) +); + +TRACE_EVENT(cfe_frame_end, + TP_PROTO(u32 node_id, u32 fs_count), + TP_ARGS(node_id, fs_count), + TP_STRUCT__entry( + __field(u32, node_id) + __field(u32, fs_count) + ), + TP_fast_assign( + __entry->node_id = node_id; + __entry->fs_count = fs_count; + ), + TP_printk("node=%u fs_count=%u", __entry->node_id, __entry->fs_count) +); + +TRACE_EVENT(cfe_prepare_next_job, + TP_PROTO(bool fe_enabled), + TP_ARGS(fe_enabled), + TP_STRUCT__entry( + __field(bool, fe_enabled) + ), + TP_fast_assign( + __entry->fe_enabled = fe_enabled; + ), + TP_printk("fe_enabled=%u", __entry->fe_enabled) +); + +/* These are copied from csi2.c */ +#define CSI2_STATUS_IRQ_FS(x) (BIT(0) << (x)) +#define CSI2_STATUS_IRQ_FE(x) (BIT(4) << (x)) +#define CSI2_STATUS_IRQ_FE_ACK(x) (BIT(8) << (x)) +#define CSI2_STATUS_IRQ_LE(x) (BIT(12) << (x)) +#define 
CSI2_STATUS_IRQ_LE_ACK(x) (BIT(16) << (x)) + +TRACE_EVENT(csi2_irq, + TP_PROTO(u32 channel, u32 status, u32 dbg), + TP_ARGS(channel, status, dbg), + TP_STRUCT__entry( + __field(u32, channel) + __field(u32, status) + __field(u32, dbg) + ), + TP_fast_assign( + __entry->channel = channel; + __entry->status = status; + __entry->dbg = dbg; + ), + TP_printk("ch=%u flags=[ %s%s%s%s%s] frame=%u line=%u\n", + __entry->channel, + (__entry->status & CSI2_STATUS_IRQ_FS(__entry->channel)) ? + "FS " : "", + (__entry->status & CSI2_STATUS_IRQ_FE(__entry->channel)) ? + "FE " : "", + (__entry->status & CSI2_STATUS_IRQ_FE_ACK(__entry->channel)) ? + "FE_ACK " : "", + (__entry->status & CSI2_STATUS_IRQ_LE(__entry->channel)) ? + "LE " : "", + (__entry->status & CSI2_STATUS_IRQ_LE_ACK(__entry->channel)) ? + "LE_ACK " : "", + __entry->dbg >> 16, __entry->dbg & 0xffff) +); + +TRACE_EVENT(fe_irq, + TP_PROTO(u32 status, u32 output_status, u32 frame_status, + u32 error_status, u32 int_status), + TP_ARGS(status, output_status, frame_status, error_status, int_status), + TP_STRUCT__entry( + __field(u32, status) + __field(u32, output_status) + __field(u32, frame_status) + __field(u32, error_status) + __field(u32, int_status) + ), + TP_fast_assign( + __entry->status = status; + __entry->output_status = output_status; + __entry->frame_status = frame_status; + __entry->error_status = error_status; + __entry->int_status = int_status; + ), + TP_printk("status 0x%x out_status 0x%x frame_status 0x%x error_status 0x%x int_status 0x%x", + __entry->status, + __entry->output_status, + __entry->frame_status, + __entry->error_status, + __entry->int_status) +); + +#endif /* _CFE_TRACE_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE ../../drivers/media/platform/raspberrypi/rp1-cfe/cfe-trace +#include diff --git a/drivers/media/platform/raspberrypi/rp1-cfe/cfe.c b/drivers/media/platform/raspberrypi/rp1-cfe/cfe.c new file mode 100644 index 000000000000..045910de6c57 --- /dev/null +++ b/drivers/media/platform/raspberrypi/rp1-cfe/cfe.c @@ -0,0 +1,2504 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * RP1 Camera Front End Driver + * + * Copyright (c) 2021-2024 Raspberry Pi Ltd. + * Copyright (c) 2023-2024 Ideas on Board Oy + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "cfe-fmts.h" +#include "cfe.h" +#include "csi2.h" +#include "pisp-fe.h" + +#define CREATE_TRACE_POINTS +#include "cfe-trace.h" + +#define CFE_MODULE_NAME "rp1-cfe" +#define CFE_VERSION "1.0" + +#define cfe_dbg(cfe, fmt, arg...) dev_dbg(&(cfe)->pdev->dev, fmt, ##arg) +#define cfe_info(cfe, fmt, arg...) dev_info(&(cfe)->pdev->dev, fmt, ##arg) +#define cfe_err(cfe, fmt, arg...) 
dev_err(&(cfe)->pdev->dev, fmt, ##arg) + +/* MIPICFG registers */ +#define MIPICFG_CFG 0x004 +#define MIPICFG_INTR 0x028 +#define MIPICFG_INTE 0x02c +#define MIPICFG_INTF 0x030 +#define MIPICFG_INTS 0x034 + +#define MIPICFG_CFG_SEL_CSI BIT(0) + +#define MIPICFG_INT_CSI_DMA BIT(0) +#define MIPICFG_INT_CSI_HOST BIT(2) +#define MIPICFG_INT_PISP_FE BIT(4) + +#define BPL_ALIGNMENT 16 +#define MAX_BYTESPERLINE 0xffffff00 +#define MAX_BUFFER_SIZE 0xffffff00 +/* + * Max width is therefore determined by the max stride divided by the number of + * bits per pixel. + * + * However, to avoid overflow issues let's use a 16k maximum. This lets us + * calculate 16k * 16k * 4 with 32bits. If we need higher maximums, a careful + * review and adjustment of the code is needed so that it will deal with + * overflows correctly. + */ +#define MAX_WIDTH 16384 +#define MAX_HEIGHT MAX_WIDTH +/* Define a nominal minimum image size */ +#define MIN_WIDTH 16 +#define MIN_HEIGHT 16 + +#define MIN_META_WIDTH 4 +#define MIN_META_HEIGHT 1 + +const struct v4l2_mbus_framefmt cfe_default_format = { + .width = 640, + .height = 480, + .code = MEDIA_BUS_FMT_SRGGB10_1X10, + .field = V4L2_FIELD_NONE, + .colorspace = V4L2_COLORSPACE_RAW, + .ycbcr_enc = V4L2_YCBCR_ENC_601, + .quantization = V4L2_QUANTIZATION_FULL_RANGE, + .xfer_func = V4L2_XFER_FUNC_NONE, +}; + +enum node_ids { + /* CSI2 HW output nodes first. */ + CSI2_CH0, + CSI2_CH1, + CSI2_CH2, + CSI2_CH3, + /* FE only nodes from here on. */ + FE_OUT0, + FE_OUT1, + FE_STATS, + FE_CONFIG, + NUM_NODES +}; + +struct node_description { + enum node_ids id; + const char *name; + unsigned int caps; + unsigned int pad_flags; + unsigned int link_pad; +}; + +/* Must match the ordering of enum ids */ +static const struct node_description node_desc[NUM_NODES] = { + [CSI2_CH0] = { + .name = "csi2-ch0", + .caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_META_CAPTURE, + .pad_flags = MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_MUST_CONNECT, + .link_pad = CSI2_PAD_FIRST_SOURCE + 0 + }, + /* + * At the moment the main userspace component (libcamera) doesn't + * support metadata with video nodes that support both video and + * metadata. So for the time being this node is set to only support + * V4L2_CAP_META_CAPTURE. 
+ */ + [CSI2_CH1] = { + .name = "csi2-ch1", + .caps = V4L2_CAP_META_CAPTURE, + .pad_flags = MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_MUST_CONNECT, + .link_pad = CSI2_PAD_FIRST_SOURCE + 1 + }, + [CSI2_CH2] = { + .name = "csi2-ch2", + .caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_META_CAPTURE, + .pad_flags = MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_MUST_CONNECT, + .link_pad = CSI2_PAD_FIRST_SOURCE + 2 + }, + [CSI2_CH3] = { + .name = "csi2-ch3", + .caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_META_CAPTURE, + .pad_flags = MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_MUST_CONNECT, + .link_pad = CSI2_PAD_FIRST_SOURCE + 3 + }, + [FE_OUT0] = { + .name = "fe-image0", + .caps = V4L2_CAP_VIDEO_CAPTURE, + .pad_flags = MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_MUST_CONNECT, + .link_pad = FE_OUTPUT0_PAD + }, + [FE_OUT1] = { + .name = "fe-image1", + .caps = V4L2_CAP_VIDEO_CAPTURE, + .pad_flags = MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_MUST_CONNECT, + .link_pad = FE_OUTPUT1_PAD + }, + [FE_STATS] = { + .name = "fe-stats", + .caps = V4L2_CAP_META_CAPTURE, + .pad_flags = MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_MUST_CONNECT, + .link_pad = FE_STATS_PAD + }, + [FE_CONFIG] = { + .name = "fe-config", + .caps = V4L2_CAP_META_OUTPUT, + .pad_flags = MEDIA_PAD_FL_SOURCE | MEDIA_PAD_FL_MUST_CONNECT, + .link_pad = FE_CONFIG_PAD + }, +}; + +#define is_fe_node(node) (((node)->id) >= FE_OUT0) +#define is_csi2_node(node) (!is_fe_node(node)) + +#define node_supports_image_output(node) \ + (node_desc[(node)->id].caps & V4L2_CAP_VIDEO_CAPTURE) +#define node_supports_meta_output(node) \ + (node_desc[(node)->id].caps & V4L2_CAP_META_CAPTURE) +#define node_supports_image_input(node) \ + (node_desc[(node)->id].caps & V4L2_CAP_VIDEO_OUTPUT) +#define node_supports_meta_input(node) \ + (node_desc[(node)->id].caps & V4L2_CAP_META_OUTPUT) +#define node_supports_image(node) \ + (node_supports_image_output(node) || node_supports_image_input(node)) +#define node_supports_meta(node) \ + (node_supports_meta_output(node) || node_supports_meta_input(node)) + +#define is_image_output_node(node) \ + ((node)->buffer_queue.type == V4L2_BUF_TYPE_VIDEO_CAPTURE) +#define is_image_input_node(node) \ + ((node)->buffer_queue.type == V4L2_BUF_TYPE_VIDEO_OUTPUT) +#define is_image_node(node) \ + (is_image_output_node(node) || is_image_input_node(node)) +#define is_meta_output_node(node) \ + ((node)->buffer_queue.type == V4L2_BUF_TYPE_META_CAPTURE) +#define is_meta_input_node(node) \ + ((node)->buffer_queue.type == V4L2_BUF_TYPE_META_OUTPUT) +#define is_meta_node(node) \ + (is_meta_output_node(node) || is_meta_input_node(node)) + +/* To track state across all nodes. 
*/ +#define NODE_REGISTERED BIT(0) +#define NODE_ENABLED BIT(1) +#define NODE_STREAMING BIT(2) +#define FS_INT BIT(3) +#define FE_INT BIT(4) +#define NUM_STATES 5 + +struct cfe_buffer { + struct vb2_v4l2_buffer vb; + struct list_head list; +}; + +struct cfe_config_buffer { + struct cfe_buffer buf; + struct pisp_fe_config config; +}; + +static inline struct cfe_buffer *to_cfe_buffer(struct vb2_buffer *vb) +{ + return container_of(vb, struct cfe_buffer, vb.vb2_buf); +} + +static inline +struct cfe_config_buffer *to_cfe_config_buffer(struct cfe_buffer *buf) +{ + return container_of(buf, struct cfe_config_buffer, buf); +} + +struct cfe_node { + /* Node id */ + enum node_ids id; + /* Pointer pointing to current v4l2_buffer */ + struct cfe_buffer *cur_frm; + /* Pointer pointing to next v4l2_buffer */ + struct cfe_buffer *next_frm; + /* Used to store current pixel format */ + struct v4l2_format vid_fmt; + /* Used to store current meta format */ + struct v4l2_format meta_fmt; + /* Buffer queue used in video-buf */ + struct vb2_queue buffer_queue; + /* Queue of filled frames */ + struct list_head dma_queue; + /* lock used to access this structure */ + struct mutex lock; + /* Identifies video device for this channel */ + struct video_device video_dev; + /* Pointer to the parent handle */ + struct cfe_device *cfe; + /* Media pad for this node */ + struct media_pad pad; + /* Frame-start counter */ + unsigned int fs_count; + /* Timestamp of the current buffer */ + u64 ts; +}; + +struct cfe_device { + struct dentry *debugfs; + struct kref kref; + + /* peripheral base address */ + void __iomem *mipi_cfg_base; + + struct clk *clk; + + /* V4l2 device */ + struct v4l2_device v4l2_dev; + struct media_device mdev; + struct media_pipeline pipe; + + /* IRQ lock for node state and DMA queues */ + spinlock_t state_lock; + bool job_ready; + bool job_queued; + + /* parent device */ + struct platform_device *pdev; + /* subdevice async Notifier */ + struct v4l2_async_notifier notifier; + + /* Source sub device */ + struct v4l2_subdev *source_sd; + /* Source subdev's pad */ + u32 source_pad; + + struct cfe_node node[NUM_NODES]; + DECLARE_BITMAP(node_flags, NUM_STATES * NUM_NODES); + + struct csi2_device csi2; + struct pisp_fe_device fe; + + int fe_csi2_channel; + + /* Mask of enabled streams */ + u64 streams_mask; +}; + +static inline bool is_fe_enabled(struct cfe_device *cfe) +{ + return cfe->fe_csi2_channel != -1; +} + +static inline struct cfe_device *to_cfe_device(struct v4l2_device *v4l2_dev) +{ + return container_of(v4l2_dev, struct cfe_device, v4l2_dev); +} + +static inline u32 cfg_reg_read(struct cfe_device *cfe, u32 offset) +{ + return readl(cfe->mipi_cfg_base + offset); +} + +static inline void cfg_reg_write(struct cfe_device *cfe, u32 offset, u32 val) +{ + writel(val, cfe->mipi_cfg_base + offset); +} + +static bool check_state(struct cfe_device *cfe, unsigned long state, + unsigned int node_id) +{ + unsigned long bit; + + for_each_set_bit(bit, &state, sizeof(state)) { + if (!test_bit(bit + (node_id * NUM_STATES), cfe->node_flags)) + return false; + } + + return true; +} + +static void set_state(struct cfe_device *cfe, unsigned long state, + unsigned int node_id) +{ + unsigned long bit; + + for_each_set_bit(bit, &state, sizeof(state)) + set_bit(bit + (node_id * NUM_STATES), cfe->node_flags); +} + +static void clear_state(struct cfe_device *cfe, unsigned long state, + unsigned int node_id) +{ + unsigned long bit; + + for_each_set_bit(bit, &state, sizeof(state)) + clear_bit(bit + (node_id * NUM_STATES), 
cfe->node_flags); +} + +static bool test_any_node(struct cfe_device *cfe, unsigned long cond) +{ + for (unsigned int i = 0; i < NUM_NODES; i++) { + if (check_state(cfe, cond, i)) + return true; + } + + return false; +} + +static bool test_all_nodes(struct cfe_device *cfe, unsigned long precond, + unsigned long cond) +{ + for (unsigned int i = 0; i < NUM_NODES; i++) { + if (check_state(cfe, precond, i)) { + if (!check_state(cfe, cond, i)) + return false; + } + } + + return true; +} + +static int mipi_cfg_regs_show(struct seq_file *s, void *data) +{ + struct cfe_device *cfe = s->private; + int ret; + + ret = pm_runtime_resume_and_get(&cfe->pdev->dev); + if (ret) + return ret; + +#define DUMP(reg) seq_printf(s, #reg " \t0x%08x\n", cfg_reg_read(cfe, reg)) + DUMP(MIPICFG_CFG); + DUMP(MIPICFG_INTR); + DUMP(MIPICFG_INTE); + DUMP(MIPICFG_INTF); + DUMP(MIPICFG_INTS); +#undef DUMP + + pm_runtime_put(&cfe->pdev->dev); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(mipi_cfg_regs); + +/* Format setup functions */ +const struct cfe_fmt *find_format_by_code(u32 code) +{ + for (unsigned int i = 0; i < ARRAY_SIZE(formats); i++) { + if (formats[i].code == code) + return &formats[i]; + } + + return NULL; +} + +const struct cfe_fmt *find_format_by_pix(u32 pixelformat) +{ + for (unsigned int i = 0; i < ARRAY_SIZE(formats); i++) { + if (formats[i].fourcc == pixelformat) + return &formats[i]; + } + + return NULL; +} + +static const struct cfe_fmt *find_format_by_code_and_fourcc(u32 code, + u32 fourcc) +{ + for (unsigned int i = 0; i < ARRAY_SIZE(formats); i++) { + if (formats[i].code == code && formats[i].fourcc == fourcc) + return &formats[i]; + } + + return NULL; +} + +/* + * Given the mbus code, find the 16 bit remapped code. Returns 0 if no remap + * possible. + */ +u32 cfe_find_16bit_code(u32 code) +{ + const struct cfe_fmt *cfe_fmt; + + cfe_fmt = find_format_by_code(code); + + if (!cfe_fmt || !cfe_fmt->remap[CFE_REMAP_16BIT]) + return 0; + + cfe_fmt = find_format_by_pix(cfe_fmt->remap[CFE_REMAP_16BIT]); + if (!cfe_fmt) + return 0; + + return cfe_fmt->code; +} + +/* + * Given the mbus code, find the 8 bit compressed code. Returns 0 if no remap + * possible. 
+ */ +u32 cfe_find_compressed_code(u32 code) +{ + const struct cfe_fmt *cfe_fmt; + + cfe_fmt = find_format_by_code(code); + + if (!cfe_fmt || !cfe_fmt->remap[CFE_REMAP_COMPRESSED]) + return 0; + + cfe_fmt = find_format_by_pix(cfe_fmt->remap[CFE_REMAP_COMPRESSED]); + if (!cfe_fmt) + return 0; + + return cfe_fmt->code; +} + +static void cfe_calc_vid_format_size_bpl(struct cfe_device *cfe, + const struct cfe_fmt *fmt, + struct v4l2_format *f) +{ + unsigned int min_bytesperline; + + v4l_bound_align_image(&f->fmt.pix.width, MIN_WIDTH, MAX_WIDTH, 2, + &f->fmt.pix.height, MIN_HEIGHT, MAX_HEIGHT, 0, 0); + + min_bytesperline = + ALIGN((f->fmt.pix.width * fmt->depth) >> 3, BPL_ALIGNMENT); + + if (f->fmt.pix.bytesperline > min_bytesperline && + f->fmt.pix.bytesperline <= MAX_BYTESPERLINE) + f->fmt.pix.bytesperline = + ALIGN(f->fmt.pix.bytesperline, BPL_ALIGNMENT); + else + f->fmt.pix.bytesperline = min_bytesperline; + + f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline; + + cfe_dbg(cfe, "%s: %p4cc size: %ux%u bpl:%u img_size:%u\n", __func__, + &f->fmt.pix.pixelformat, f->fmt.pix.width, f->fmt.pix.height, + f->fmt.pix.bytesperline, f->fmt.pix.sizeimage); +} + +static void cfe_calc_meta_format_size_bpl(struct cfe_device *cfe, + const struct cfe_fmt *fmt, + struct v4l2_format *f) +{ + v4l_bound_align_image(&f->fmt.meta.width, MIN_META_WIDTH, MAX_WIDTH, 2, + &f->fmt.meta.height, MIN_META_HEIGHT, MAX_HEIGHT, + 0, 0); + + f->fmt.meta.bytesperline = (f->fmt.meta.width * fmt->depth) >> 3; + f->fmt.meta.buffersize = f->fmt.meta.height * f->fmt.pix.bytesperline; + + cfe_dbg(cfe, "%s: %p4cc size: %ux%u bpl:%u buf_size:%u\n", __func__, + &f->fmt.meta.dataformat, f->fmt.meta.width, f->fmt.meta.height, + f->fmt.meta.bytesperline, f->fmt.meta.buffersize); +} + +static void cfe_schedule_next_csi2_job(struct cfe_device *cfe) +{ + struct cfe_buffer *buf; + dma_addr_t addr; + + for (unsigned int i = 0; i < CSI2_NUM_CHANNELS; i++) { + struct cfe_node *node = &cfe->node[i]; + unsigned int stride, size; + + if (!check_state(cfe, NODE_STREAMING, i)) + continue; + + buf = list_first_entry(&node->dma_queue, struct cfe_buffer, + list); + node->next_frm = buf; + list_del(&buf->list); + + trace_cfe_csi2_schedule(node->id, &buf->vb.vb2_buf); + + if (is_meta_node(node)) { + size = node->meta_fmt.fmt.meta.buffersize; + /* We use CSI2_CH_CTRL_PACK_BYTES, so stride == 0 */ + stride = 0; + } else { + size = node->vid_fmt.fmt.pix.sizeimage; + stride = node->vid_fmt.fmt.pix.bytesperline; + } + + addr = vb2_dma_contig_plane_dma_addr(&buf->vb.vb2_buf, 0); + csi2_set_buffer(&cfe->csi2, node->id, addr, stride, size); + } +} + +static void cfe_schedule_next_pisp_job(struct cfe_device *cfe) +{ + struct vb2_buffer *vb2_bufs[FE_NUM_PADS] = { 0 }; + struct cfe_config_buffer *config_buf; + struct cfe_buffer *buf; + + for (unsigned int i = CSI2_NUM_CHANNELS; i < NUM_NODES; i++) { + struct cfe_node *node = &cfe->node[i]; + + if (!check_state(cfe, NODE_STREAMING, i)) + continue; + + buf = list_first_entry(&node->dma_queue, struct cfe_buffer, + list); + + trace_cfe_fe_schedule(node->id, &buf->vb.vb2_buf); + + node->next_frm = buf; + vb2_bufs[node_desc[i].link_pad] = &buf->vb.vb2_buf; + list_del(&buf->list); + } + + config_buf = to_cfe_config_buffer(cfe->node[FE_CONFIG].next_frm); + pisp_fe_submit_job(&cfe->fe, vb2_bufs, &config_buf->config); +} + +static bool cfe_check_job_ready(struct cfe_device *cfe) +{ + for (unsigned int i = 0; i < NUM_NODES; i++) { + struct cfe_node *node = &cfe->node[i]; + + if (!check_state(cfe, NODE_ENABLED, 
i)) + continue; + + if (list_empty(&node->dma_queue)) + return false; + } + + return true; +} + +static void cfe_prepare_next_job(struct cfe_device *cfe) +{ + trace_cfe_prepare_next_job(is_fe_enabled(cfe)); + + cfe->job_queued = true; + cfe_schedule_next_csi2_job(cfe); + if (is_fe_enabled(cfe)) + cfe_schedule_next_pisp_job(cfe); + + /* Flag if another job is ready after this. */ + cfe->job_ready = cfe_check_job_ready(cfe); +} + +static void cfe_process_buffer_complete(struct cfe_node *node, + enum vb2_buffer_state state) +{ + trace_cfe_buffer_complete(node->id, &node->cur_frm->vb); + + node->cur_frm->vb.sequence = node->fs_count - 1; + vb2_buffer_done(&node->cur_frm->vb.vb2_buf, state); +} + +static void cfe_queue_event_sof(struct cfe_node *node) +{ + struct v4l2_event event = { + .type = V4L2_EVENT_FRAME_SYNC, + .u.frame_sync.frame_sequence = node->fs_count - 1, + }; + + v4l2_event_queue(&node->video_dev, &event); +} + +static void cfe_sof_isr(struct cfe_node *node) +{ + struct cfe_device *cfe = node->cfe; + bool matching_fs = true; + + trace_cfe_frame_start(node->id, node->fs_count); + + /* + * If the sensor is producing unexpected frame event ordering over a + * sustained period of time, guard against the possibility of coming + * here and orphaning the cur_frm if it's not been dequeued already. + * Unfortunately, there is not enough hardware state to tell if this + * may have occurred. + */ + if (WARN(node->cur_frm, "%s: [%s] Orphanded frame at seq %u\n", + __func__, node_desc[node->id].name, node->fs_count)) + cfe_process_buffer_complete(node, VB2_BUF_STATE_ERROR); + + node->cur_frm = node->next_frm; + node->next_frm = NULL; + node->fs_count++; + + node->ts = ktime_get_ns(); + for (unsigned int i = 0; i < NUM_NODES; i++) { + if (!check_state(cfe, NODE_STREAMING, i) || i == node->id) + continue; + /* + * This checks if any other node has seen a FS. If yes, use the + * same timestamp, eventually across all node buffers. + */ + if (cfe->node[i].fs_count >= node->fs_count) + node->ts = cfe->node[i].ts; + /* + * This checks if all other node have seen a matching FS. If + * yes, we can flag another job to be queued. + */ + if (matching_fs && cfe->node[i].fs_count != node->fs_count) + matching_fs = false; + } + + if (matching_fs) + cfe->job_queued = false; + + if (node->cur_frm) + node->cur_frm->vb.vb2_buf.timestamp = node->ts; + + set_state(cfe, FS_INT, node->id); + clear_state(cfe, FE_INT, node->id); + + if (is_image_output_node(node)) + cfe_queue_event_sof(node); +} + +static void cfe_eof_isr(struct cfe_node *node) +{ + struct cfe_device *cfe = node->cfe; + + trace_cfe_frame_end(node->id, node->fs_count - 1); + + if (node->cur_frm) + cfe_process_buffer_complete(node, VB2_BUF_STATE_DONE); + + node->cur_frm = NULL; + set_state(cfe, FE_INT, node->id); + clear_state(cfe, FS_INT, node->id); +} + +static irqreturn_t cfe_isr(int irq, void *dev) +{ + struct cfe_device *cfe = dev; + bool sof[NUM_NODES] = { 0 }, eof[NUM_NODES] = { 0 }; + u32 sts; + + sts = cfg_reg_read(cfe, MIPICFG_INTS); + + if (sts & MIPICFG_INT_CSI_DMA) + csi2_isr(&cfe->csi2, sof, eof); + + if (sts & MIPICFG_INT_PISP_FE) + pisp_fe_isr(&cfe->fe, sof + CSI2_NUM_CHANNELS, + eof + CSI2_NUM_CHANNELS); + + spin_lock(&cfe->state_lock); + + for (unsigned int i = 0; i < NUM_NODES; i++) { + struct cfe_node *node = &cfe->node[i]; + + /* + * The check_state(NODE_STREAMING) is to ensure we do not loop + * over the CSI2_CHx nodes when the FE is active since they + * generate interrupts even though the node is not streaming. 
+ */ + if (!check_state(cfe, NODE_STREAMING, i) || !(sof[i] || eof[i])) + continue; + + /* + * There are 3 cases where we could get FS + FE_ACK at + * the same time: + * 1) FE of the current frame, and FS of the next frame. + * 2) FS + FE of the same frame. + * 3) FE of the current frame, and FS + FE of the next + * frame. To handle this, see the sof handler below. + * + * (1) is handled implicitly by the ordering of the FE and FS + * handlers below. + */ + if (eof[i]) { + /* + * The condition below tests for (2). Run the FS handler + * first before the FE handler, both for the current + * frame. + */ + if (sof[i] && !check_state(cfe, FS_INT, i)) { + cfe_sof_isr(node); + sof[i] = false; + } + + cfe_eof_isr(node); + } + + if (sof[i]) { + /* + * The condition below tests for (3). In such cases, we + * come in here with FS flag set in the node state from + * the previous frame since it only gets cleared in + * cfe_eof_isr(). Handle the FE for the previous + * frame first before the FS handler for the current + * frame. + */ + if (check_state(cfe, FS_INT, node->id) && + !check_state(cfe, FE_INT, node->id)) { + cfe_dbg(cfe, "%s: [%s] Handling missing previous FE interrupt\n", + __func__, node_desc[node->id].name); + cfe_eof_isr(node); + } + + cfe_sof_isr(node); + } + + if (!cfe->job_queued && cfe->job_ready) + cfe_prepare_next_job(cfe); + } + + spin_unlock(&cfe->state_lock); + + return IRQ_HANDLED; +} + +/* + * Stream helpers + */ + +static int cfe_get_vc_dt_fallback(struct cfe_device *cfe, u8 *vc, u8 *dt) +{ + struct v4l2_subdev_state *state; + struct v4l2_mbus_framefmt *fmt; + const struct cfe_fmt *cfe_fmt; + + state = v4l2_subdev_get_locked_active_state(&cfe->csi2.sd); + + fmt = v4l2_subdev_state_get_format(state, CSI2_PAD_SINK, 0); + if (!fmt) + return -EINVAL; + + cfe_fmt = find_format_by_code(fmt->code); + if (!cfe_fmt) + return -EINVAL; + + *vc = 0; + *dt = cfe_fmt->csi_dt; + + return 0; +} + +static int cfe_get_vc_dt(struct cfe_device *cfe, unsigned int channel, u8 *vc, + u8 *dt) +{ + struct v4l2_mbus_frame_desc remote_desc; + struct v4l2_subdev_state *state; + u32 sink_stream; + unsigned int i; + int ret; + + state = v4l2_subdev_get_locked_active_state(&cfe->csi2.sd); + + ret = v4l2_subdev_routing_find_opposite_end(&state->routing, + CSI2_PAD_FIRST_SOURCE + channel, 0, NULL, &sink_stream); + if (ret) + return ret; + + ret = v4l2_subdev_call(cfe->source_sd, pad, get_frame_desc, + cfe->source_pad, &remote_desc); + if (ret == -ENOIOCTLCMD) { + cfe_dbg(cfe, "source does not support get_frame_desc, use fallback\n"); + return cfe_get_vc_dt_fallback(cfe, vc, dt); + } else if (ret) { + cfe_err(cfe, "Failed to get frame descriptor\n"); + return ret; + } + + if (remote_desc.type != V4L2_MBUS_FRAME_DESC_TYPE_CSI2) { + cfe_err(cfe, "Frame descriptor does not describe CSI-2 link"); + return -EINVAL; + } + + for (i = 0; i < remote_desc.num_entries; i++) { + if (remote_desc.entry[i].stream == sink_stream) + break; + } + + if (i == remote_desc.num_entries) { + cfe_err(cfe, "Stream %u not found in remote frame desc\n", + sink_stream); + return -EINVAL; + } + + *vc = remote_desc.entry[i].bus.csi2.vc; + *dt = remote_desc.entry[i].bus.csi2.dt; + + return 0; +} + +static int cfe_start_channel(struct cfe_node *node) +{ + struct cfe_device *cfe = node->cfe; + struct v4l2_subdev_state *state; + struct v4l2_mbus_framefmt *source_fmt; + const struct cfe_fmt *fmt; + unsigned long flags; + bool start_fe; + int ret; + + cfe_dbg(cfe, "%s: [%s]\n", __func__, node_desc[node->id].name); + + start_fe = 
is_fe_enabled(cfe) && + test_all_nodes(cfe, NODE_ENABLED, NODE_STREAMING); + + state = v4l2_subdev_get_locked_active_state(&cfe->csi2.sd); + + if (start_fe) { + unsigned int width, height; + u8 vc, dt; + + cfe_dbg(cfe, "%s: %s using csi2 channel %d\n", __func__, + node_desc[FE_OUT0].name, cfe->fe_csi2_channel); + + ret = cfe_get_vc_dt(cfe, cfe->fe_csi2_channel, &vc, &dt); + if (ret) + return ret; + + source_fmt = v4l2_subdev_state_get_format(state, + node_desc[cfe->fe_csi2_channel].link_pad); + fmt = find_format_by_code(source_fmt->code); + + width = source_fmt->width; + height = source_fmt->height; + + /* Must have a valid CSI2 datatype. */ + WARN_ON(!fmt->csi_dt); + + /* + * Start the associated CSI2 Channel as well. + * + * Must write to the ADDR register to latch the ctrl values + * even if we are connected to the front end. Once running, + * this is handled by the CSI2 AUTO_ARM mode. + */ + csi2_start_channel(&cfe->csi2, cfe->fe_csi2_channel, + CSI2_MODE_FE_STREAMING, + true, false, width, height, vc, dt); + csi2_set_buffer(&cfe->csi2, cfe->fe_csi2_channel, 0, 0, -1); + pisp_fe_start(&cfe->fe); + } + + if (is_csi2_node(node)) { + unsigned int width = 0, height = 0; + u8 vc, dt; + + ret = cfe_get_vc_dt(cfe, node->id, &vc, &dt); + if (ret) { + if (start_fe) { + csi2_stop_channel(&cfe->csi2, + cfe->fe_csi2_channel); + pisp_fe_stop(&cfe->fe); + } + + return ret; + } + + u32 mode = CSI2_MODE_NORMAL; + + source_fmt = v4l2_subdev_state_get_format(state, + node_desc[node->id].link_pad); + fmt = find_format_by_code(source_fmt->code); + + /* Must have a valid CSI2 datatype. */ + WARN_ON(!fmt->csi_dt); + + if (is_image_output_node(node)) { + u32 pixfmt; + + width = source_fmt->width; + height = source_fmt->height; + + pixfmt = node->vid_fmt.fmt.pix.pixelformat; + + if (pixfmt == fmt->remap[CFE_REMAP_16BIT]) { + mode = CSI2_MODE_REMAP; + } else if (pixfmt == fmt->remap[CFE_REMAP_COMPRESSED]) { + mode = CSI2_MODE_COMPRESSED; + csi2_set_compression(&cfe->csi2, node->id, + CSI2_COMPRESSION_DELTA, 0, + 0); + } + } + /* Unconditionally start this CSI2 channel. */ + csi2_start_channel(&cfe->csi2, node->id, + mode, + /* Auto arm */ + false, + /* Pack bytes */ + is_meta_node(node) ? 
true : false, + width, height, vc, dt); + } + + spin_lock_irqsave(&cfe->state_lock, flags); + if (cfe->job_ready && test_all_nodes(cfe, NODE_ENABLED, NODE_STREAMING)) + cfe_prepare_next_job(cfe); + spin_unlock_irqrestore(&cfe->state_lock, flags); + + return 0; +} + +static void cfe_stop_channel(struct cfe_node *node, bool fe_stop) +{ + struct cfe_device *cfe = node->cfe; + + cfe_dbg(cfe, "%s: [%s] fe_stop %u\n", __func__, + node_desc[node->id].name, fe_stop); + + if (fe_stop) { + csi2_stop_channel(&cfe->csi2, cfe->fe_csi2_channel); + pisp_fe_stop(&cfe->fe); + } + + if (is_csi2_node(node)) + csi2_stop_channel(&cfe->csi2, node->id); +} + +static void cfe_return_buffers(struct cfe_node *node, + enum vb2_buffer_state state) +{ + struct cfe_device *cfe = node->cfe; + struct cfe_buffer *buf, *tmp; + unsigned long flags; + + cfe_dbg(cfe, "%s: [%s]\n", __func__, node_desc[node->id].name); + + spin_lock_irqsave(&cfe->state_lock, flags); + list_for_each_entry_safe(buf, tmp, &node->dma_queue, list) { + list_del(&buf->list); + trace_cfe_return_buffer(node->id, buf->vb.vb2_buf.index, 2); + vb2_buffer_done(&buf->vb.vb2_buf, state); + } + + if (node->cur_frm) { + trace_cfe_return_buffer(node->id, + node->cur_frm->vb.vb2_buf.index, 0); + vb2_buffer_done(&node->cur_frm->vb.vb2_buf, state); + } + if (node->next_frm && node->cur_frm != node->next_frm) { + trace_cfe_return_buffer(node->id, + node->next_frm->vb.vb2_buf.index, 1); + vb2_buffer_done(&node->next_frm->vb.vb2_buf, state); + } + + node->cur_frm = NULL; + node->next_frm = NULL; + spin_unlock_irqrestore(&cfe->state_lock, flags); +} + +/* + * vb2 ops + */ + +static int cfe_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, + unsigned int *nplanes, unsigned int sizes[], + struct device *alloc_devs[]) +{ + struct cfe_node *node = vb2_get_drv_priv(vq); + struct cfe_device *cfe = node->cfe; + unsigned int size = is_image_node(node) ? + node->vid_fmt.fmt.pix.sizeimage : + node->meta_fmt.fmt.meta.buffersize; + + cfe_dbg(cfe, "%s: [%s] type:%u\n", __func__, node_desc[node->id].name, + node->buffer_queue.type); + + if (vq->max_num_buffers + *nbuffers < 3) + *nbuffers = 3 - vq->max_num_buffers; + + if (*nplanes) { + if (sizes[0] < size) { + cfe_err(cfe, "sizes[0] %i < size %u\n", sizes[0], size); + return -EINVAL; + } + size = sizes[0]; + } + + *nplanes = 1; + sizes[0] = size; + + return 0; +} + +static int cfe_buffer_prepare(struct vb2_buffer *vb) +{ + struct cfe_node *node = vb2_get_drv_priv(vb->vb2_queue); + struct cfe_device *cfe = node->cfe; + struct cfe_buffer *buf = to_cfe_buffer(vb); + unsigned long size; + + trace_cfe_buffer_prepare(node->id, vb); + + size = is_image_node(node) ? 
node->vid_fmt.fmt.pix.sizeimage : + node->meta_fmt.fmt.meta.buffersize; + if (vb2_plane_size(vb, 0) < size) { + cfe_err(cfe, "data will not fit into plane (%lu < %lu)\n", + vb2_plane_size(vb, 0), size); + return -EINVAL; + } + + vb2_set_plane_payload(&buf->vb.vb2_buf, 0, size); + + if (node->id == FE_CONFIG) { + struct cfe_config_buffer *b = to_cfe_config_buffer(buf); + void *addr = vb2_plane_vaddr(vb, 0); + + memcpy(&b->config, addr, sizeof(struct pisp_fe_config)); + return pisp_fe_validate_config(&cfe->fe, &b->config, + &cfe->node[FE_OUT0].vid_fmt, + &cfe->node[FE_OUT1].vid_fmt); + } + + return 0; +} + +static void cfe_buffer_queue(struct vb2_buffer *vb) +{ + struct cfe_node *node = vb2_get_drv_priv(vb->vb2_queue); + struct cfe_device *cfe = node->cfe; + struct cfe_buffer *buf = to_cfe_buffer(vb); + unsigned long flags; + bool schedule_now; + + spin_lock_irqsave(&cfe->state_lock, flags); + + list_add_tail(&buf->list, &node->dma_queue); + + if (!cfe->job_ready) + cfe->job_ready = cfe_check_job_ready(cfe); + + schedule_now = !cfe->job_queued && cfe->job_ready && + test_all_nodes(cfe, NODE_ENABLED, NODE_STREAMING); + + trace_cfe_buffer_queue(node->id, vb, schedule_now); + + if (schedule_now) + cfe_prepare_next_job(cfe); + + spin_unlock_irqrestore(&cfe->state_lock, flags); +} + +static s64 cfe_get_source_link_freq(struct cfe_device *cfe) +{ + struct v4l2_subdev_state *state; + s64 link_freq; + u32 bpp; + + state = v4l2_subdev_get_locked_active_state(&cfe->csi2.sd); + + /* + * v4l2_get_link_freq() uses V4L2_CID_LINK_FREQ first, and falls back + * to V4L2_CID_PIXEL_RATE if V4L2_CID_LINK_FREQ is not available. + * + * With multistream input there is no single pixel rate, and thus we + * cannot use V4L2_CID_PIXEL_RATE, so we pass 0 as the bpp which + * causes v4l2_get_link_freq() to return an error if it falls back to + * V4L2_CID_PIXEL_RATE. + */ + + if (state->routing.num_routes == 1) { + struct v4l2_subdev_route *route = &state->routing.routes[0]; + struct v4l2_mbus_framefmt *source_fmt; + const struct cfe_fmt *fmt; + + source_fmt = v4l2_subdev_state_get_format(state, + route->sink_pad, + route->sink_stream); + + fmt = find_format_by_code(source_fmt->code); + if (!fmt) + return -EINVAL; + + bpp = fmt->depth; + } else { + bpp = 0; + } + + link_freq = v4l2_get_link_freq(cfe->source_sd->ctrl_handler, bpp, + 2 * cfe->csi2.dphy.active_lanes); + if (link_freq < 0) + cfe_err(cfe, "failed to get link freq for subdev '%s'\n", + cfe->source_sd->name); + + return link_freq; +} + +static int cfe_start_streaming(struct vb2_queue *vq, unsigned int count) +{ + struct v4l2_mbus_config mbus_config = { 0 }; + struct cfe_node *node = vb2_get_drv_priv(vq); + struct cfe_device *cfe = node->cfe; + struct v4l2_subdev_state *state; + struct v4l2_subdev_route *route; + s64 link_freq; + int ret; + + cfe_dbg(cfe, "%s: [%s]\n", __func__, node_desc[node->id].name); + + if (!check_state(cfe, NODE_ENABLED, node->id)) { + cfe_err(cfe, "%s node link is not enabled.\n", + node_desc[node->id].name); + ret = -EINVAL; + goto err_streaming; + } + + ret = pm_runtime_resume_and_get(&cfe->pdev->dev); + if (ret < 0) { + cfe_err(cfe, "pm_runtime_resume_and_get failed\n"); + goto err_streaming; + } + + /* When using the Frontend, we must enable the FE_CONFIG node. 
*/ + if (is_fe_enabled(cfe) && + !check_state(cfe, NODE_ENABLED, cfe->node[FE_CONFIG].id)) { + cfe_err(cfe, "FE enabled, but FE_CONFIG node is not\n"); + ret = -EINVAL; + goto err_pm_put; + } + + ret = media_pipeline_start(&node->pad, &cfe->pipe); + if (ret < 0) { + cfe_err(cfe, "Failed to start media pipeline: %d\n", ret); + goto err_pm_put; + } + + state = v4l2_subdev_lock_and_get_active_state(&cfe->csi2.sd); + + clear_state(cfe, FS_INT | FE_INT, node->id); + set_state(cfe, NODE_STREAMING, node->id); + node->fs_count = 0; + + ret = cfe_start_channel(node); + if (ret) + goto err_unlock_state; + + if (!test_all_nodes(cfe, NODE_ENABLED, NODE_STREAMING)) { + cfe_dbg(cfe, "Streaming on hold, as all nodes are not set to streaming yet\n"); + v4l2_subdev_unlock_state(state); + return 0; + } + + cfg_reg_write(cfe, MIPICFG_CFG, MIPICFG_CFG_SEL_CSI); + cfg_reg_write(cfe, MIPICFG_INTE, + MIPICFG_INT_CSI_DMA | MIPICFG_INT_PISP_FE); + + ret = v4l2_subdev_call(cfe->source_sd, pad, get_mbus_config, 0, + &mbus_config); + if (ret < 0 && ret != -ENOIOCTLCMD) { + cfe_err(cfe, "g_mbus_config failed\n"); + goto err_clear_inte; + } + + cfe->csi2.dphy.active_lanes = mbus_config.bus.mipi_csi2.num_data_lanes; + if (!cfe->csi2.dphy.active_lanes) + cfe->csi2.dphy.active_lanes = cfe->csi2.dphy.max_lanes; + if (cfe->csi2.dphy.active_lanes > cfe->csi2.dphy.max_lanes) { + cfe_err(cfe, "Device has requested %u data lanes, which is >%u configured in DT\n", + cfe->csi2.dphy.active_lanes, cfe->csi2.dphy.max_lanes); + ret = -EINVAL; + goto err_clear_inte; + } + + link_freq = cfe_get_source_link_freq(cfe); + if (link_freq < 0) + goto err_clear_inte; + + cfe->csi2.dphy.dphy_rate = div_s64(link_freq * 2, 1000000); + csi2_open_rx(&cfe->csi2); + + cfe->streams_mask = 0; + + for_each_active_route(&state->routing, route) + cfe->streams_mask |= BIT_ULL(route->sink_stream); + + ret = v4l2_subdev_enable_streams(cfe->source_sd, cfe->source_pad, + cfe->streams_mask); + if (ret) { + cfe_err(cfe, "stream on failed in subdev\n"); + goto err_disable_cfe; + } + + cfe_dbg(cfe, "Streaming enabled\n"); + + v4l2_subdev_unlock_state(state); + + return 0; + +err_disable_cfe: + csi2_close_rx(&cfe->csi2); +err_clear_inte: + cfg_reg_write(cfe, MIPICFG_INTE, 0); + + cfe_stop_channel(node, + is_fe_enabled(cfe) && test_all_nodes(cfe, NODE_ENABLED, + NODE_STREAMING)); +err_unlock_state: + v4l2_subdev_unlock_state(state); + media_pipeline_stop(&node->pad); +err_pm_put: + pm_runtime_put(&cfe->pdev->dev); +err_streaming: + cfe_return_buffers(node, VB2_BUF_STATE_QUEUED); + clear_state(cfe, NODE_STREAMING, node->id); + + return ret; +} + +static void cfe_stop_streaming(struct vb2_queue *vq) +{ + struct cfe_node *node = vb2_get_drv_priv(vq); + struct cfe_device *cfe = node->cfe; + unsigned long flags; + bool fe_stop; + + cfe_dbg(cfe, "%s: [%s]\n", __func__, node_desc[node->id].name); + + spin_lock_irqsave(&cfe->state_lock, flags); + fe_stop = is_fe_enabled(cfe) && + test_all_nodes(cfe, NODE_ENABLED, NODE_STREAMING); + + cfe->job_ready = false; + clear_state(cfe, NODE_STREAMING, node->id); + spin_unlock_irqrestore(&cfe->state_lock, flags); + + cfe_stop_channel(node, fe_stop); + + if (!test_any_node(cfe, NODE_STREAMING)) { + struct v4l2_subdev_state *state; + int ret; + + state = v4l2_subdev_lock_and_get_active_state(&cfe->csi2.sd); + + ret = v4l2_subdev_disable_streams(cfe->source_sd, + cfe->source_pad, + cfe->streams_mask); + if (ret) + cfe_err(cfe, "stream disable failed in subdev\n"); + + v4l2_subdev_unlock_state(state); + + csi2_close_rx(&cfe->csi2); + + 
cfg_reg_write(cfe, MIPICFG_INTE, 0); + + cfe_dbg(cfe, "%s: Streaming disabled\n", __func__); + } + + media_pipeline_stop(&node->pad); + + /* Clear all queued buffers for the node */ + cfe_return_buffers(node, VB2_BUF_STATE_ERROR); + + pm_runtime_put(&cfe->pdev->dev); +} + +static const struct vb2_ops cfe_video_qops = { + .wait_prepare = vb2_ops_wait_prepare, + .wait_finish = vb2_ops_wait_finish, + .queue_setup = cfe_queue_setup, + .buf_prepare = cfe_buffer_prepare, + .buf_queue = cfe_buffer_queue, + .start_streaming = cfe_start_streaming, + .stop_streaming = cfe_stop_streaming, +}; + +/* + * v4l2 ioctl ops + */ + +static int cfe_querycap(struct file *file, void *priv, + struct v4l2_capability *cap) +{ + strscpy(cap->driver, CFE_MODULE_NAME, sizeof(cap->driver)); + strscpy(cap->card, CFE_MODULE_NAME, sizeof(cap->card)); + + cap->capabilities |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_META_CAPTURE | + V4L2_CAP_META_OUTPUT; + + return 0; +} + +static int cfe_enum_fmt_vid_cap(struct file *file, void *priv, + struct v4l2_fmtdesc *f) +{ + struct cfe_node *node = video_drvdata(file); + struct cfe_device *cfe = node->cfe; + unsigned int i, j; + + if (!node_supports_image_output(node)) + return -EINVAL; + + cfe_dbg(cfe, "%s: [%s]\n", __func__, node_desc[node->id].name); + + for (i = 0, j = 0; i < ARRAY_SIZE(formats); i++) { + if (f->mbus_code && formats[i].code != f->mbus_code) + continue; + + if (formats[i].flags & CFE_FORMAT_FLAG_META_OUT || + formats[i].flags & CFE_FORMAT_FLAG_META_CAP) + continue; + + if (is_fe_node(node) && + !(formats[i].flags & CFE_FORMAT_FLAG_FE_OUT)) + continue; + + if (j == f->index) { + f->pixelformat = formats[i].fourcc; + f->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + return 0; + } + j++; + } + + return -EINVAL; +} + +static int cfe_g_fmt(struct file *file, void *priv, struct v4l2_format *f) +{ + struct cfe_node *node = video_drvdata(file); + + if (!node_supports_image(node)) + return -EINVAL; + + *f = node->vid_fmt; + + return 0; +} + +static int cfe_validate_fmt_vid_cap(struct cfe_node *node, + struct v4l2_format *f) +{ + struct cfe_device *cfe = node->cfe; + const struct cfe_fmt *fmt; + + cfe_dbg(cfe, "%s: [%s] %ux%u, V4L2 pix %p4cc\n", __func__, + node_desc[node->id].name, f->fmt.pix.width, f->fmt.pix.height, + &f->fmt.pix.pixelformat); + + if (!node_supports_image_output(node)) + return -EINVAL; + + /* + * Default to a format that works for both CSI2 and FE. 
+ */ + fmt = find_format_by_pix(f->fmt.pix.pixelformat); + if (!fmt) + fmt = find_format_by_code(MEDIA_BUS_FMT_SBGGR10_1X10); + + f->fmt.pix.pixelformat = fmt->fourcc; + + if (is_fe_node(node) && fmt->remap[CFE_REMAP_16BIT]) { + f->fmt.pix.pixelformat = fmt->remap[CFE_REMAP_16BIT]; + fmt = find_format_by_pix(f->fmt.pix.pixelformat); + } + + f->fmt.pix.field = V4L2_FIELD_NONE; + + cfe_calc_vid_format_size_bpl(cfe, fmt, f); + + return 0; +} + +static int cfe_s_fmt_vid_cap(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct cfe_node *node = video_drvdata(file); + struct cfe_device *cfe = node->cfe; + struct vb2_queue *q = &node->buffer_queue; + int ret; + + if (vb2_is_busy(q)) + return -EBUSY; + + ret = cfe_validate_fmt_vid_cap(node, f); + if (ret) + return ret; + + node->vid_fmt = *f; + + cfe_dbg(cfe, "%s: Set %ux%u, V4L2 pix %p4cc\n", __func__, + node->vid_fmt.fmt.pix.width, node->vid_fmt.fmt.pix.height, + &node->vid_fmt.fmt.pix.pixelformat); + + return 0; +} + +static int cfe_try_fmt_vid_cap(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct cfe_node *node = video_drvdata(file); + struct cfe_device *cfe = node->cfe; + + cfe_dbg(cfe, "%s: [%s]\n", __func__, node_desc[node->id].name); + + return cfe_validate_fmt_vid_cap(node, f); +} + +static int cfe_enum_fmt_meta(struct file *file, void *priv, + struct v4l2_fmtdesc *f) +{ + struct cfe_node *node = video_drvdata(file); + struct cfe_device *cfe = node->cfe; + + cfe_dbg(cfe, "%s: [%s]\n", __func__, node_desc[node->id].name); + + if (!node_supports_meta(node)) + return -EINVAL; + + switch (node->id) { + case CSI2_CH0...CSI2_CH3: + f->flags = V4L2_FMT_FLAG_META_LINE_BASED; + + switch (f->index) { + case 0: + f->pixelformat = V4L2_META_FMT_GENERIC_8; + return 0; + case 1: + f->pixelformat = V4L2_META_FMT_GENERIC_CSI2_10; + return 0; + case 2: + f->pixelformat = V4L2_META_FMT_GENERIC_CSI2_12; + return 0; + default: + return -EINVAL; + } + default: + break; + } + + if (f->index != 0) + return -EINVAL; + + switch (node->id) { + case FE_STATS: + f->pixelformat = V4L2_META_FMT_RPI_FE_STATS; + return 0; + case FE_CONFIG: + f->pixelformat = V4L2_META_FMT_RPI_FE_CFG; + return 0; + default: + return -EINVAL; + } +} + +static int cfe_validate_fmt_meta(struct cfe_node *node, struct v4l2_format *f) +{ + struct cfe_device *cfe = node->cfe; + const struct cfe_fmt *fmt; + + switch (node->id) { + case CSI2_CH0...CSI2_CH3: + cfe_dbg(cfe, "%s: [%s] %ux%u, V4L2 meta %p4cc\n", __func__, + node_desc[node->id].name, f->fmt.meta.width, + f->fmt.meta.height, &f->fmt.meta.dataformat); + break; + case FE_STATS: + case FE_CONFIG: + cfe_dbg(cfe, "%s: [%s] %u bytes, V4L2 meta %p4cc\n", __func__, + node_desc[node->id].name, f->fmt.meta.buffersize, + &f->fmt.meta.dataformat); + break; + default: + return -EINVAL; + } + + if (!node_supports_meta(node)) + return -EINVAL; + + switch (node->id) { + case CSI2_CH0...CSI2_CH3: + fmt = find_format_by_pix(f->fmt.meta.dataformat); + if (!fmt || !(fmt->flags & CFE_FORMAT_FLAG_META_CAP)) + fmt = find_format_by_pix(V4L2_META_FMT_GENERIC_CSI2_10); + + f->fmt.meta.dataformat = fmt->fourcc; + + cfe_calc_meta_format_size_bpl(cfe, fmt, f); + + return 0; + case FE_STATS: + f->fmt.meta.dataformat = V4L2_META_FMT_RPI_FE_STATS; + f->fmt.meta.buffersize = sizeof(struct pisp_statistics); + return 0; + case FE_CONFIG: + f->fmt.meta.dataformat = V4L2_META_FMT_RPI_FE_CFG; + f->fmt.meta.buffersize = sizeof(struct pisp_fe_config); + return 0; + default: + return -EINVAL; + } +} + +static int cfe_g_fmt_meta(struct file 
*file, void *priv, struct v4l2_format *f) +{ + struct cfe_node *node = video_drvdata(file); + struct cfe_device *cfe = node->cfe; + + cfe_dbg(cfe, "%s: [%s]\n", __func__, node_desc[node->id].name); + + if (!node_supports_meta(node)) + return -EINVAL; + + *f = node->meta_fmt; + + return 0; +} + +static int cfe_s_fmt_meta(struct file *file, void *priv, struct v4l2_format *f) +{ + struct cfe_node *node = video_drvdata(file); + struct cfe_device *cfe = node->cfe; + struct vb2_queue *q = &node->buffer_queue; + int ret; + + cfe_dbg(cfe, "%s: [%s]\n", __func__, node_desc[node->id].name); + + if (vb2_is_busy(q)) + return -EBUSY; + + if (!node_supports_meta(node)) + return -EINVAL; + + ret = cfe_validate_fmt_meta(node, f); + if (ret) + return ret; + + node->meta_fmt = *f; + + cfe_dbg(cfe, "%s: Set %p4cc\n", __func__, + &node->meta_fmt.fmt.meta.dataformat); + + return 0; +} + +static int cfe_try_fmt_meta(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct cfe_node *node = video_drvdata(file); + struct cfe_device *cfe = node->cfe; + + cfe_dbg(cfe, "%s: [%s]\n", __func__, node_desc[node->id].name); + return cfe_validate_fmt_meta(node, f); +} + +static int cfe_enum_framesizes(struct file *file, void *priv, + struct v4l2_frmsizeenum *fsize) +{ + struct cfe_node *node = video_drvdata(file); + struct cfe_device *cfe = node->cfe; + const struct cfe_fmt *fmt; + + cfe_dbg(cfe, "%s [%s]\n", __func__, node_desc[node->id].name); + + if (fsize->index > 0) + return -EINVAL; + + /* check for valid format */ + fmt = find_format_by_pix(fsize->pixel_format); + if (!fmt) { + cfe_dbg(cfe, "Invalid pixel code: %x\n", fsize->pixel_format); + return -EINVAL; + } + + /* TODO: Do we have limits on the step_width? */ + + fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE; + fsize->stepwise.min_width = MIN_WIDTH; + fsize->stepwise.max_width = MAX_WIDTH; + fsize->stepwise.step_width = 2; + fsize->stepwise.min_height = MIN_HEIGHT; + fsize->stepwise.max_height = MAX_HEIGHT; + fsize->stepwise.step_height = 1; + + return 0; +} + +static int cfe_vb2_ioctl_reqbufs(struct file *file, void *priv, + struct v4l2_requestbuffers *p) +{ + struct video_device *vdev = video_devdata(file); + struct cfe_node *node = video_get_drvdata(vdev); + struct cfe_device *cfe = node->cfe; + int ret; + + cfe_dbg(cfe, "%s: [%s] type:%u\n", __func__, node_desc[node->id].name, + p->type); + + if (p->type != V4L2_BUF_TYPE_VIDEO_CAPTURE && + p->type != V4L2_BUF_TYPE_META_CAPTURE && + p->type != V4L2_BUF_TYPE_META_OUTPUT) + return -EINVAL; + + ret = vb2_queue_change_type(vdev->queue, p->type); + if (ret) + return ret; + + return vb2_ioctl_reqbufs(file, priv, p); +} + +static int cfe_vb2_ioctl_create_bufs(struct file *file, void *priv, + struct v4l2_create_buffers *p) +{ + struct video_device *vdev = video_devdata(file); + struct cfe_node *node = video_get_drvdata(vdev); + struct cfe_device *cfe = node->cfe; + int ret; + + cfe_dbg(cfe, "%s: [%s] type:%u\n", __func__, node_desc[node->id].name, + p->format.type); + + if (p->format.type != V4L2_BUF_TYPE_VIDEO_CAPTURE && + p->format.type != V4L2_BUF_TYPE_META_CAPTURE && + p->format.type != V4L2_BUF_TYPE_META_OUTPUT) + return -EINVAL; + + ret = vb2_queue_change_type(vdev->queue, p->format.type); + if (ret) + return ret; + + return vb2_ioctl_create_bufs(file, priv, p); +} + +static int cfe_subscribe_event(struct v4l2_fh *fh, + const struct v4l2_event_subscription *sub) +{ + struct cfe_node *node = video_get_drvdata(fh->vdev); + + switch (sub->type) { + case V4L2_EVENT_FRAME_SYNC: + if 
(!node_supports_image_output(node)) + break; + + return v4l2_event_subscribe(fh, sub, 2, NULL); + case V4L2_EVENT_SOURCE_CHANGE: + if (!node_supports_image_output(node) && + !node_supports_meta_output(node)) + break; + + return v4l2_event_subscribe(fh, sub, 4, NULL); + } + + return v4l2_ctrl_subscribe_event(fh, sub); +} + +static const struct v4l2_ioctl_ops cfe_ioctl_ops = { + .vidioc_querycap = cfe_querycap, + .vidioc_enum_fmt_vid_cap = cfe_enum_fmt_vid_cap, + .vidioc_g_fmt_vid_cap = cfe_g_fmt, + .vidioc_s_fmt_vid_cap = cfe_s_fmt_vid_cap, + .vidioc_try_fmt_vid_cap = cfe_try_fmt_vid_cap, + + .vidioc_enum_fmt_meta_cap = cfe_enum_fmt_meta, + .vidioc_g_fmt_meta_cap = cfe_g_fmt_meta, + .vidioc_s_fmt_meta_cap = cfe_s_fmt_meta, + .vidioc_try_fmt_meta_cap = cfe_try_fmt_meta, + + .vidioc_enum_fmt_meta_out = cfe_enum_fmt_meta, + .vidioc_g_fmt_meta_out = cfe_g_fmt_meta, + .vidioc_s_fmt_meta_out = cfe_s_fmt_meta, + .vidioc_try_fmt_meta_out = cfe_try_fmt_meta, + + .vidioc_enum_framesizes = cfe_enum_framesizes, + + .vidioc_reqbufs = cfe_vb2_ioctl_reqbufs, + .vidioc_create_bufs = cfe_vb2_ioctl_create_bufs, + .vidioc_prepare_buf = vb2_ioctl_prepare_buf, + .vidioc_querybuf = vb2_ioctl_querybuf, + .vidioc_qbuf = vb2_ioctl_qbuf, + .vidioc_dqbuf = vb2_ioctl_dqbuf, + .vidioc_expbuf = vb2_ioctl_expbuf, + .vidioc_streamon = vb2_ioctl_streamon, + .vidioc_streamoff = vb2_ioctl_streamoff, + + .vidioc_subscribe_event = cfe_subscribe_event, + .vidioc_unsubscribe_event = v4l2_event_unsubscribe, +}; + +static void cfe_notify(struct v4l2_subdev *sd, unsigned int notification, + void *arg) +{ + struct cfe_device *cfe = to_cfe_device(sd->v4l2_dev); + + switch (notification) { + case V4L2_DEVICE_NOTIFY_EVENT: + for (unsigned int i = 0; i < NUM_NODES; i++) { + struct cfe_node *node = &cfe->node[i]; + + if (check_state(cfe, NODE_REGISTERED, i)) + continue; + + v4l2_event_queue(&node->video_dev, arg); + } + break; + default: + break; + } +} + +/* cfe capture driver file operations */ +static const struct v4l2_file_operations cfe_fops = { + .owner = THIS_MODULE, + .open = v4l2_fh_open, + .release = vb2_fop_release, + .poll = vb2_fop_poll, + .unlocked_ioctl = video_ioctl2, + .mmap = vb2_fop_mmap, +}; + +static int cfe_video_link_validate(struct media_link *link) +{ + struct video_device *vd = container_of(link->sink->entity, + struct video_device, entity); + struct cfe_node *node = container_of(vd, struct cfe_node, video_dev); + struct cfe_device *cfe = node->cfe; + struct v4l2_mbus_framefmt *source_fmt; + struct v4l2_subdev_state *state; + struct v4l2_subdev *source_sd; + int ret = 0; + + cfe_dbg(cfe, "%s: [%s] link \"%s\":%u -> \"%s\":%u\n", __func__, + node_desc[node->id].name, + link->source->entity->name, link->source->index, + link->sink->entity->name, link->sink->index); + + if (!media_entity_remote_source_pad_unique(link->sink->entity)) { + cfe_err(cfe, "video node %s pad not connected\n", vd->name); + return -ENOTCONN; + } + + source_sd = media_entity_to_v4l2_subdev(link->source->entity); + + state = v4l2_subdev_lock_and_get_active_state(source_sd); + + source_fmt = v4l2_subdev_state_get_format(state, link->source->index); + if (!source_fmt) { + ret = -EINVAL; + goto out; + } + + if (is_image_output_node(node)) { + struct v4l2_pix_format *pix_fmt = &node->vid_fmt.fmt.pix; + const struct cfe_fmt *fmt; + + if (source_fmt->width != pix_fmt->width || + source_fmt->height != pix_fmt->height) { + cfe_err(cfe, "Wrong width or height %ux%u (remote pad set to %ux%u)\n", + pix_fmt->width, pix_fmt->height, + source_fmt->width, 
source_fmt->height); + ret = -EINVAL; + goto out; + } + + fmt = find_format_by_code_and_fourcc(source_fmt->code, + pix_fmt->pixelformat); + if (!fmt) { + cfe_err(cfe, "Format mismatch!\n"); + ret = -EINVAL; + goto out; + } + } else if (is_csi2_node(node) && is_meta_output_node(node)) { + struct v4l2_meta_format *meta_fmt = &node->meta_fmt.fmt.meta; + const struct cfe_fmt *fmt; + + if (source_fmt->width != meta_fmt->width || + source_fmt->height != meta_fmt->height) { + cfe_err(cfe, "Wrong width or height %ux%u (remote pad set to %ux%u)\n", + meta_fmt->width, meta_fmt->height, + source_fmt->width, source_fmt->height); + ret = -EINVAL; + goto out; + } + + fmt = find_format_by_code_and_fourcc(source_fmt->code, + meta_fmt->dataformat); + if (!fmt) { + cfe_err(cfe, "Format mismatch!\n"); + ret = -EINVAL; + goto out; + } + } + +out: + v4l2_subdev_unlock_state(state); + + return ret; +} + +static const struct media_entity_operations cfe_media_entity_ops = { + .link_validate = cfe_video_link_validate, +}; + +static int cfe_video_link_notify(struct media_link *link, u32 flags, + unsigned int notification) +{ + struct media_device *mdev = link->graph_obj.mdev; + struct cfe_device *cfe = container_of(mdev, struct cfe_device, mdev); + struct media_entity *fe = &cfe->fe.sd.entity; + struct media_entity *csi2 = &cfe->csi2.sd.entity; + unsigned long lock_flags; + + if (notification != MEDIA_DEV_NOTIFY_POST_LINK_CH) + return 0; + + cfe_dbg(cfe, "%s: %s[%u] -> %s[%u] 0x%x", __func__, + link->source->entity->name, link->source->index, + link->sink->entity->name, link->sink->index, flags); + + spin_lock_irqsave(&cfe->state_lock, lock_flags); + + for (unsigned int i = 0; i < NUM_NODES; i++) { + if (link->sink->entity != &cfe->node[i].video_dev.entity && + link->source->entity != &cfe->node[i].video_dev.entity) + continue; + + if (link->flags & MEDIA_LNK_FL_ENABLED) + set_state(cfe, NODE_ENABLED, i); + else + clear_state(cfe, NODE_ENABLED, i); + + break; + } + + spin_unlock_irqrestore(&cfe->state_lock, lock_flags); + + if (link->source->entity != csi2) + return 0; + if (link->sink->entity != fe) + return 0; + if (link->sink->index != 0) + return 0; + + cfe->fe_csi2_channel = -1; + if (link->flags & MEDIA_LNK_FL_ENABLED) { + if (link->source->index == node_desc[CSI2_CH0].link_pad) + cfe->fe_csi2_channel = CSI2_CH0; + else if (link->source->index == node_desc[CSI2_CH1].link_pad) + cfe->fe_csi2_channel = CSI2_CH1; + else if (link->source->index == node_desc[CSI2_CH2].link_pad) + cfe->fe_csi2_channel = CSI2_CH2; + else if (link->source->index == node_desc[CSI2_CH3].link_pad) + cfe->fe_csi2_channel = CSI2_CH3; + } + + if (is_fe_enabled(cfe)) + cfe_dbg(cfe, "%s: Found CSI2:%d -> FE:0 link\n", __func__, + cfe->fe_csi2_channel); + else + cfe_dbg(cfe, "%s: Unable to find CSI2:x -> FE:0 link\n", + __func__); + + return 0; +} + +static const struct media_device_ops cfe_media_device_ops = { + .link_notify = cfe_video_link_notify, +}; + +static void cfe_release(struct kref *kref) +{ + struct cfe_device *cfe = container_of(kref, struct cfe_device, kref); + + media_device_cleanup(&cfe->mdev); + + kfree(cfe); +} + +static void cfe_put(struct cfe_device *cfe) +{ + kref_put(&cfe->kref, cfe_release); +} + +static void cfe_get(struct cfe_device *cfe) +{ + kref_get(&cfe->kref); +} + +static void cfe_node_release(struct video_device *vdev) +{ + struct cfe_node *node = video_get_drvdata(vdev); + + cfe_put(node->cfe); +} + +static int cfe_register_node(struct cfe_device *cfe, int id) +{ + struct video_device *vdev; + const struct 
cfe_fmt *fmt; + struct vb2_queue *q; + struct cfe_node *node = &cfe->node[id]; + int ret; + + node->cfe = cfe; + node->id = id; + + if (node_supports_image(node)) { + if (node_supports_image_output(node)) + node->vid_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + else + node->vid_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + + fmt = find_format_by_code(cfe_default_format.code); + if (!fmt) { + cfe_err(cfe, "Failed to find format code\n"); + return -EINVAL; + } + + node->vid_fmt.fmt.pix.pixelformat = fmt->fourcc; + v4l2_fill_pix_format(&node->vid_fmt.fmt.pix, + &cfe_default_format); + + ret = cfe_validate_fmt_vid_cap(node, &node->vid_fmt); + if (ret) + return ret; + } + + if (node_supports_meta(node)) { + if (node_supports_meta_output(node)) + node->meta_fmt.type = V4L2_BUF_TYPE_META_CAPTURE; + else + node->meta_fmt.type = V4L2_BUF_TYPE_META_OUTPUT; + + ret = cfe_validate_fmt_meta(node, &node->meta_fmt); + if (ret) + return ret; + } + + mutex_init(&node->lock); + + q = &node->buffer_queue; + q->type = node_supports_image(node) ? node->vid_fmt.type : + node->meta_fmt.type; + q->io_modes = VB2_MMAP | VB2_DMABUF; + q->drv_priv = node; + q->ops = &cfe_video_qops; + q->mem_ops = &vb2_dma_contig_memops; + q->buf_struct_size = id == FE_CONFIG ? sizeof(struct cfe_config_buffer) + : sizeof(struct cfe_buffer); + q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; + q->lock = &node->lock; + q->min_queued_buffers = 1; + q->dev = &cfe->pdev->dev; + + ret = vb2_queue_init(q); + if (ret) { + cfe_err(cfe, "vb2_queue_init() failed\n"); + return ret; + } + + INIT_LIST_HEAD(&node->dma_queue); + + vdev = &node->video_dev; + vdev->release = cfe_node_release; + vdev->fops = &cfe_fops; + vdev->ioctl_ops = &cfe_ioctl_ops; + vdev->entity.ops = &cfe_media_entity_ops; + vdev->v4l2_dev = &cfe->v4l2_dev; + vdev->vfl_dir = (node_supports_image_output(node) || + node_supports_meta_output(node)) ? + VFL_DIR_RX : + VFL_DIR_TX; + vdev->queue = q; + vdev->lock = &node->lock; + vdev->device_caps = node_desc[id].caps; + vdev->device_caps |= V4L2_CAP_STREAMING | V4L2_CAP_IO_MC; + + /* Define the device names */ + snprintf(vdev->name, sizeof(vdev->name), "%s-%s", CFE_MODULE_NAME, + node_desc[id].name); + + video_set_drvdata(vdev, node); + node->pad.flags = node_desc[id].pad_flags; + media_entity_pads_init(&vdev->entity, 1, &node->pad); + + if (!node_supports_image(node)) { + v4l2_disable_ioctl(&node->video_dev, + VIDIOC_ENUM_FRAMEINTERVALS); + v4l2_disable_ioctl(&node->video_dev, VIDIOC_ENUM_FRAMESIZES); + } + + ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1); + if (ret) { + cfe_err(cfe, "Unable to register video device %s\n", + vdev->name); + return ret; + } + + cfe_info(cfe, "Registered [%s] node id %d as /dev/video%u\n", + vdev->name, id, vdev->num); + + /* + * Acquire a reference to cfe, which will be released when the video + * device will be unregistered and userspace will have closed all open + * file handles. 
+ */ + cfe_get(cfe); + set_state(cfe, NODE_REGISTERED, id); + + return 0; +} + +static void cfe_unregister_nodes(struct cfe_device *cfe) +{ + for (unsigned int i = 0; i < NUM_NODES; i++) { + struct cfe_node *node = &cfe->node[i]; + + if (check_state(cfe, NODE_REGISTERED, i)) { + clear_state(cfe, NODE_REGISTERED, i); + video_unregister_device(&node->video_dev); + } + } +} + +static int cfe_link_node_pads(struct cfe_device *cfe) +{ + struct media_pad *remote_pad; + int ret; + + /* Source -> CSI2 */ + + ret = v4l2_create_fwnode_links_to_pad(cfe->source_sd, + &cfe->csi2.pad[CSI2_PAD_SINK], + MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED); + + if (ret) { + cfe_err(cfe, "Failed to create links to the source: %d\n", ret); + return ret; + } + + remote_pad = media_pad_remote_pad_unique(&cfe->csi2.pad[CSI2_PAD_SINK]); + if (IS_ERR(remote_pad)) { + ret = PTR_ERR(remote_pad); + cfe_err(cfe, "Failed to get unique remote source pad: %d\n", + ret); + return ret; + } + + cfe->source_pad = remote_pad->index; + + for (unsigned int i = 0; i < CSI2_NUM_CHANNELS; i++) { + struct cfe_node *node = &cfe->node[i]; + + if (!check_state(cfe, NODE_REGISTERED, i)) + continue; + + /* CSI2 channel # -> /dev/video# */ + ret = media_create_pad_link(&cfe->csi2.sd.entity, + node_desc[i].link_pad, + &node->video_dev.entity, 0, 0); + if (ret) + return ret; + + if (node_supports_image(node)) { + /* CSI2 channel # -> FE Input */ + ret = media_create_pad_link(&cfe->csi2.sd.entity, + node_desc[i].link_pad, + &cfe->fe.sd.entity, + FE_STREAM_PAD, 0); + if (ret) + return ret; + } + } + + for (unsigned int i = CSI2_NUM_CHANNELS; i < NUM_NODES; i++) { + struct cfe_node *node = &cfe->node[i]; + struct media_entity *src, *dst; + unsigned int src_pad, dst_pad; + + if (node_desc[i].pad_flags & MEDIA_PAD_FL_SINK) { + /* FE -> /dev/video# */ + src = &cfe->fe.sd.entity; + src_pad = node_desc[i].link_pad; + dst = &node->video_dev.entity; + dst_pad = 0; + } else { + /* /dev/video# -> FE */ + dst = &cfe->fe.sd.entity; + dst_pad = node_desc[i].link_pad; + src = &node->video_dev.entity; + src_pad = 0; + } + + ret = media_create_pad_link(src, src_pad, dst, dst_pad, 0); + if (ret) + return ret; + } + + return 0; +} + +static int cfe_probe_complete(struct cfe_device *cfe) +{ + int ret; + + cfe->v4l2_dev.notify = cfe_notify; + + for (unsigned int i = 0; i < NUM_NODES; i++) { + ret = cfe_register_node(cfe, i); + if (ret) { + cfe_err(cfe, "Unable to register video node %u.\n", i); + goto unregister; + } + } + + ret = cfe_link_node_pads(cfe); + if (ret) { + cfe_err(cfe, "Unable to link node pads.\n"); + goto unregister; + } + + ret = v4l2_device_register_subdev_nodes(&cfe->v4l2_dev); + if (ret) { + cfe_err(cfe, "Unable to register subdev nodes.\n"); + goto unregister; + } + + return 0; + +unregister: + cfe_unregister_nodes(cfe); + return ret; +} + +static int cfe_async_bound(struct v4l2_async_notifier *notifier, + struct v4l2_subdev *subdev, + struct v4l2_async_connection *asd) +{ + struct cfe_device *cfe = to_cfe_device(notifier->v4l2_dev); + + if (cfe->source_sd) { + cfe_err(cfe, "Rejecting subdev %s (Already set!!)", + subdev->name); + return 0; + } + + cfe->source_sd = subdev; + + cfe_dbg(cfe, "Using source %s for capture\n", subdev->name); + + return 0; +} + +static int cfe_async_complete(struct v4l2_async_notifier *notifier) +{ + struct cfe_device *cfe = to_cfe_device(notifier->v4l2_dev); + + return cfe_probe_complete(cfe); +} + +static const struct v4l2_async_notifier_operations cfe_async_ops = { + .bound = cfe_async_bound, + .complete = 
cfe_async_complete, +}; + +static int cfe_register_async_nf(struct cfe_device *cfe) +{ + struct platform_device *pdev = cfe->pdev; + struct v4l2_fwnode_endpoint ep = { .bus_type = V4L2_MBUS_CSI2_DPHY }; + struct fwnode_handle *local_ep_fwnode; + struct v4l2_async_connection *asd; + int ret; + + local_ep_fwnode = fwnode_graph_get_endpoint_by_id(pdev->dev.fwnode, 0, + 0, 0); + if (!local_ep_fwnode) { + cfe_err(cfe, "Failed to find local endpoint fwnode\n"); + return -ENODEV; + } + + /* Parse the local endpoint and validate its configuration. */ + ret = v4l2_fwnode_endpoint_parse(local_ep_fwnode, &ep); + if (ret) { + cfe_err(cfe, "Failed to find remote endpoint fwnode\n"); + goto err_put_local_fwnode; + } + + for (unsigned int lane = 0; lane < ep.bus.mipi_csi2.num_data_lanes; + lane++) { + if (ep.bus.mipi_csi2.data_lanes[lane] != lane + 1) { + cfe_err(cfe, "Data lanes reordering not supported\n"); + ret = -EINVAL; + goto err_put_local_fwnode; + } + } + + cfe->csi2.dphy.max_lanes = ep.bus.mipi_csi2.num_data_lanes; + cfe->csi2.bus_flags = ep.bus.mipi_csi2.flags; + + /* Initialize and register the async notifier. */ + v4l2_async_nf_init(&cfe->notifier, &cfe->v4l2_dev); + cfe->notifier.ops = &cfe_async_ops; + + asd = v4l2_async_nf_add_fwnode_remote(&cfe->notifier, local_ep_fwnode, + struct v4l2_async_connection); + if (IS_ERR(asd)) { + ret = PTR_ERR(asd); + cfe_err(cfe, "Error adding subdevice: %d\n", ret); + goto err_put_local_fwnode; + } + + ret = v4l2_async_nf_register(&cfe->notifier); + if (ret) { + cfe_err(cfe, "Error registering async notifier: %d\n", ret); + goto err_nf_cleanup; + } + + fwnode_handle_put(local_ep_fwnode); + + return 0; + +err_nf_cleanup: + v4l2_async_nf_cleanup(&cfe->notifier); +err_put_local_fwnode: + fwnode_handle_put(local_ep_fwnode); + + return ret; +} + +static int cfe_probe(struct platform_device *pdev) +{ + struct cfe_device *cfe; + char debugfs_name[32]; + int ret; + + cfe = kzalloc(sizeof(*cfe), GFP_KERNEL); + if (!cfe) + return -ENOMEM; + + platform_set_drvdata(pdev, cfe); + + kref_init(&cfe->kref); + cfe->pdev = pdev; + cfe->fe_csi2_channel = -1; + spin_lock_init(&cfe->state_lock); + + cfe->csi2.base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(cfe->csi2.base)) { + dev_err(&pdev->dev, "Failed to get dma io block\n"); + ret = PTR_ERR(cfe->csi2.base); + goto err_cfe_put; + } + + cfe->csi2.dphy.base = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(cfe->csi2.dphy.base)) { + dev_err(&pdev->dev, "Failed to get host io block\n"); + ret = PTR_ERR(cfe->csi2.dphy.base); + goto err_cfe_put; + } + + cfe->mipi_cfg_base = devm_platform_ioremap_resource(pdev, 2); + if (IS_ERR(cfe->mipi_cfg_base)) { + dev_err(&pdev->dev, "Failed to get mipi cfg io block\n"); + ret = PTR_ERR(cfe->mipi_cfg_base); + goto err_cfe_put; + } + + cfe->fe.base = devm_platform_ioremap_resource(pdev, 3); + if (IS_ERR(cfe->fe.base)) { + dev_err(&pdev->dev, "Failed to get pisp fe io block\n"); + ret = PTR_ERR(cfe->fe.base); + goto err_cfe_put; + } + + ret = platform_get_irq(pdev, 0); + if (ret <= 0) { + dev_err(&pdev->dev, "No IRQ resource\n"); + ret = -EINVAL; + goto err_cfe_put; + } + + ret = devm_request_irq(&pdev->dev, ret, cfe_isr, 0, "rp1-cfe", cfe); + if (ret) { + dev_err(&pdev->dev, "Unable to request interrupt\n"); + ret = -EINVAL; + goto err_cfe_put; + } + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(&pdev->dev, "DMA enable failed\n"); + goto err_cfe_put; + } + + /* TODO: Enable clock only when running. 
*/ + cfe->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(cfe->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(cfe->clk), + "clock not found\n"); + + cfe->mdev.dev = &pdev->dev; + cfe->mdev.ops = &cfe_media_device_ops; + strscpy(cfe->mdev.model, CFE_MODULE_NAME, sizeof(cfe->mdev.model)); + strscpy(cfe->mdev.serial, "", sizeof(cfe->mdev.serial)); + snprintf(cfe->mdev.bus_info, sizeof(cfe->mdev.bus_info), "platform:%s", + dev_name(&pdev->dev)); + + media_device_init(&cfe->mdev); + + cfe->v4l2_dev.mdev = &cfe->mdev; + + ret = v4l2_device_register(&pdev->dev, &cfe->v4l2_dev); + if (ret) { + cfe_err(cfe, "Unable to register v4l2 device.\n"); + goto err_cfe_put; + } + + snprintf(debugfs_name, sizeof(debugfs_name), "rp1-cfe:%s", + dev_name(&pdev->dev)); + cfe->debugfs = debugfs_create_dir(debugfs_name, NULL); + debugfs_create_file("regs", 0440, cfe->debugfs, cfe, + &mipi_cfg_regs_fops); + + /* Enable the block power domain */ + pm_runtime_enable(&pdev->dev); + + ret = pm_runtime_resume_and_get(&cfe->pdev->dev); + if (ret) + goto err_runtime_disable; + + cfe->csi2.v4l2_dev = &cfe->v4l2_dev; + ret = csi2_init(&cfe->csi2, cfe->debugfs); + if (ret) { + cfe_err(cfe, "Failed to init csi2 (%d)\n", ret); + goto err_runtime_put; + } + + cfe->fe.v4l2_dev = &cfe->v4l2_dev; + ret = pisp_fe_init(&cfe->fe, cfe->debugfs); + if (ret) { + cfe_err(cfe, "Failed to init pisp fe (%d)\n", ret); + goto err_csi2_uninit; + } + + cfe->mdev.hw_revision = cfe->fe.hw_revision; + ret = media_device_register(&cfe->mdev); + if (ret < 0) { + cfe_err(cfe, "Unable to register media-controller device.\n"); + goto err_pisp_fe_uninit; + } + + ret = cfe_register_async_nf(cfe); + if (ret) { + cfe_err(cfe, "Failed to connect subdevs\n"); + goto err_media_unregister; + } + + pm_runtime_put(&cfe->pdev->dev); + + return 0; + +err_media_unregister: + media_device_unregister(&cfe->mdev); +err_pisp_fe_uninit: + pisp_fe_uninit(&cfe->fe); +err_csi2_uninit: + csi2_uninit(&cfe->csi2); +err_runtime_put: + pm_runtime_put(&cfe->pdev->dev); +err_runtime_disable: + pm_runtime_disable(&pdev->dev); + debugfs_remove(cfe->debugfs); + v4l2_device_unregister(&cfe->v4l2_dev); +err_cfe_put: + cfe_put(cfe); + + return ret; +} + +static void cfe_remove(struct platform_device *pdev) +{ + struct cfe_device *cfe = platform_get_drvdata(pdev); + + debugfs_remove(cfe->debugfs); + + v4l2_async_nf_unregister(&cfe->notifier); + v4l2_async_nf_cleanup(&cfe->notifier); + + media_device_unregister(&cfe->mdev); + cfe_unregister_nodes(cfe); + + pisp_fe_uninit(&cfe->fe); + csi2_uninit(&cfe->csi2); + + pm_runtime_disable(&pdev->dev); + + v4l2_device_unregister(&cfe->v4l2_dev); + + cfe_put(cfe); +} + +static int cfe_runtime_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct cfe_device *cfe = platform_get_drvdata(pdev); + + clk_disable_unprepare(cfe->clk); + + return 0; +} + +static int cfe_runtime_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct cfe_device *cfe = platform_get_drvdata(pdev); + int ret; + + ret = clk_prepare_enable(cfe->clk); + if (ret) { + dev_err(dev, "Unable to enable clock\n"); + return ret; + } + + return 0; +} + +static const struct dev_pm_ops cfe_pm_ops = { + SET_RUNTIME_PM_OPS(cfe_runtime_suspend, cfe_runtime_resume, NULL) + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + +static const struct of_device_id cfe_of_match[] = { + { .compatible = "raspberrypi,rp1-cfe" }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, 
cfe_of_match); + +static struct platform_driver cfe_driver = { + .probe = cfe_probe, + .remove = cfe_remove, + .driver = { + .name = CFE_MODULE_NAME, + .of_match_table = cfe_of_match, + .pm = &cfe_pm_ops, + }, +}; + +module_platform_driver(cfe_driver); + +MODULE_AUTHOR("Naushir Patuck "); +MODULE_AUTHOR("Tomi Valkeinen "); +MODULE_DESCRIPTION("Raspberry Pi RP1 Camera Front End driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(CFE_VERSION); diff --git a/drivers/media/platform/raspberrypi/rp1-cfe/cfe.h b/drivers/media/platform/raspberrypi/rp1-cfe/cfe.h new file mode 100644 index 000000000000..c63cc314be3c --- /dev/null +++ b/drivers/media/platform/raspberrypi/rp1-cfe/cfe.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * RP1 CFE Driver + * + * Copyright (c) 2021-2024 Raspberry Pi Ltd. + * Copyright (c) 2023-2024 Ideas on Board Oy + */ +#ifndef _RP1_CFE_ +#define _RP1_CFE_ + +#include +#include +#include + +extern bool cfe_debug_verbose; + +enum cfe_remap_types { + CFE_REMAP_16BIT, + CFE_REMAP_COMPRESSED, + CFE_NUM_REMAP, +}; + +#define CFE_FORMAT_FLAG_META_OUT BIT(0) +#define CFE_FORMAT_FLAG_META_CAP BIT(1) +#define CFE_FORMAT_FLAG_FE_OUT BIT(2) + +struct cfe_fmt { + u32 fourcc; + u32 code; + u8 depth; + u8 csi_dt; + u32 remap[CFE_NUM_REMAP]; + u32 flags; +}; + +extern const struct v4l2_mbus_framefmt cfe_default_format; + +const struct cfe_fmt *find_format_by_code(u32 code); +const struct cfe_fmt *find_format_by_pix(u32 pixelformat); +u32 cfe_find_16bit_code(u32 code); +u32 cfe_find_compressed_code(u32 code); + +#endif diff --git a/drivers/media/platform/raspberrypi/rp1-cfe/csi2.c b/drivers/media/platform/raspberrypi/rp1-cfe/csi2.c new file mode 100644 index 000000000000..35c2ab1e2cd4 --- /dev/null +++ b/drivers/media/platform/raspberrypi/rp1-cfe/csi2.c @@ -0,0 +1,586 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * RP1 CSI-2 Driver + * + * Copyright (c) 2021-2024 Raspberry Pi Ltd. + * Copyright (c) 2023-2024 Ideas on Board Oy + */ + +#include +#include +#include +#include + +#include + +#include "cfe.h" +#include "csi2.h" + +#include "cfe-trace.h" + +static bool csi2_track_errors; +module_param_named(track_csi2_errors, csi2_track_errors, bool, 0); +MODULE_PARM_DESC(track_csi2_errors, "track csi-2 errors"); + +#define csi2_dbg(csi2, fmt, arg...) dev_dbg((csi2)->v4l2_dev->dev, fmt, ##arg) +#define csi2_err(csi2, fmt, arg...) 
dev_err((csi2)->v4l2_dev->dev, fmt, ##arg) + +/* CSI2-DMA registers */ +#define CSI2_STATUS 0x000 +#define CSI2_QOS 0x004 +#define CSI2_DISCARDS_OVERFLOW 0x008 +#define CSI2_DISCARDS_INACTIVE 0x00c +#define CSI2_DISCARDS_UNMATCHED 0x010 +#define CSI2_DISCARDS_LEN_LIMIT 0x014 + +#define CSI2_DISCARDS_AMOUNT_SHIFT 0 +#define CSI2_DISCARDS_AMOUNT_MASK GENMASK(23, 0) +#define CSI2_DISCARDS_DT_SHIFT 24 +#define CSI2_DISCARDS_DT_MASK GENMASK(29, 24) +#define CSI2_DISCARDS_VC_SHIFT 30 +#define CSI2_DISCARDS_VC_MASK GENMASK(31, 30) + +#define CSI2_LLEV_PANICS 0x018 +#define CSI2_ULEV_PANICS 0x01c +#define CSI2_IRQ_MASK 0x020 +#define CSI2_IRQ_MASK_IRQ_OVERFLOW BIT(0) +#define CSI2_IRQ_MASK_IRQ_DISCARD_OVERFLOW BIT(1) +#define CSI2_IRQ_MASK_IRQ_DISCARD_LENGTH_LIMIT BIT(2) +#define CSI2_IRQ_MASK_IRQ_DISCARD_UNMATCHED BIT(3) +#define CSI2_IRQ_MASK_IRQ_DISCARD_INACTIVE BIT(4) +#define CSI2_IRQ_MASK_IRQ_ALL \ + (CSI2_IRQ_MASK_IRQ_OVERFLOW | CSI2_IRQ_MASK_IRQ_DISCARD_OVERFLOW | \ + CSI2_IRQ_MASK_IRQ_DISCARD_LENGTH_LIMIT | \ + CSI2_IRQ_MASK_IRQ_DISCARD_UNMATCHED | \ + CSI2_IRQ_MASK_IRQ_DISCARD_INACTIVE) + +#define CSI2_CTRL 0x024 +#define CSI2_CH_CTRL(x) ((x) * 0x40 + 0x28) +#define CSI2_CH_ADDR0(x) ((x) * 0x40 + 0x2c) +#define CSI2_CH_ADDR1(x) ((x) * 0x40 + 0x3c) +#define CSI2_CH_STRIDE(x) ((x) * 0x40 + 0x30) +#define CSI2_CH_LENGTH(x) ((x) * 0x40 + 0x34) +#define CSI2_CH_DEBUG(x) ((x) * 0x40 + 0x38) +#define CSI2_CH_FRAME_SIZE(x) ((x) * 0x40 + 0x40) +#define CSI2_CH_COMP_CTRL(x) ((x) * 0x40 + 0x44) +#define CSI2_CH_FE_FRAME_ID(x) ((x) * 0x40 + 0x48) + +/* CSI2_STATUS */ +#define CSI2_STATUS_IRQ_FS(x) (BIT(0) << (x)) +#define CSI2_STATUS_IRQ_FE(x) (BIT(4) << (x)) +#define CSI2_STATUS_IRQ_FE_ACK(x) (BIT(8) << (x)) +#define CSI2_STATUS_IRQ_LE(x) (BIT(12) << (x)) +#define CSI2_STATUS_IRQ_LE_ACK(x) (BIT(16) << (x)) +#define CSI2_STATUS_IRQ_CH_MASK(x) \ + (CSI2_STATUS_IRQ_FS(x) | CSI2_STATUS_IRQ_FE(x) | \ + CSI2_STATUS_IRQ_FE_ACK(x) | CSI2_STATUS_IRQ_LE(x) | \ + CSI2_STATUS_IRQ_LE_ACK(x)) +#define CSI2_STATUS_IRQ_OVERFLOW BIT(20) +#define CSI2_STATUS_IRQ_DISCARD_OVERFLOW BIT(21) +#define CSI2_STATUS_IRQ_DISCARD_LEN_LIMIT BIT(22) +#define CSI2_STATUS_IRQ_DISCARD_UNMATCHED BIT(23) +#define CSI2_STATUS_IRQ_DISCARD_INACTIVE BIT(24) + +/* CSI2_CTRL */ +#define CSI2_CTRL_EOP_IS_EOL BIT(0) + +/* CSI2_CH_CTRL */ +#define CSI2_CH_CTRL_DMA_EN BIT(0) +#define CSI2_CH_CTRL_FORCE BIT(3) +#define CSI2_CH_CTRL_AUTO_ARM BIT(4) +#define CSI2_CH_CTRL_IRQ_EN_FS BIT(13) +#define CSI2_CH_CTRL_IRQ_EN_FE BIT(14) +#define CSI2_CH_CTRL_IRQ_EN_FE_ACK BIT(15) +#define CSI2_CH_CTRL_IRQ_EN_LE BIT(16) +#define CSI2_CH_CTRL_IRQ_EN_LE_ACK BIT(17) +#define CSI2_CH_CTRL_FLUSH_FE BIT(28) +#define CSI2_CH_CTRL_PACK_LINE BIT(29) +#define CSI2_CH_CTRL_PACK_BYTES BIT(30) +#define CSI2_CH_CTRL_CH_MODE_MASK GENMASK(2, 1) +#define CSI2_CH_CTRL_VC_MASK GENMASK(6, 5) +#define CSI2_CH_CTRL_DT_MASK GENMASK(12, 7) +#define CSI2_CH_CTRL_LC_MASK GENMASK(27, 18) + +/* CHx_COMPRESSION_CONTROL */ +#define CSI2_CH_COMP_CTRL_OFFSET_MASK GENMASK(15, 0) +#define CSI2_CH_COMP_CTRL_SHIFT_MASK GENMASK(19, 16) +#define CSI2_CH_COMP_CTRL_MODE_MASK GENMASK(25, 24) + +static inline u32 csi2_reg_read(struct csi2_device *csi2, u32 offset) +{ + return readl(csi2->base + offset); +} + +static inline void csi2_reg_write(struct csi2_device *csi2, u32 offset, u32 val) +{ + writel(val, csi2->base + offset); +} + +static inline void set_field(u32 *valp, u32 field, u32 mask) +{ + u32 val = *valp; + + val &= ~mask; + val |= (field << __ffs(mask)) & mask; + *valp = val; +} + 
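+/*
+ * Worked example of the helper above: with CSI2_CH_CTRL_DT_MASK being
+ * GENMASK(12, 7), __ffs() of the mask is 7, so
+ * set_field(&ctrl, dt, CSI2_CH_CTRL_DT_MASK) clears bits 12:7 of ctrl and
+ * then ORs in (dt << 7) masked to those bits, leaving every other bit of
+ * ctrl untouched. csi2_start_channel() below packs the channel mode, VC and
+ * DT fields this way.
+ */
+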
+static int csi2_regs_show(struct seq_file *s, void *data) +{ + struct csi2_device *csi2 = s->private; + int ret; + + ret = pm_runtime_resume_and_get(csi2->v4l2_dev->dev); + if (ret) + return ret; + +#define DUMP(reg) seq_printf(s, #reg " \t0x%08x\n", csi2_reg_read(csi2, reg)) +#define DUMP_CH(idx, reg) seq_printf(s, #reg "(%u) \t0x%08x\n", idx, \ + csi2_reg_read(csi2, reg(idx))) + + DUMP(CSI2_STATUS); + DUMP(CSI2_DISCARDS_OVERFLOW); + DUMP(CSI2_DISCARDS_INACTIVE); + DUMP(CSI2_DISCARDS_UNMATCHED); + DUMP(CSI2_DISCARDS_LEN_LIMIT); + DUMP(CSI2_LLEV_PANICS); + DUMP(CSI2_ULEV_PANICS); + DUMP(CSI2_IRQ_MASK); + DUMP(CSI2_CTRL); + + for (unsigned int i = 0; i < CSI2_NUM_CHANNELS; ++i) { + DUMP_CH(i, CSI2_CH_CTRL); + DUMP_CH(i, CSI2_CH_ADDR0); + DUMP_CH(i, CSI2_CH_ADDR1); + DUMP_CH(i, CSI2_CH_STRIDE); + DUMP_CH(i, CSI2_CH_LENGTH); + DUMP_CH(i, CSI2_CH_DEBUG); + DUMP_CH(i, CSI2_CH_FRAME_SIZE); + DUMP_CH(i, CSI2_CH_COMP_CTRL); + DUMP_CH(i, CSI2_CH_FE_FRAME_ID); + } + +#undef DUMP +#undef DUMP_CH + + pm_runtime_put(csi2->v4l2_dev->dev); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(csi2_regs); + +static int csi2_errors_show(struct seq_file *s, void *data) +{ + struct csi2_device *csi2 = s->private; + unsigned long flags; + u32 discards_table[DISCARDS_TABLE_NUM_VCS][DISCARDS_TABLE_NUM_ENTRIES]; + u32 discards_dt_table[DISCARDS_TABLE_NUM_ENTRIES]; + u32 overflows; + + spin_lock_irqsave(&csi2->errors_lock, flags); + + memcpy(discards_table, csi2->discards_table, sizeof(discards_table)); + memcpy(discards_dt_table, csi2->discards_dt_table, + sizeof(discards_dt_table)); + overflows = csi2->overflows; + + csi2->overflows = 0; + memset(csi2->discards_table, 0, sizeof(discards_table)); + memset(csi2->discards_dt_table, 0, sizeof(discards_dt_table)); + + spin_unlock_irqrestore(&csi2->errors_lock, flags); + + seq_printf(s, "Overflows %u\n", overflows); + seq_puts(s, "Discards:\n"); + seq_puts(s, "VC OVLF LEN UNMATCHED INACTIVE\n"); + + for (unsigned int vc = 0; vc < DISCARDS_TABLE_NUM_VCS; ++vc) { + seq_printf(s, "%u %10u %10u %10u %10u\n", vc, + discards_table[vc][DISCARDS_TABLE_OVERFLOW], + discards_table[vc][DISCARDS_TABLE_LENGTH_LIMIT], + discards_table[vc][DISCARDS_TABLE_UNMATCHED], + discards_table[vc][DISCARDS_TABLE_INACTIVE]); + } + + seq_printf(s, "Last DT %10u %10u %10u %10u\n", + discards_dt_table[DISCARDS_TABLE_OVERFLOW], + discards_dt_table[DISCARDS_TABLE_LENGTH_LIMIT], + discards_dt_table[DISCARDS_TABLE_UNMATCHED], + discards_dt_table[DISCARDS_TABLE_INACTIVE]); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(csi2_errors); + +static void csi2_isr_handle_errors(struct csi2_device *csi2, u32 status) +{ + spin_lock(&csi2->errors_lock); + + if (status & CSI2_STATUS_IRQ_OVERFLOW) + csi2->overflows++; + + for (unsigned int i = 0; i < DISCARDS_TABLE_NUM_ENTRIES; ++i) { + static const u32 discard_bits[] = { + CSI2_STATUS_IRQ_DISCARD_OVERFLOW, + CSI2_STATUS_IRQ_DISCARD_LEN_LIMIT, + CSI2_STATUS_IRQ_DISCARD_UNMATCHED, + CSI2_STATUS_IRQ_DISCARD_INACTIVE, + }; + static const u8 discard_regs[] = { + CSI2_DISCARDS_OVERFLOW, + CSI2_DISCARDS_LEN_LIMIT, + CSI2_DISCARDS_UNMATCHED, + CSI2_DISCARDS_INACTIVE, + }; + u32 amount; + u8 dt, vc; + u32 v; + + if (!(status & discard_bits[i])) + continue; + + v = csi2_reg_read(csi2, discard_regs[i]); + csi2_reg_write(csi2, discard_regs[i], 0); + + amount = (v & CSI2_DISCARDS_AMOUNT_MASK) >> + CSI2_DISCARDS_AMOUNT_SHIFT; + dt = (v & CSI2_DISCARDS_DT_MASK) >> CSI2_DISCARDS_DT_SHIFT; + vc = (v & CSI2_DISCARDS_VC_MASK) >> CSI2_DISCARDS_VC_SHIFT; + + csi2->discards_table[vc][i] += amount; + 
csi2->discards_dt_table[i] = dt; + } + + spin_unlock(&csi2->errors_lock); +} + +void csi2_isr(struct csi2_device *csi2, bool *sof, bool *eof) +{ + u32 status; + + status = csi2_reg_read(csi2, CSI2_STATUS); + + /* Write value back to clear the interrupts */ + csi2_reg_write(csi2, CSI2_STATUS, status); + + for (unsigned int i = 0; i < CSI2_NUM_CHANNELS; i++) { + u32 dbg; + + if ((status & CSI2_STATUS_IRQ_CH_MASK(i)) == 0) + continue; + + dbg = csi2_reg_read(csi2, CSI2_CH_DEBUG(i)); + + trace_csi2_irq(i, status, dbg); + + sof[i] = !!(status & CSI2_STATUS_IRQ_FS(i)); + eof[i] = !!(status & CSI2_STATUS_IRQ_FE_ACK(i)); + } + + if (csi2_track_errors) + csi2_isr_handle_errors(csi2, status); +} + +void csi2_set_buffer(struct csi2_device *csi2, unsigned int channel, + dma_addr_t dmaaddr, unsigned int stride, unsigned int size) +{ + u64 addr = dmaaddr; + /* + * ADDRESS0 must be written last as it triggers the double buffering + * mechanism for all buffer registers within the hardware. + */ + addr >>= 4; + csi2_reg_write(csi2, CSI2_CH_LENGTH(channel), size >> 4); + csi2_reg_write(csi2, CSI2_CH_STRIDE(channel), stride >> 4); + csi2_reg_write(csi2, CSI2_CH_ADDR1(channel), addr >> 32); + csi2_reg_write(csi2, CSI2_CH_ADDR0(channel), addr & 0xffffffff); +} + +void csi2_set_compression(struct csi2_device *csi2, unsigned int channel, + enum csi2_compression_mode mode, unsigned int shift, + unsigned int offset) +{ + u32 compression = 0; + + set_field(&compression, CSI2_CH_COMP_CTRL_OFFSET_MASK, offset); + set_field(&compression, CSI2_CH_COMP_CTRL_SHIFT_MASK, shift); + set_field(&compression, CSI2_CH_COMP_CTRL_MODE_MASK, mode); + csi2_reg_write(csi2, CSI2_CH_COMP_CTRL(channel), compression); +} + +void csi2_start_channel(struct csi2_device *csi2, unsigned int channel, + enum csi2_mode mode, bool auto_arm, bool pack_bytes, + unsigned int width, unsigned int height, + u8 vc, u8 dt) +{ + u32 ctrl; + + csi2_dbg(csi2, "%s [%u]\n", __func__, channel); + + csi2_reg_write(csi2, CSI2_CH_CTRL(channel), 0); + csi2_reg_write(csi2, CSI2_CH_DEBUG(channel), 0); + csi2_reg_write(csi2, CSI2_STATUS, CSI2_STATUS_IRQ_CH_MASK(channel)); + + /* Enable channel and FS/FE interrupts. */ + ctrl = CSI2_CH_CTRL_DMA_EN | CSI2_CH_CTRL_IRQ_EN_FS | + CSI2_CH_CTRL_IRQ_EN_FE_ACK | CSI2_CH_CTRL_PACK_LINE; + /* PACK_BYTES ensures no striding for embedded data. */ + if (pack_bytes) + ctrl |= CSI2_CH_CTRL_PACK_BYTES; + + if (auto_arm) + ctrl |= CSI2_CH_CTRL_AUTO_ARM; + + if (width && height) { + set_field(&ctrl, mode, CSI2_CH_CTRL_CH_MODE_MASK); + csi2_reg_write(csi2, CSI2_CH_FRAME_SIZE(channel), + (height << 16) | width); + } else { + set_field(&ctrl, 0x0, CSI2_CH_CTRL_CH_MODE_MASK); + csi2_reg_write(csi2, CSI2_CH_FRAME_SIZE(channel), 0); + } + + set_field(&ctrl, vc, CSI2_CH_CTRL_VC_MASK); + set_field(&ctrl, dt, CSI2_CH_CTRL_DT_MASK); + csi2_reg_write(csi2, CSI2_CH_CTRL(channel), ctrl); + csi2->num_lines[channel] = height; +} + +void csi2_stop_channel(struct csi2_device *csi2, unsigned int channel) +{ + csi2_dbg(csi2, "%s [%u]\n", __func__, channel); + + /* Channel disable. Use FORCE to allow stopping mid-frame. */ + csi2_reg_write(csi2, CSI2_CH_CTRL(channel), CSI2_CH_CTRL_FORCE); + /* Latch the above change by writing to the ADDR0 register. */ + csi2_reg_write(csi2, CSI2_CH_ADDR0(channel), 0); + /* Write this again, the HW needs it! */ + csi2_reg_write(csi2, CSI2_CH_ADDR0(channel), 0); +} + +void csi2_open_rx(struct csi2_device *csi2) +{ + csi2_reg_write(csi2, CSI2_IRQ_MASK, + csi2_track_errors ? 
CSI2_IRQ_MASK_IRQ_ALL : 0); + + dphy_start(&csi2->dphy); + + csi2_reg_write(csi2, CSI2_CTRL, CSI2_CTRL_EOP_IS_EOL); +} + +void csi2_close_rx(struct csi2_device *csi2) +{ + dphy_stop(&csi2->dphy); + + csi2_reg_write(csi2, CSI2_IRQ_MASK, 0); +} + +static int csi2_init_state(struct v4l2_subdev *sd, + struct v4l2_subdev_state *state) +{ + struct v4l2_subdev_route routes[] = { { + .sink_pad = CSI2_PAD_SINK, + .sink_stream = 0, + .source_pad = CSI2_PAD_FIRST_SOURCE, + .source_stream = 0, + .flags = V4L2_SUBDEV_ROUTE_FL_ACTIVE, + } }; + + struct v4l2_subdev_krouting routing = { + .num_routes = ARRAY_SIZE(routes), + .routes = routes, + }; + + int ret; + + ret = v4l2_subdev_set_routing_with_fmt(sd, state, &routing, + &cfe_default_format); + if (ret) + return ret; + + return 0; +} + +static int csi2_pad_set_fmt(struct v4l2_subdev *sd, + struct v4l2_subdev_state *state, + struct v4l2_subdev_format *format) +{ + if (format->pad == CSI2_PAD_SINK) { + /* Store the sink format and propagate it to the source. */ + + const struct cfe_fmt *cfe_fmt; + + cfe_fmt = find_format_by_code(format->format.code); + if (!cfe_fmt) { + cfe_fmt = find_format_by_code(MEDIA_BUS_FMT_SRGGB10_1X10); + format->format.code = cfe_fmt->code; + } + + struct v4l2_mbus_framefmt *fmt; + + fmt = v4l2_subdev_state_get_format(state, format->pad, + format->stream); + if (!fmt) + return -EINVAL; + + *fmt = format->format; + + fmt = v4l2_subdev_state_get_opposite_stream_format(state, + format->pad, + format->stream); + if (!fmt) + return -EINVAL; + + format->format.field = V4L2_FIELD_NONE; + + *fmt = format->format; + } else { + /* Only allow changing the source pad mbus code. */ + + struct v4l2_mbus_framefmt *sink_fmt, *source_fmt; + u32 sink_code; + u32 code; + + sink_fmt = v4l2_subdev_state_get_opposite_stream_format(state, + format->pad, + format->stream); + if (!sink_fmt) + return -EINVAL; + + source_fmt = v4l2_subdev_state_get_format(state, format->pad, + format->stream); + if (!source_fmt) + return -EINVAL; + + sink_code = sink_fmt->code; + code = format->format.code; + + /* + * Only allow changing the mbus code to: + * - The sink's mbus code + * - The 16-bit version of the sink's mbus code + * - The compressed version of the sink's mbus code + */ + if (code == sink_code || + code == cfe_find_16bit_code(sink_code) || + code == cfe_find_compressed_code(sink_code)) + source_fmt->code = code; + + format->format.code = source_fmt->code; + } + + return 0; +} + +static int csi2_set_routing(struct v4l2_subdev *sd, + struct v4l2_subdev_state *state, + enum v4l2_subdev_format_whence which, + struct v4l2_subdev_krouting *routing) +{ + int ret; + + ret = v4l2_subdev_routing_validate(sd, routing, + V4L2_SUBDEV_ROUTING_ONLY_1_TO_1 | + V4L2_SUBDEV_ROUTING_NO_SOURCE_MULTIPLEXING); + if (ret) + return ret; + + /* Only stream ID 0 allowed on source pads */ + for (unsigned int i = 0; i < routing->num_routes; ++i) { + const struct v4l2_subdev_route *route = &routing->routes[i]; + + if (route->source_stream != 0) + return -EINVAL; + } + + ret = v4l2_subdev_set_routing_with_fmt(sd, state, routing, + &cfe_default_format); + if (ret) + return ret; + + return 0; +} + +static const struct v4l2_subdev_pad_ops csi2_subdev_pad_ops = { + .get_fmt = v4l2_subdev_get_fmt, + .set_fmt = csi2_pad_set_fmt, + .set_routing = csi2_set_routing, + .link_validate = v4l2_subdev_link_validate_default, +}; + +static const struct media_entity_operations csi2_entity_ops = { + .link_validate = v4l2_subdev_link_validate, + .has_pad_interdep = v4l2_subdev_has_pad_interdep, +}; + 
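+/*
+ * A note on csi2_pad_set_fmt() above: a format set on the sink pad is
+ * stored and propagated to the opposite (source) stream with the field
+ * forced to V4L2_FIELD_NONE, while a source pad only accepts the sink's
+ * media bus code, its 16-bit remap (cfe_find_16bit_code()) or its
+ * compressed remap (cfe_find_compressed_code()); any other requested code
+ * is adjusted back to the currently configured source code.
+ */
+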
+static const struct v4l2_subdev_ops csi2_subdev_ops = { + .pad = &csi2_subdev_pad_ops, +}; + +static const struct v4l2_subdev_internal_ops csi2_internal_ops = { + .init_state = csi2_init_state, +}; + +int csi2_init(struct csi2_device *csi2, struct dentry *debugfs) +{ + unsigned int ret; + + spin_lock_init(&csi2->errors_lock); + + csi2->dphy.dev = csi2->v4l2_dev->dev; + dphy_probe(&csi2->dphy); + + debugfs_create_file("csi2_regs", 0440, debugfs, csi2, &csi2_regs_fops); + + if (csi2_track_errors) + debugfs_create_file("csi2_errors", 0440, debugfs, csi2, + &csi2_errors_fops); + + csi2->pad[CSI2_PAD_SINK].flags = MEDIA_PAD_FL_SINK; + + for (unsigned int i = CSI2_PAD_FIRST_SOURCE; + i < CSI2_PAD_FIRST_SOURCE + CSI2_PAD_NUM_SOURCES; i++) + csi2->pad[i].flags = MEDIA_PAD_FL_SOURCE; + + ret = media_entity_pads_init(&csi2->sd.entity, ARRAY_SIZE(csi2->pad), + csi2->pad); + if (ret) + return ret; + + /* Initialize subdev */ + v4l2_subdev_init(&csi2->sd, &csi2_subdev_ops); + csi2->sd.internal_ops = &csi2_internal_ops; + csi2->sd.entity.function = MEDIA_ENT_F_VID_IF_BRIDGE; + csi2->sd.entity.ops = &csi2_entity_ops; + csi2->sd.flags = V4L2_SUBDEV_FL_HAS_DEVNODE | V4L2_SUBDEV_FL_STREAMS; + csi2->sd.owner = THIS_MODULE; + snprintf(csi2->sd.name, sizeof(csi2->sd.name), "csi2"); + + ret = v4l2_subdev_init_finalize(&csi2->sd); + if (ret) + goto err_entity_cleanup; + + ret = v4l2_device_register_subdev(csi2->v4l2_dev, &csi2->sd); + if (ret) { + csi2_err(csi2, "Failed register csi2 subdev (%d)\n", ret); + goto err_subdev_cleanup; + } + + return 0; + +err_subdev_cleanup: + v4l2_subdev_cleanup(&csi2->sd); +err_entity_cleanup: + media_entity_cleanup(&csi2->sd.entity); + + return ret; +} + +void csi2_uninit(struct csi2_device *csi2) +{ + v4l2_device_unregister_subdev(&csi2->sd); + v4l2_subdev_cleanup(&csi2->sd); + media_entity_cleanup(&csi2->sd.entity); +} diff --git a/drivers/media/platform/raspberrypi/rp1-cfe/csi2.h b/drivers/media/platform/raspberrypi/rp1-cfe/csi2.h new file mode 100644 index 000000000000..a8ee5de565fb --- /dev/null +++ b/drivers/media/platform/raspberrypi/rp1-cfe/csi2.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * RP1 CSI-2 Driver + * + * Copyright (c) 2021-2024 Raspberry Pi Ltd. 
+ * Copyright (c) 2023-2024 Ideas on Board Oy + */ + +#ifndef _RP1_CSI2_ +#define _RP1_CSI2_ + +#include +#include +#include +#include +#include + +#include "dphy.h" + +#define CSI2_NUM_CHANNELS 4 + +#define CSI2_PAD_SINK 0 +#define CSI2_PAD_FIRST_SOURCE 1 +#define CSI2_PAD_NUM_SOURCES 4 +#define CSI2_NUM_PADS 5 + +#define DISCARDS_TABLE_NUM_VCS 4 + +enum csi2_mode { + CSI2_MODE_NORMAL = 0, + CSI2_MODE_REMAP = 1, + CSI2_MODE_COMPRESSED = 2, + CSI2_MODE_FE_STREAMING = 3, +}; + +enum csi2_compression_mode { + CSI2_COMPRESSION_DELTA = 1, + CSI2_COMPRESSION_SIMPLE = 2, + CSI2_COMPRESSION_COMBINED = 3, +}; + +enum discards_table_index { + DISCARDS_TABLE_OVERFLOW = 0, + DISCARDS_TABLE_LENGTH_LIMIT, + DISCARDS_TABLE_UNMATCHED, + DISCARDS_TABLE_INACTIVE, + DISCARDS_TABLE_NUM_ENTRIES, +}; + +struct csi2_device { + /* Parent V4l2 device */ + struct v4l2_device *v4l2_dev; + + void __iomem *base; + + struct dphy_data dphy; + + enum v4l2_mbus_type bus_type; + unsigned int bus_flags; + unsigned int num_lines[CSI2_NUM_CHANNELS]; + + struct media_pad pad[CSI2_NUM_PADS]; + struct v4l2_subdev sd; + + /* lock for csi2 errors counters */ + spinlock_t errors_lock; + u32 overflows; + u32 discards_table[DISCARDS_TABLE_NUM_VCS][DISCARDS_TABLE_NUM_ENTRIES]; + u32 discards_dt_table[DISCARDS_TABLE_NUM_ENTRIES]; +}; + +void csi2_isr(struct csi2_device *csi2, bool *sof, bool *eof); +void csi2_set_buffer(struct csi2_device *csi2, unsigned int channel, + dma_addr_t dmaaddr, unsigned int stride, + unsigned int size); +void csi2_set_compression(struct csi2_device *csi2, unsigned int channel, + enum csi2_compression_mode mode, unsigned int shift, + unsigned int offset); +void csi2_start_channel(struct csi2_device *csi2, unsigned int channel, + enum csi2_mode mode, bool auto_arm, + bool pack_bytes, unsigned int width, + unsigned int height, u8 vc, u8 dt); +void csi2_stop_channel(struct csi2_device *csi2, unsigned int channel); +void csi2_open_rx(struct csi2_device *csi2); +void csi2_close_rx(struct csi2_device *csi2); +int csi2_init(struct csi2_device *csi2, struct dentry *debugfs); +void csi2_uninit(struct csi2_device *csi2); + +#endif diff --git a/drivers/media/platform/raspberrypi/rp1-cfe/dphy.c b/drivers/media/platform/raspberrypi/rp1-cfe/dphy.c new file mode 100644 index 000000000000..b443f0f56ddc --- /dev/null +++ b/drivers/media/platform/raspberrypi/rp1-cfe/dphy.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * RP1 CSI-2 Driver + * + * Copyright (c) 2021-2024 Raspberry Pi Ltd. + * Copyright (c) 2023-2024 Ideas on Board Oy + */ + +#include +#include + +#include "dphy.h" + +#define dphy_dbg(dphy, fmt, arg...) dev_dbg((dphy)->dev, fmt, ##arg) +#define dphy_err(dphy, fmt, arg...) 
dev_err((dphy)->dev, fmt, ##arg) + +/* DW dphy Host registers */ +#define DPHY_VERSION 0x000 +#define DPHY_N_LANES 0x004 +#define DPHY_RESETN 0x008 +#define DPHY_PHY_SHUTDOWNZ 0x040 +#define DPHY_PHY_RSTZ 0x044 +#define DPHY_PHY_RX 0x048 +#define DPHY_PHY_STOPSTATE 0x04c +#define DPHY_PHY_TST_CTRL0 0x050 +#define DPHY_PHY_TST_CTRL1 0x054 +#define DPHY_PHY2_TST_CTRL0 0x058 +#define DPHY_PHY2_TST_CTRL1 0x05c + +/* DW dphy Host Transactions */ +#define DPHY_HS_RX_CTRL_LANE0_OFFSET 0x44 +#define DPHY_PLL_INPUT_DIV_OFFSET 0x17 +#define DPHY_PLL_LOOP_DIV_OFFSET 0x18 +#define DPHY_PLL_DIV_CTRL_OFFSET 0x19 + +static u32 dw_csi2_host_read(struct dphy_data *dphy, u32 offset) +{ + return readl(dphy->base + offset); +} + +static void dw_csi2_host_write(struct dphy_data *dphy, u32 offset, u32 data) +{ + writel(data, dphy->base + offset); +} + +static void set_tstclr(struct dphy_data *dphy, u32 val) +{ + u32 ctrl0 = dw_csi2_host_read(dphy, DPHY_PHY_TST_CTRL0); + + dw_csi2_host_write(dphy, DPHY_PHY_TST_CTRL0, (ctrl0 & ~1) | val); +} + +static void set_tstclk(struct dphy_data *dphy, u32 val) +{ + u32 ctrl0 = dw_csi2_host_read(dphy, DPHY_PHY_TST_CTRL0); + + dw_csi2_host_write(dphy, DPHY_PHY_TST_CTRL0, (ctrl0 & ~2) | (val << 1)); +} + +static uint8_t get_tstdout(struct dphy_data *dphy) +{ + u32 ctrl1 = dw_csi2_host_read(dphy, DPHY_PHY_TST_CTRL1); + + return ((ctrl1 >> 8) & 0xff); +} + +static void set_testen(struct dphy_data *dphy, u32 val) +{ + u32 ctrl1 = dw_csi2_host_read(dphy, DPHY_PHY_TST_CTRL1); + + dw_csi2_host_write(dphy, DPHY_PHY_TST_CTRL1, + (ctrl1 & ~(1 << 16)) | (val << 16)); +} + +static void set_testdin(struct dphy_data *dphy, u32 val) +{ + u32 ctrl1 = dw_csi2_host_read(dphy, DPHY_PHY_TST_CTRL1); + + dw_csi2_host_write(dphy, DPHY_PHY_TST_CTRL1, (ctrl1 & ~0xff) | val); +} + +static uint8_t dphy_transaction(struct dphy_data *dphy, u8 test_code, + uint8_t test_data) +{ + /* See page 101 of the MIPI DPHY databook. 
*/ + set_tstclk(dphy, 1); + set_testen(dphy, 0); + set_testdin(dphy, test_code); + set_testen(dphy, 1); + set_tstclk(dphy, 0); + set_testen(dphy, 0); + set_testdin(dphy, test_data); + set_tstclk(dphy, 1); + return get_tstdout(dphy); +} + +static void dphy_set_hsfreqrange(struct dphy_data *dphy, uint32_t mbps) +{ + /* See Table 5-1 on page 65 of dphy databook */ + static const u16 hsfreqrange_table[][2] = { + { 89, 0b000000 }, { 99, 0b010000 }, { 109, 0b100000 }, + { 129, 0b000001 }, { 139, 0b010001 }, { 149, 0b100001 }, + { 169, 0b000010 }, { 179, 0b010010 }, { 199, 0b100010 }, + { 219, 0b000011 }, { 239, 0b010011 }, { 249, 0b100011 }, + { 269, 0b000100 }, { 299, 0b010100 }, { 329, 0b000101 }, + { 359, 0b010101 }, { 399, 0b100101 }, { 449, 0b000110 }, + { 499, 0b010110 }, { 549, 0b000111 }, { 599, 0b010111 }, + { 649, 0b001000 }, { 699, 0b011000 }, { 749, 0b001001 }, + { 799, 0b011001 }, { 849, 0b101001 }, { 899, 0b111001 }, + { 949, 0b001010 }, { 999, 0b011010 }, { 1049, 0b101010 }, + { 1099, 0b111010 }, { 1149, 0b001011 }, { 1199, 0b011011 }, + { 1249, 0b101011 }, { 1299, 0b111011 }, { 1349, 0b001100 }, + { 1399, 0b011100 }, { 1449, 0b101100 }, { 1500, 0b111100 }, + }; + unsigned int i; + + if (mbps < 80 || mbps > 1500) + dphy_err(dphy, "DPHY: Datarate %u Mbps out of range\n", mbps); + + for (i = 0; i < ARRAY_SIZE(hsfreqrange_table) - 1; i++) { + if (mbps <= hsfreqrange_table[i][0]) + break; + } + + dphy_transaction(dphy, DPHY_HS_RX_CTRL_LANE0_OFFSET, + hsfreqrange_table[i][1] << 1); +} + +static void dphy_init(struct dphy_data *dphy) +{ + dw_csi2_host_write(dphy, DPHY_PHY_RSTZ, 0); + dw_csi2_host_write(dphy, DPHY_PHY_SHUTDOWNZ, 0); + set_tstclk(dphy, 1); + set_testen(dphy, 0); + set_tstclr(dphy, 1); + usleep_range(15, 20); + set_tstclr(dphy, 0); + usleep_range(15, 20); + + dphy_set_hsfreqrange(dphy, dphy->dphy_rate); + + usleep_range(5, 10); + dw_csi2_host_write(dphy, DPHY_PHY_SHUTDOWNZ, 1); + usleep_range(5, 10); + dw_csi2_host_write(dphy, DPHY_PHY_RSTZ, 1); +} + +void dphy_start(struct dphy_data *dphy) +{ + dphy_dbg(dphy, "%s: Link rate %u Mbps, %u data lanes\n", __func__, + dphy->dphy_rate, dphy->active_lanes); + + dw_csi2_host_write(dphy, DPHY_N_LANES, (dphy->active_lanes - 1)); + dphy_init(dphy); + dw_csi2_host_write(dphy, DPHY_RESETN, 0xffffffff); + usleep_range(10, 50); +} + +void dphy_stop(struct dphy_data *dphy) +{ + dphy_dbg(dphy, "%s\n", __func__); + + /* Set only one lane (lane 0) as active (ON) */ + dw_csi2_host_write(dphy, DPHY_N_LANES, 0); + dw_csi2_host_write(dphy, DPHY_RESETN, 0); +} + +void dphy_probe(struct dphy_data *dphy) +{ + u32 host_ver; + u8 host_ver_major, host_ver_minor; + + host_ver = dw_csi2_host_read(dphy, DPHY_VERSION); + host_ver_major = (u8)((host_ver >> 24) - '0'); + host_ver_minor = (u8)((host_ver >> 16) - '0'); + host_ver_minor = host_ver_minor * 10; + host_ver_minor += (u8)((host_ver >> 8) - '0'); + + dphy_dbg(dphy, "DW dphy Host HW v%u.%u\n", host_ver_major, + host_ver_minor); +} diff --git a/drivers/media/platform/raspberrypi/rp1-cfe/dphy.h b/drivers/media/platform/raspberrypi/rp1-cfe/dphy.h new file mode 100644 index 000000000000..84fa370957cc --- /dev/null +++ b/drivers/media/platform/raspberrypi/rp1-cfe/dphy.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2021-2024 Raspberry Pi Ltd. 
+ * Copyright (c) 2023-2024 Ideas on Board Oy + */ + +#ifndef _RP1_DPHY_ +#define _RP1_DPHY_ + +#include +#include + +struct dphy_data { + struct device *dev; + + void __iomem *base; + + u32 dphy_rate; + u32 max_lanes; + u32 active_lanes; +}; + +void dphy_probe(struct dphy_data *dphy); +void dphy_start(struct dphy_data *dphy); +void dphy_stop(struct dphy_data *dphy); + +#endif diff --git a/drivers/media/platform/raspberrypi/rp1-cfe/pisp-fe.c b/drivers/media/platform/raspberrypi/rp1-cfe/pisp-fe.c new file mode 100644 index 000000000000..05762b1be2bc --- /dev/null +++ b/drivers/media/platform/raspberrypi/rp1-cfe/pisp-fe.c @@ -0,0 +1,605 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PiSP Front End Driver + * + * Copyright (c) 2021-2024 Raspberry Pi Ltd. + */ + +#include +#include +#include +#include +#include + +#include + +#include "cfe.h" +#include "pisp-fe.h" + +#include "cfe-trace.h" + +#define FE_VERSION 0x000 +#define FE_CONTROL 0x004 +#define FE_STATUS 0x008 +#define FE_FRAME_STATUS 0x00c +#define FE_ERROR_STATUS 0x010 +#define FE_OUTPUT_STATUS 0x014 +#define FE_INT_EN 0x018 +#define FE_INT_STATUS 0x01c + +/* CONTROL */ +#define FE_CONTROL_QUEUE BIT(0) +#define FE_CONTROL_ABORT BIT(1) +#define FE_CONTROL_RESET BIT(2) +#define FE_CONTROL_LATCH_REGS BIT(3) + +/* INT_EN / INT_STATUS */ +#define FE_INT_EOF BIT(0) +#define FE_INT_SOF BIT(1) +#define FE_INT_LINES0 BIT(8) +#define FE_INT_LINES1 BIT(9) +#define FE_INT_STATS BIT(16) +#define FE_INT_QREADY BIT(24) + +/* STATUS */ +#define FE_STATUS_QUEUED BIT(0) +#define FE_STATUS_WAITING BIT(1) +#define FE_STATUS_ACTIVE BIT(2) + +#define PISP_FE_CONFIG_BASE_OFFSET 0x0040 + +#define PISP_FE_ENABLE_STATS_CLUSTER \ + (PISP_FE_ENABLE_STATS_CROP | PISP_FE_ENABLE_DECIMATE | \ + PISP_FE_ENABLE_BLC | PISP_FE_ENABLE_CDAF_STATS | \ + PISP_FE_ENABLE_AWB_STATS | PISP_FE_ENABLE_RGBY | \ + PISP_FE_ENABLE_LSC | PISP_FE_ENABLE_AGC_STATS) + +#define PISP_FE_ENABLE_OUTPUT_CLUSTER(i) \ + ((PISP_FE_ENABLE_CROP0 | PISP_FE_ENABLE_DOWNSCALE0 | \ + PISP_FE_ENABLE_COMPRESS0 | PISP_FE_ENABLE_OUTPUT0) << (4 * (i))) + +struct pisp_fe_config_param { + u32 dirty_flags; + u32 dirty_flags_extra; + size_t offset; + size_t size; +}; + +static const struct pisp_fe_config_param pisp_fe_config_map[] = { + /* *_dirty_flag_extra types */ + { 0, PISP_FE_DIRTY_GLOBAL, + offsetof(struct pisp_fe_config, global), + sizeof(struct pisp_fe_global_config) }, + { 0, PISP_FE_DIRTY_FLOATING, + offsetof(struct pisp_fe_config, floating_stats), + sizeof(struct pisp_fe_floating_stats_config) }, + { 0, PISP_FE_DIRTY_OUTPUT_AXI, + offsetof(struct pisp_fe_config, output_axi), + sizeof(struct pisp_fe_output_axi_config) }, + /* *_dirty_flag types */ + { PISP_FE_ENABLE_INPUT, 0, + offsetof(struct pisp_fe_config, input), + sizeof(struct pisp_fe_input_config) }, + { PISP_FE_ENABLE_DECOMPRESS, 0, + offsetof(struct pisp_fe_config, decompress), + sizeof(struct pisp_decompress_config) }, + { PISP_FE_ENABLE_DECOMPAND, 0, + offsetof(struct pisp_fe_config, decompand), + sizeof(struct pisp_fe_decompand_config) }, + { PISP_FE_ENABLE_BLA, 0, + offsetof(struct pisp_fe_config, bla), + sizeof(struct pisp_bla_config) }, + { PISP_FE_ENABLE_DPC, 0, + offsetof(struct pisp_fe_config, dpc), + sizeof(struct pisp_fe_dpc_config) }, + { PISP_FE_ENABLE_STATS_CROP, 0, + offsetof(struct pisp_fe_config, stats_crop), + sizeof(struct pisp_fe_crop_config) }, + { PISP_FE_ENABLE_BLC, 0, + offsetof(struct pisp_fe_config, blc), + sizeof(struct pisp_bla_config) }, + { PISP_FE_ENABLE_CDAF_STATS, 0, + offsetof(struct pisp_fe_config, 
cdaf_stats), + sizeof(struct pisp_fe_cdaf_stats_config) }, + { PISP_FE_ENABLE_AWB_STATS, 0, + offsetof(struct pisp_fe_config, awb_stats), + sizeof(struct pisp_fe_awb_stats_config) }, + { PISP_FE_ENABLE_RGBY, 0, + offsetof(struct pisp_fe_config, rgby), + sizeof(struct pisp_fe_rgby_config) }, + { PISP_FE_ENABLE_LSC, 0, + offsetof(struct pisp_fe_config, lsc), + sizeof(struct pisp_fe_lsc_config) }, + { PISP_FE_ENABLE_AGC_STATS, 0, + offsetof(struct pisp_fe_config, agc_stats), + sizeof(struct pisp_agc_statistics) }, + { PISP_FE_ENABLE_CROP0, 0, + offsetof(struct pisp_fe_config, ch[0].crop), + sizeof(struct pisp_fe_crop_config) }, + { PISP_FE_ENABLE_DOWNSCALE0, 0, + offsetof(struct pisp_fe_config, ch[0].downscale), + sizeof(struct pisp_fe_downscale_config) }, + { PISP_FE_ENABLE_COMPRESS0, 0, + offsetof(struct pisp_fe_config, ch[0].compress), + sizeof(struct pisp_compress_config) }, + { PISP_FE_ENABLE_OUTPUT0, 0, + offsetof(struct pisp_fe_config, ch[0].output), + sizeof(struct pisp_fe_output_config) }, + { PISP_FE_ENABLE_CROP1, 0, + offsetof(struct pisp_fe_config, ch[1].crop), + sizeof(struct pisp_fe_crop_config) }, + { PISP_FE_ENABLE_DOWNSCALE1, 0, + offsetof(struct pisp_fe_config, ch[1].downscale), + sizeof(struct pisp_fe_downscale_config) }, + { PISP_FE_ENABLE_COMPRESS1, 0, + offsetof(struct pisp_fe_config, ch[1].compress), + sizeof(struct pisp_compress_config) }, + { PISP_FE_ENABLE_OUTPUT1, 0, + offsetof(struct pisp_fe_config, ch[1].output), + sizeof(struct pisp_fe_output_config) }, +}; + +#define pisp_fe_dbg(fe, fmt, arg...) dev_dbg((fe)->v4l2_dev->dev, fmt, ##arg) +#define pisp_fe_info(fe, fmt, arg...) dev_info((fe)->v4l2_dev->dev, fmt, ##arg) +#define pisp_fe_err(fe, fmt, arg...) dev_err((fe)->v4l2_dev->dev, fmt, ##arg) + +static inline u32 pisp_fe_reg_read(struct pisp_fe_device *fe, u32 offset) +{ + return readl(fe->base + offset); +} + +static inline void pisp_fe_reg_write(struct pisp_fe_device *fe, u32 offset, + u32 val) +{ + writel(val, fe->base + offset); +} + +static inline void pisp_fe_reg_write_relaxed(struct pisp_fe_device *fe, + u32 offset, u32 val) +{ + writel_relaxed(val, fe->base + offset); +} + +static int pisp_fe_regs_show(struct seq_file *s, void *data) +{ + struct pisp_fe_device *fe = s->private; + int ret; + + ret = pm_runtime_resume_and_get(fe->v4l2_dev->dev); + if (ret) + return ret; + + pisp_fe_reg_write(fe, FE_CONTROL, FE_CONTROL_LATCH_REGS); + +#define DUMP(reg) seq_printf(s, #reg " \t0x%08x\n", pisp_fe_reg_read(fe, reg)) + DUMP(FE_VERSION); + DUMP(FE_CONTROL); + DUMP(FE_STATUS); + DUMP(FE_FRAME_STATUS); + DUMP(FE_ERROR_STATUS); + DUMP(FE_OUTPUT_STATUS); + DUMP(FE_INT_EN); + DUMP(FE_INT_STATUS); +#undef DUMP + + pm_runtime_put(fe->v4l2_dev->dev); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(pisp_fe_regs); + +static void pisp_fe_config_write(struct pisp_fe_device *fe, + struct pisp_fe_config *config, + unsigned int start_offset, unsigned int size) +{ + const unsigned int max_offset = + offsetof(struct pisp_fe_config, ch[PISP_FE_NUM_OUTPUTS]); + unsigned int end_offset; + u32 *cfg = (u32 *)config; + + start_offset = min(start_offset, max_offset); + end_offset = min(start_offset + size, max_offset); + + cfg += start_offset >> 2; + for (unsigned int i = start_offset; i < end_offset; i += 4, cfg++) + pisp_fe_reg_write_relaxed(fe, PISP_FE_CONFIG_BASE_OFFSET + i, + *cfg); +} + +void pisp_fe_isr(struct pisp_fe_device *fe, bool *sof, bool *eof) +{ + u32 status, int_status, out_status, frame_status, error_status; + + pisp_fe_reg_write(fe, FE_CONTROL, FE_CONTROL_LATCH_REGS); + 
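/*
+ * FE_CONTROL_LATCH_REGS latches the registers before they are read back
+ * below; the debugfs dump in pisp_fe_regs_show() issues the same write
+ * before dumping them.
+ */
+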
status = pisp_fe_reg_read(fe, FE_STATUS); + out_status = pisp_fe_reg_read(fe, FE_OUTPUT_STATUS); + frame_status = pisp_fe_reg_read(fe, FE_FRAME_STATUS); + error_status = pisp_fe_reg_read(fe, FE_ERROR_STATUS); + + int_status = pisp_fe_reg_read(fe, FE_INT_STATUS); + pisp_fe_reg_write(fe, FE_INT_STATUS, int_status); + + trace_fe_irq(status, out_status, frame_status, error_status, + int_status); + + /* We do not report interrupts for the input/stream pad. */ + for (unsigned int i = 0; i < FE_NUM_PADS - 1; i++) { + sof[i] = !!(int_status & FE_INT_SOF); + eof[i] = !!(int_status & FE_INT_EOF); + } +} + +static bool pisp_fe_validate_output(struct pisp_fe_config const *cfg, + unsigned int c, struct v4l2_format const *f) +{ + unsigned int wbytes; + + wbytes = cfg->ch[c].output.format.width; + if (cfg->ch[c].output.format.format & PISP_IMAGE_FORMAT_BPS_MASK) + wbytes *= 2; + + /* Check output image dimensions are nonzero and not too big */ + if (cfg->ch[c].output.format.width < 2 || + cfg->ch[c].output.format.height < 2 || + cfg->ch[c].output.format.height > f->fmt.pix.height || + cfg->ch[c].output.format.stride > f->fmt.pix.bytesperline || + wbytes > f->fmt.pix.bytesperline) + return false; + + /* Check for zero-sized crops, which could cause lockup */ + if ((cfg->global.enables & PISP_FE_ENABLE_CROP(c)) && + ((cfg->ch[c].crop.offset_x >= (cfg->input.format.width & ~1) || + cfg->ch[c].crop.offset_y >= cfg->input.format.height || + cfg->ch[c].crop.width < 2 || cfg->ch[c].crop.height < 2))) + return false; + + if ((cfg->global.enables & PISP_FE_ENABLE_DOWNSCALE(c)) && + (cfg->ch[c].downscale.output_width < 2 || + cfg->ch[c].downscale.output_height < 2)) + return false; + + return true; +} + +static bool pisp_fe_validate_stats(struct pisp_fe_config const *cfg) +{ + /* Check for zero-sized crop, which could cause lockup */ + return (!(cfg->global.enables & PISP_FE_ENABLE_STATS_CROP) || + (cfg->stats_crop.offset_x < (cfg->input.format.width & ~1) && + cfg->stats_crop.offset_y < cfg->input.format.height && + cfg->stats_crop.width >= 2 && cfg->stats_crop.height >= 2)); +} + +int pisp_fe_validate_config(struct pisp_fe_device *fe, + struct pisp_fe_config *cfg, + struct v4l2_format const *f0, + struct v4l2_format const *f1) +{ + /* + * Check the input is enabled, streaming and has nonzero size; + * to avoid cases where the hardware might lock up or try to + * read inputs from memory (which this driver doesn't support). + */ + if (!(cfg->global.enables & PISP_FE_ENABLE_INPUT) || + cfg->input.streaming != 1 || cfg->input.format.width < 2 || + cfg->input.format.height < 2) { + pisp_fe_err(fe, "%s: Input config not valid", __func__); + return -EINVAL; + } + + for (unsigned int i = 0; i < PISP_FE_NUM_OUTPUTS; i++) { + if (!(cfg->global.enables & PISP_FE_ENABLE_OUTPUT(i))) { + if (cfg->global.enables & + PISP_FE_ENABLE_OUTPUT_CLUSTER(i)) { + pisp_fe_err(fe, "%s: Output %u not valid", + __func__, i); + return -EINVAL; + } + continue; + } + + if (!pisp_fe_validate_output(cfg, i, i ? f1 : f0)) + return -EINVAL; + } + + if ((cfg->global.enables & PISP_FE_ENABLE_STATS_CLUSTER) && + !pisp_fe_validate_stats(cfg)) { + pisp_fe_err(fe, "%s: Stats config not valid", __func__); + return -EINVAL; + } + + return 0; +} + +void pisp_fe_submit_job(struct pisp_fe_device *fe, struct vb2_buffer **vb2_bufs, + struct pisp_fe_config *cfg) +{ + u64 addr; + u32 status; + + /* + * Check output buffers exist and outputs are correctly configured. + * If valid, set the buffer's DMA address; otherwise disable. 
+ */ + for (unsigned int i = 0; i < PISP_FE_NUM_OUTPUTS; i++) { + struct vb2_buffer *buf = vb2_bufs[FE_OUTPUT0_PAD + i]; + + if (!(cfg->global.enables & PISP_FE_ENABLE_OUTPUT(i))) + continue; + + addr = vb2_dma_contig_plane_dma_addr(buf, 0); + cfg->output_buffer[i].addr_lo = addr & 0xffffffff; + cfg->output_buffer[i].addr_hi = addr >> 32; + } + + if (vb2_bufs[FE_STATS_PAD]) { + addr = vb2_dma_contig_plane_dma_addr(vb2_bufs[FE_STATS_PAD], 0); + cfg->stats_buffer.addr_lo = addr & 0xffffffff; + cfg->stats_buffer.addr_hi = addr >> 32; + } + + /* Set up ILINES interrupts 3/4 of the way down each output */ + cfg->ch[0].output.ilines = + max(0x80u, (3u * cfg->ch[0].output.format.height) >> 2); + cfg->ch[1].output.ilines = + max(0x80u, (3u * cfg->ch[1].output.format.height) >> 2); + + /* + * The hardware must have consumed the previous config by now. + * This read of status also serves as a memory barrier before the + * sequence of relaxed writes which follow. + */ + status = pisp_fe_reg_read(fe, FE_STATUS); + if (WARN_ON(status & FE_STATUS_QUEUED)) + return; + + /* + * Unconditionally write buffers, global and input parameters. + * Write cropping and output parameters whenever they are enabled. + * Selectively write other parameters that have been marked as + * changed through the dirty flags. + */ + pisp_fe_config_write(fe, cfg, 0, + offsetof(struct pisp_fe_config, decompress)); + cfg->dirty_flags_extra &= ~PISP_FE_DIRTY_GLOBAL; + cfg->dirty_flags &= ~PISP_FE_ENABLE_INPUT; + cfg->dirty_flags |= (cfg->global.enables & + (PISP_FE_ENABLE_STATS_CROP | + PISP_FE_ENABLE_OUTPUT_CLUSTER(0) | + PISP_FE_ENABLE_OUTPUT_CLUSTER(1))); + for (unsigned int i = 0; i < ARRAY_SIZE(pisp_fe_config_map); i++) { + const struct pisp_fe_config_param *p = &pisp_fe_config_map[i]; + + if (cfg->dirty_flags & p->dirty_flags || + cfg->dirty_flags_extra & p->dirty_flags_extra) + pisp_fe_config_write(fe, cfg, p->offset, p->size); + } + + /* This final non-relaxed write serves as a memory barrier */ + pisp_fe_reg_write(fe, FE_CONTROL, FE_CONTROL_QUEUE); +} + +void pisp_fe_start(struct pisp_fe_device *fe) +{ + pisp_fe_reg_write(fe, FE_CONTROL, FE_CONTROL_RESET); + pisp_fe_reg_write(fe, FE_INT_STATUS, ~0); + pisp_fe_reg_write(fe, FE_INT_EN, FE_INT_EOF | FE_INT_SOF | + FE_INT_LINES0 | FE_INT_LINES1); + fe->inframe_count = 0; +} + +void pisp_fe_stop(struct pisp_fe_device *fe) +{ + pisp_fe_reg_write(fe, FE_INT_EN, 0); + pisp_fe_reg_write(fe, FE_CONTROL, FE_CONTROL_ABORT); + usleep_range(1000, 2000); + WARN_ON(pisp_fe_reg_read(fe, FE_STATUS)); + pisp_fe_reg_write(fe, FE_INT_STATUS, ~0); +} + +static int pisp_fe_init_state(struct v4l2_subdev *sd, + struct v4l2_subdev_state *state) +{ + struct v4l2_mbus_framefmt *fmt; + + fmt = v4l2_subdev_state_get_format(state, FE_STREAM_PAD); + *fmt = cfe_default_format; + fmt->code = MEDIA_BUS_FMT_SRGGB16_1X16; + + fmt = v4l2_subdev_state_get_format(state, FE_CONFIG_PAD); + fmt->code = MEDIA_BUS_FMT_FIXED; + fmt->width = sizeof(struct pisp_fe_config); + fmt->height = 1; + + fmt = v4l2_subdev_state_get_format(state, FE_OUTPUT0_PAD); + *fmt = cfe_default_format; + fmt->code = MEDIA_BUS_FMT_SRGGB16_1X16; + + fmt = v4l2_subdev_state_get_format(state, FE_OUTPUT1_PAD); + *fmt = cfe_default_format; + fmt->code = MEDIA_BUS_FMT_SRGGB16_1X16; + + fmt = v4l2_subdev_state_get_format(state, FE_STATS_PAD); + fmt->code = MEDIA_BUS_FMT_FIXED; + fmt->width = sizeof(struct pisp_statistics); + fmt->height = 1; + + return 0; +} + +static int pisp_fe_pad_set_fmt(struct v4l2_subdev *sd, + struct v4l2_subdev_state *state, 
+ struct v4l2_subdev_format *format) +{ + struct v4l2_mbus_framefmt *fmt; + const struct cfe_fmt *cfe_fmt; + + /* TODO: format propagation to source pads */ + /* TODO: format validation */ + + switch (format->pad) { + case FE_STREAM_PAD: + cfe_fmt = find_format_by_code(format->format.code); + if (!cfe_fmt || !(cfe_fmt->flags & CFE_FORMAT_FLAG_FE_OUT)) + cfe_fmt = find_format_by_code(MEDIA_BUS_FMT_SRGGB16_1X16); + + format->format.code = cfe_fmt->code; + format->format.field = V4L2_FIELD_NONE; + + fmt = v4l2_subdev_state_get_format(state, FE_STREAM_PAD); + *fmt = format->format; + + fmt = v4l2_subdev_state_get_format(state, FE_OUTPUT0_PAD); + *fmt = format->format; + + fmt = v4l2_subdev_state_get_format(state, FE_OUTPUT1_PAD); + *fmt = format->format; + + return 0; + + case FE_OUTPUT0_PAD: + case FE_OUTPUT1_PAD: { + /* + * TODO: we should allow scaling and cropping by allowing the + * user to set the size here. + */ + struct v4l2_mbus_framefmt *sink_fmt, *source_fmt; + u32 sink_code; + u32 code; + + cfe_fmt = find_format_by_code(format->format.code); + if (!cfe_fmt || !(cfe_fmt->flags & CFE_FORMAT_FLAG_FE_OUT)) + cfe_fmt = find_format_by_code(MEDIA_BUS_FMT_SRGGB16_1X16); + + format->format.code = cfe_fmt->code; + + sink_fmt = v4l2_subdev_state_get_format(state, FE_STREAM_PAD); + if (!sink_fmt) + return -EINVAL; + + source_fmt = v4l2_subdev_state_get_format(state, format->pad); + if (!source_fmt) + return -EINVAL; + + sink_code = sink_fmt->code; + code = format->format.code; + + /* + * If the source code from the user does not match the code in + * the sink pad, check that the source code matches the + * compressed version of the sink code. + */ + + if (code != sink_code && + code == cfe_find_compressed_code(sink_code)) + source_fmt->code = code; + + return 0; + } + + case FE_CONFIG_PAD: + case FE_STATS_PAD: + default: + return v4l2_subdev_get_fmt(sd, state, format); + } +} + +static const struct v4l2_subdev_pad_ops pisp_fe_subdev_pad_ops = { + .get_fmt = v4l2_subdev_get_fmt, + .set_fmt = pisp_fe_pad_set_fmt, + .link_validate = v4l2_subdev_link_validate_default, +}; + +static int pisp_fe_link_validate(struct media_link *link) +{ + struct v4l2_subdev *sd = media_entity_to_v4l2_subdev(link->sink->entity); + struct pisp_fe_device *fe = container_of(sd, struct pisp_fe_device, sd); + + pisp_fe_dbg(fe, "%s: link \"%s\":%u -> \"%s\":%u\n", __func__, + link->source->entity->name, link->source->index, + link->sink->entity->name, link->sink->index); + + if (link->sink->index == FE_STREAM_PAD) + return v4l2_subdev_link_validate(link); + + if (link->sink->index == FE_CONFIG_PAD) + return 0; + + return -EINVAL; +} + +static const struct media_entity_operations pisp_fe_entity_ops = { + .link_validate = pisp_fe_link_validate, +}; + +static const struct v4l2_subdev_ops pisp_fe_subdev_ops = { + .pad = &pisp_fe_subdev_pad_ops, +}; + +static const struct v4l2_subdev_internal_ops pisp_fe_internal_ops = { + .init_state = pisp_fe_init_state, +}; + +int pisp_fe_init(struct pisp_fe_device *fe, struct dentry *debugfs) +{ + int ret; + + debugfs_create_file("fe_regs", 0440, debugfs, fe, &pisp_fe_regs_fops); + + fe->hw_revision = pisp_fe_reg_read(fe, FE_VERSION); + pisp_fe_info(fe, "PiSP FE HW v%u.%u\n", + (fe->hw_revision >> 24) & 0xff, + (fe->hw_revision >> 20) & 0x0f); + + fe->pad[FE_STREAM_PAD].flags = + MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_MUST_CONNECT; + fe->pad[FE_CONFIG_PAD].flags = MEDIA_PAD_FL_SINK; + fe->pad[FE_OUTPUT0_PAD].flags = MEDIA_PAD_FL_SOURCE; + fe->pad[FE_OUTPUT1_PAD].flags = MEDIA_PAD_FL_SOURCE; + 
fe->pad[FE_STATS_PAD].flags = MEDIA_PAD_FL_SOURCE; + + ret = media_entity_pads_init(&fe->sd.entity, ARRAY_SIZE(fe->pad), + fe->pad); + if (ret) + return ret; + + /* Initialize subdev */ + v4l2_subdev_init(&fe->sd, &pisp_fe_subdev_ops); + fe->sd.internal_ops = &pisp_fe_internal_ops; + fe->sd.entity.function = MEDIA_ENT_F_PROC_VIDEO_SCALER; + fe->sd.entity.ops = &pisp_fe_entity_ops; + fe->sd.entity.name = "pisp-fe"; + fe->sd.flags = V4L2_SUBDEV_FL_HAS_DEVNODE; + fe->sd.owner = THIS_MODULE; + snprintf(fe->sd.name, sizeof(fe->sd.name), "pisp-fe"); + + ret = v4l2_subdev_init_finalize(&fe->sd); + if (ret) + goto err_entity_cleanup; + + ret = v4l2_device_register_subdev(fe->v4l2_dev, &fe->sd); + if (ret) { + pisp_fe_err(fe, "Failed register pisp fe subdev (%d)\n", ret); + goto err_subdev_cleanup; + } + + /* Must be in IDLE state (STATUS == 0) here. */ + WARN_ON(pisp_fe_reg_read(fe, FE_STATUS)); + + return 0; + +err_subdev_cleanup: + v4l2_subdev_cleanup(&fe->sd); +err_entity_cleanup: + media_entity_cleanup(&fe->sd.entity); + + return ret; +} + +void pisp_fe_uninit(struct pisp_fe_device *fe) +{ + v4l2_device_unregister_subdev(&fe->sd); + v4l2_subdev_cleanup(&fe->sd); + media_entity_cleanup(&fe->sd.entity); +} diff --git a/drivers/media/platform/raspberrypi/rp1-cfe/pisp-fe.h b/drivers/media/platform/raspberrypi/rp1-cfe/pisp-fe.h new file mode 100644 index 000000000000..54d506e19cf2 --- /dev/null +++ b/drivers/media/platform/raspberrypi/rp1-cfe/pisp-fe.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PiSP Front End Driver + * + * Copyright (c) 2021-2024 Raspberry Pi Ltd. + */ +#ifndef _PISP_FE_H_ +#define _PISP_FE_H_ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +enum pisp_fe_pads { + FE_STREAM_PAD, + FE_CONFIG_PAD, + FE_OUTPUT0_PAD, + FE_OUTPUT1_PAD, + FE_STATS_PAD, + FE_NUM_PADS +}; + +struct pisp_fe_device { + /* Parent V4l2 device */ + struct v4l2_device *v4l2_dev; + void __iomem *base; + u32 hw_revision; + + u16 inframe_count; + struct media_pad pad[FE_NUM_PADS]; + struct v4l2_subdev sd; +}; + +void pisp_fe_isr(struct pisp_fe_device *fe, bool *sof, bool *eof); +int pisp_fe_validate_config(struct pisp_fe_device *fe, + struct pisp_fe_config *cfg, + struct v4l2_format const *f0, + struct v4l2_format const *f1); +void pisp_fe_submit_job(struct pisp_fe_device *fe, struct vb2_buffer **vb2_bufs, + struct pisp_fe_config *cfg); +void pisp_fe_start(struct pisp_fe_device *fe); +void pisp_fe_stop(struct pisp_fe_device *fe); +int pisp_fe_init(struct pisp_fe_device *fe, struct dentry *debugfs); +void pisp_fe_uninit(struct pisp_fe_device *fe); + +#endif diff --git a/include/uapi/linux/media/raspberrypi/pisp_fe_config.h b/include/uapi/linux/media/raspberrypi/pisp_fe_config.h new file mode 100644 index 000000000000..77237460a3b5 --- /dev/null +++ b/include/uapi/linux/media/raspberrypi/pisp_fe_config.h @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * RP1 PiSP Front End Driver Configuration structures + * + * Copyright (C) 2021 - Raspberry Pi Ltd. 
+ * + */ +#ifndef _UAPI_PISP_FE_CONFIG_ +#define _UAPI_PISP_FE_CONFIG_ + +#include + +#include "pisp_common.h" +#include "pisp_fe_statistics.h" + +#define PISP_FE_NUM_OUTPUTS 2 + +enum pisp_fe_enable { + PISP_FE_ENABLE_INPUT = 0x000001, + PISP_FE_ENABLE_DECOMPRESS = 0x000002, + PISP_FE_ENABLE_DECOMPAND = 0x000004, + PISP_FE_ENABLE_BLA = 0x000008, + PISP_FE_ENABLE_DPC = 0x000010, + PISP_FE_ENABLE_STATS_CROP = 0x000020, + PISP_FE_ENABLE_DECIMATE = 0x000040, + PISP_FE_ENABLE_BLC = 0x000080, + PISP_FE_ENABLE_CDAF_STATS = 0x000100, + PISP_FE_ENABLE_AWB_STATS = 0x000200, + PISP_FE_ENABLE_RGBY = 0x000400, + PISP_FE_ENABLE_LSC = 0x000800, + PISP_FE_ENABLE_AGC_STATS = 0x001000, + PISP_FE_ENABLE_CROP0 = 0x010000, + PISP_FE_ENABLE_DOWNSCALE0 = 0x020000, + PISP_FE_ENABLE_COMPRESS0 = 0x040000, + PISP_FE_ENABLE_OUTPUT0 = 0x080000, + PISP_FE_ENABLE_CROP1 = 0x100000, + PISP_FE_ENABLE_DOWNSCALE1 = 0x200000, + PISP_FE_ENABLE_COMPRESS1 = 0x400000, + PISP_FE_ENABLE_OUTPUT1 = 0x800000 +}; + +#define PISP_FE_ENABLE_CROP(i) (PISP_FE_ENABLE_CROP0 << (4 * (i))) +#define PISP_FE_ENABLE_DOWNSCALE(i) (PISP_FE_ENABLE_DOWNSCALE0 << (4 * (i))) +#define PISP_FE_ENABLE_COMPRESS(i) (PISP_FE_ENABLE_COMPRESS0 << (4 * (i))) +#define PISP_FE_ENABLE_OUTPUT(i) (PISP_FE_ENABLE_OUTPUT0 << (4 * (i))) + +/* + * We use the enable flags to show when blocks are "dirty", but we need some + * extra ones too. + */ +enum pisp_fe_dirty { + PISP_FE_DIRTY_GLOBAL = 0x0001, + PISP_FE_DIRTY_FLOATING = 0x0002, + PISP_FE_DIRTY_OUTPUT_AXI = 0x0004 +}; + +struct pisp_fe_global_config { + __u32 enables; + __u8 bayer_order; + __u8 pad[3]; +} __attribute__((packed)); + +struct pisp_fe_input_axi_config { + /* burst length minus one, in the range 0..15; OR'd with flags */ + __u8 maxlen_flags; + /* { prot[2:0], cache[3:0] } fields */ + __u8 cache_prot; + /* QoS (only 4 LS bits are used) */ + __u16 qos; +} __attribute__((packed)); + +struct pisp_fe_output_axi_config { + /* burst length minus one, in the range 0..15; OR'd with flags */ + __u8 maxlen_flags; + /* { prot[2:0], cache[3:0] } fields */ + __u8 cache_prot; + /* QoS (4 bitfields of 4 bits each for different panic levels) */ + __u16 qos; + /* For Panic mode: Output FIFO panic threshold */ + __u16 thresh; + /* For Panic mode: Output FIFO statistics throttle threshold */ + __u16 throttle; +} __attribute__((packed)); + +struct pisp_fe_input_config { + __u8 streaming; + __u8 pad[3]; + struct pisp_image_format_config format; + struct pisp_fe_input_axi_config axi; + /* Extra cycles delay before issuing each burst request */ + __u8 holdoff; + __u8 pad2[3]; +} __attribute__((packed)); + +struct pisp_fe_output_config { + struct pisp_image_format_config format; + __u16 ilines; + __u8 pad[2]; +} __attribute__((packed)); + +struct pisp_fe_input_buffer_config { + __u32 addr_lo; + __u32 addr_hi; + __u16 frame_id; + __u16 pad; +} __attribute__((packed)); + +#define PISP_FE_DECOMPAND_LUT_SIZE 65 + +struct pisp_fe_decompand_config { + __u16 lut[PISP_FE_DECOMPAND_LUT_SIZE]; + __u16 pad; +} __attribute__((packed)); + +struct pisp_fe_dpc_config { + __u8 coeff_level; + __u8 coeff_range; + __u8 coeff_range2; +#define PISP_FE_DPC_FLAG_FOLDBACK 1 +#define PISP_FE_DPC_FLAG_VFLAG 2 + __u8 flags; +} __attribute__((packed)); + +#define PISP_FE_LSC_LUT_SIZE 16 + +struct pisp_fe_lsc_config { + __u8 shift; + __u8 pad0; + __u16 scale; + __u16 centre_x; + __u16 centre_y; + __u16 lut[PISP_FE_LSC_LUT_SIZE]; +} __attribute__((packed)); + +struct pisp_fe_rgby_config { + __u16 gain_r; + __u16 gain_g; + __u16 gain_b; + __u8 maxflag; + 
__u8 pad; +} __attribute__((packed)); + +struct pisp_fe_agc_stats_config { + __u16 offset_x; + __u16 offset_y; + __u16 size_x; + __u16 size_y; + /* each weight only 4 bits */ + __u8 weights[PISP_AGC_STATS_NUM_ZONES / 2]; + __u16 row_offset_x; + __u16 row_offset_y; + __u16 row_size_x; + __u16 row_size_y; + __u8 row_shift; + __u8 float_shift; + __u8 pad1[2]; +} __attribute__((packed)); + +struct pisp_fe_awb_stats_config { + __u16 offset_x; + __u16 offset_y; + __u16 size_x; + __u16 size_y; + __u8 shift; + __u8 pad[3]; + __u16 r_lo; + __u16 r_hi; + __u16 g_lo; + __u16 g_hi; + __u16 b_lo; + __u16 b_hi; +} __attribute__((packed)); + +struct pisp_fe_floating_stats_region { + __u16 offset_x; + __u16 offset_y; + __u16 size_x; + __u16 size_y; +} __attribute__((packed)); + +struct pisp_fe_floating_stats_config { + struct pisp_fe_floating_stats_region + regions[PISP_FLOATING_STATS_NUM_ZONES]; +} __attribute__((packed)); + +#define PISP_FE_CDAF_NUM_WEIGHTS 8 + +struct pisp_fe_cdaf_stats_config { + __u16 noise_constant; + __u16 noise_slope; + __u16 offset_x; + __u16 offset_y; + __u16 size_x; + __u16 size_y; + __u16 skip_x; + __u16 skip_y; + __u32 mode; +} __attribute__((packed)); + +struct pisp_fe_stats_buffer_config { + __u32 addr_lo; + __u32 addr_hi; +} __attribute__((packed)); + +struct pisp_fe_crop_config { + __u16 offset_x; + __u16 offset_y; + __u16 width; + __u16 height; +} __attribute__((packed)); + +enum pisp_fe_downscale_flags { + /* downscale the four Bayer components independently... */ + DOWNSCALE_BAYER = 1, + /* ...without trying to preserve their spatial relationship */ + DOWNSCALE_BIN = 2, +}; + +struct pisp_fe_downscale_config { + __u8 xin; + __u8 xout; + __u8 yin; + __u8 yout; + __u8 flags; /* enum pisp_fe_downscale_flags */ + __u8 pad[3]; + __u16 output_width; + __u16 output_height; +} __attribute__((packed)); + +struct pisp_fe_output_buffer_config { + __u32 addr_lo; + __u32 addr_hi; +} __attribute__((packed)); + +/* Each of the two output channels/branches: */ +struct pisp_fe_output_branch_config { + struct pisp_fe_crop_config crop; + struct pisp_fe_downscale_config downscale; + struct pisp_compress_config compress; + struct pisp_fe_output_config output; + __u32 pad; +} __attribute__((packed)); + +/* And finally one to rule them all: */ +struct pisp_fe_config { + /* I/O configuration: */ + struct pisp_fe_stats_buffer_config stats_buffer; + struct pisp_fe_output_buffer_config output_buffer[PISP_FE_NUM_OUTPUTS]; + struct pisp_fe_input_buffer_config input_buffer; + /* processing configuration: */ + struct pisp_fe_global_config global; + struct pisp_fe_input_config input; + struct pisp_decompress_config decompress; + struct pisp_fe_decompand_config decompand; + struct pisp_bla_config bla; + struct pisp_fe_dpc_config dpc; + struct pisp_fe_crop_config stats_crop; + __u32 spare1; /* placeholder for future decimate configuration */ + struct pisp_bla_config blc; + struct pisp_fe_rgby_config rgby; + struct pisp_fe_lsc_config lsc; + struct pisp_fe_agc_stats_config agc_stats; + struct pisp_fe_awb_stats_config awb_stats; + struct pisp_fe_cdaf_stats_config cdaf_stats; + struct pisp_fe_floating_stats_config floating_stats; + struct pisp_fe_output_axi_config output_axi; + struct pisp_fe_output_branch_config ch[PISP_FE_NUM_OUTPUTS]; + /* non-register fields: */ + __u32 dirty_flags; /* these use pisp_fe_enable */ + __u32 dirty_flags_extra; /* these use pisp_fe_dirty */ +} __attribute__((packed)); + +#endif /* _UAPI_PISP_FE_CONFIG_ */ diff --git 
a/include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h b/include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h new file mode 100644 index 000000000000..a7d42985aee8 --- /dev/null +++ b/include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * RP1 PiSP Front End statistics definitions + * + * Copyright (C) 2021 - Raspberry Pi Ltd. + * + */ +#ifndef _UAPI_PISP_FE_STATISTICS_H_ +#define _UAPI_PISP_FE_STATISTICS_H_ + +#include + +#define PISP_FLOATING_STATS_NUM_ZONES 4 +#define PISP_AGC_STATS_NUM_BINS 1024 +#define PISP_AGC_STATS_SIZE 16 +#define PISP_AGC_STATS_NUM_ZONES (PISP_AGC_STATS_SIZE * PISP_AGC_STATS_SIZE) +#define PISP_AGC_STATS_NUM_ROW_SUMS 512 + +struct pisp_agc_statistics_zone { + __u64 Y_sum; + __u32 counted; + __u32 pad; +} __attribute__((packed)); + +struct pisp_agc_statistics { + __u32 row_sums[PISP_AGC_STATS_NUM_ROW_SUMS]; + /* + * 32-bits per bin means an image (just less than) 16384x16384 pixels + * in size can weight every pixel from 0 to 15. + */ + __u32 histogram[PISP_AGC_STATS_NUM_BINS]; + struct pisp_agc_statistics_zone floating[PISP_FLOATING_STATS_NUM_ZONES]; +} __attribute__((packed)); + +#define PISP_AWB_STATS_SIZE 32 +#define PISP_AWB_STATS_NUM_ZONES (PISP_AWB_STATS_SIZE * PISP_AWB_STATS_SIZE) + +struct pisp_awb_statistics_zone { + __u32 R_sum; + __u32 G_sum; + __u32 B_sum; + __u32 counted; +} __attribute__((packed)); + +struct pisp_awb_statistics { + struct pisp_awb_statistics_zone zones[PISP_AWB_STATS_NUM_ZONES]; + struct pisp_awb_statistics_zone floating[PISP_FLOATING_STATS_NUM_ZONES]; +} __attribute__((packed)); + +#define PISP_CDAF_STATS_SIZE 8 +#define PISP_CDAF_STATS_NUM_FOMS (PISP_CDAF_STATS_SIZE * PISP_CDAF_STATS_SIZE) + +struct pisp_cdaf_statistics { + __u64 foms[PISP_CDAF_STATS_NUM_FOMS]; + __u64 floating[PISP_FLOATING_STATS_NUM_ZONES]; +} __attribute__((packed)); + +struct pisp_statistics { + struct pisp_awb_statistics awb; + struct pisp_agc_statistics agc; + struct pisp_cdaf_statistics cdaf; +} __attribute__((packed)); + +#endif /* _UAPI_PISP_FE_STATISTICS_H_ */ -- cgit v1.2.3 From 30fe661eb9d32d0aa65a86507572bd654a374c56 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 17 Sep 2024 15:14:44 +0300 Subject: media: Documentation: Deprecate s_stream video op, update docs The scope of the s_stream video operation is now fully supported by {enable,disable}_streams. Explicitly document the s_stream() op as deprecated and update the related documentation. Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Reviewed-by: Tomi Valkeinen Signed-off-by: Mauro Carvalho Chehab --- Documentation/driver-api/media/camera-sensor.rst | 8 ++++---- Documentation/driver-api/media/tx-rx.rst | 13 ++++++++----- include/media/v4l2-subdev.h | 5 +++-- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/Documentation/driver-api/media/camera-sensor.rst b/Documentation/driver-api/media/camera-sensor.rst index b4920b34cebc..c290833165e6 100644 --- a/Documentation/driver-api/media/camera-sensor.rst +++ b/Documentation/driver-api/media/camera-sensor.rst @@ -81,10 +81,10 @@ restart when the system is resumed. This requires coordination between the camera sensor and the rest of the camera pipeline. Bridge drivers are responsible for this coordination, and instruct camera sensors to stop and restart streaming by calling the appropriate subdev operations -(``.s_stream()``, ``.enable_streams()`` or ``.disable_streams()``). 
Camera -sensor drivers shall therefore **not** keep track of the streaming state to -stop streaming in the PM suspend handler and restart it in the resume handler. -Drivers should in general not implement the system PM handlers. +(``.enable_streams()`` or ``.disable_streams()``). Camera sensor drivers shall +therefore **not** keep track of the streaming state to stop streaming in the PM +suspend handler and restart it in the resume handler. Drivers should in general +not implement the system PM handlers. Camera sensor drivers shall **not** implement the subdev ``.s_power()`` operation, as it is deprecated. While this operation is implemented in some diff --git a/Documentation/driver-api/media/tx-rx.rst b/Documentation/driver-api/media/tx-rx.rst index 29d66a47b56e..dd09484df1d3 100644 --- a/Documentation/driver-api/media/tx-rx.rst +++ b/Documentation/driver-api/media/tx-rx.rst @@ -49,11 +49,14 @@ Link frequency The :ref:`V4L2_CID_LINK_FREQ ` control is used to tell the receiver the frequency of the bus (i.e. it is not the same as the symbol rate). -``.s_stream()`` callback -^^^^^^^^^^^^^^^^^^^^^^^^ +``.enable_streams()`` and ``.disable_streams()`` callbacks +^^^^^^^^^^^^^^^^^^^^^^^^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The struct struct v4l2_subdev_video_ops->s_stream() callback is used by the -receiver driver to control the transmitter driver's streaming state. +The struct v4l2_subdev_pad_ops->enable_streams() and struct +v4l2_subdev_pad_ops->disable_streams() callbacks are used by the receiver driver +to control the transmitter driver's streaming state. These callbacks may not be +called directly, but by using ``v4l2_subdev_enable_streams()`` and +``v4l2_subdev_disable_streams()``. CSI-2 transmitter drivers @@ -127,7 +130,7 @@ Stopping the transmitter ^^^^^^^^^^^^^^^^^^^^^^^^ A transmitter stops sending the stream of images as a result of -calling the ``.s_stream()`` callback. Some transmitters may stop the +calling the ``.disable_streams()`` callback. Some transmitters may stop the stream at a frame boundary whereas others stop immediately, effectively leaving the current frame unfinished. The receiver driver should not make assumptions either way, but function properly in both diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 8daa0929865c..3cc6b4a5935f 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -450,8 +450,9 @@ enum v4l2_subdev_pre_streamon_flags { * already started or stopped subdev. Also see call_s_stream wrapper in * v4l2-subdev.c. * - * New drivers should instead implement &v4l2_subdev_pad_ops.enable_streams - * and &v4l2_subdev_pad_ops.disable_streams operations, and use + * This callback is DEPRECATED. New drivers should instead implement + * &v4l2_subdev_pad_ops.enable_streams and + * &v4l2_subdev_pad_ops.disable_streams operations, and use * v4l2_subdev_s_stream_helper for the &v4l2_subdev_video_ops.s_stream * operation to support legacy users. * -- cgit v1.2.3 From 646aed96d51088c1f24b4d710fa6fa4a17ecc484 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 17 Sep 2024 15:31:47 +0300 Subject: media: Documentation: Update {enable,disable}_streams documentation Document the expected {enable,disable}_streams callback behaviour for drivers that are stream-unaware i.e. don't specify the V4L2_SUBDEV_CAP_STREAMS sub-device capability flag. In this specific case, the mask argument can be ignored. 
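A minimal sketch of the stream-unaware case (hypothetical my_sensor driver; to_my_sensor() and the start/stop helpers are placeholders, not existing APIs):

#include <media/v4l2-subdev.h>

static int my_sensor_enable_streams(struct v4l2_subdev *sd,
				    struct v4l2_subdev_state *state,
				    u32 pad, u64 streams_mask)
{
	/* to_my_sensor() and my_sensor_start_streaming() are hypothetical */
	struct my_sensor *sensor = to_my_sensor(sd);

	/*
	 * Without V4L2_SUBDEV_CAP_STREAMS callers are expected to pass
	 * BIT_ULL(0) here, so streams_mask can simply be ignored.
	 */
	return my_sensor_start_streaming(sensor, state);
}

static int my_sensor_disable_streams(struct v4l2_subdev *sd,
				     struct v4l2_subdev_state *state,
				     u32 pad, u64 streams_mask)
{
	struct my_sensor *sensor = to_my_sensor(sd);

	return my_sensor_stop_streaming(sensor);
}

static const struct v4l2_subdev_pad_ops my_sensor_pad_ops = {
	.enable_streams = my_sensor_enable_streams,
	.disable_streams = my_sensor_disable_streams,
};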
Signed-off-by: Sakari Ailus Reviewed-by: Tomi Valkeinen Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-subdev.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 3cc6b4a5935f..ff63fb6046b1 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -834,11 +834,19 @@ struct v4l2_subdev_state { * v4l2_subdev_init_finalize() at initialization time). Do not call * directly, use v4l2_subdev_enable_streams() instead. * + * Drivers that support only a single stream without setting the + * V4L2_SUBDEV_CAP_STREAMS sub-device capability flag can ignore the mask + * argument. + * * @disable_streams: Disable the streams defined in streams_mask on the given * source pad. Subdevs that implement this operation must use the active * state management provided by the subdev core (enabled through a call to * v4l2_subdev_init_finalize() at initialization time). Do not call * directly, use v4l2_subdev_disable_streams() instead. + * + * Drivers that support only a single stream without setting the + * V4L2_SUBDEV_CAP_STREAMS sub-device capability flag can ignore the mask + * argument. */ struct v4l2_subdev_pad_ops { int (*enum_mbus_code)(struct v4l2_subdev *sd, -- cgit v1.2.3 From fd0e579bc62cb1766469866a89471d2d8c8b721d Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 17 Sep 2024 17:58:58 +0300 Subject: media: Documentation: Improve v4l2_subdev_{en,dis}able_streams documentation Document that callers of v4l2_subdev_{en,dis}able_streams() need to set the mask to BIT_ULL(0). Signed-off-by: Sakari Ailus Reviewed-by: Tomi Valkeinen Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-subdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index ff63fb6046b1..2f2200875b03 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1685,6 +1685,8 @@ int v4l2_subdev_routing_validate(struct v4l2_subdev *sd, * function implements a best-effort compatibility by calling the .s_stream() * operation, limited to subdevs that have a single source pad. * + * Drivers that are not stream-aware shall set @streams_mask to BIT_ULL(0). + * * Return: * * 0: Success * * -EALREADY: One of the streams in streams_mask is already enabled @@ -1715,6 +1717,8 @@ int v4l2_subdev_enable_streams(struct v4l2_subdev *sd, u32 pad, * function implements a best-effort compatibility by calling the .s_stream() * operation, limited to subdevs that have a single source pad. * + * Drivers that are not stream-aware shall set @streams_mask to BIT_ULL(0). + * * Return: * * 0: Success * * -EALREADY: One of the streams in streams_mask is not enabled -- cgit v1.2.3 From 719258c55f7e9c123ef2a0941c6730b6d43c1bc2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 9 Sep 2024 15:49:41 +0200 Subject: mfd: palmas: Constify strings with regulator names The names of regulators are static const strings, so pointers can be made as pointers to const for code safety. 
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240909134941.121847-1-krzysztof.kozlowski@linaro.org Signed-off-by: Lee Jones --- include/linux/mfd/palmas.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index eda1ffd99c1a..dabcc0dea802 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -98,8 +98,8 @@ struct palmas_sleep_requestor_info { }; struct palmas_regs_info { - char *name; - char *sname; + const char *name; + const char *sname; u8 vsel_addr; u8 ctrl_addr; u8 tstep_addr; -- cgit v1.2.3 From d5340a18cd321075442677ce1002d8e163b17b67 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Thu, 26 Sep 2024 13:28:10 +0300 Subject: mfd: max77693: Remove unused max77693_irq_source declarations Remove `enum max77693_irq_source` declaration because unused. Signed-off-by: Dzmitry Sankouski Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240617-starqltechn_integration_upstream-v5-1-125d9228d751@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/max77693-private.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 20c5e02ed9da..c324d548619e 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -419,17 +419,6 @@ enum max77693_haptic_reg { #define MAX77693_CONFIG2_MEN 6 #define MAX77693_CONFIG2_HTYP 5 -enum max77693_irq_source { - LED_INT = 0, - TOPSYS_INT, - CHG_INT, - MUIC_INT1, - MUIC_INT2, - MUIC_INT3, - - MAX77693_IRQ_GROUP_NR, -}; - #define SRC_IRQ_CHARGER BIT(0) #define SRC_IRQ_TOP BIT(1) #define SRC_IRQ_FLASH BIT(2) -- cgit v1.2.3 From bf231e5febcf9358d7e70a2c6974548f7f3e4f61 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Thu, 26 Sep 2024 12:47:31 +0300 Subject: mfd: sec-core: Add support for the Samsung s2dos05 S2DOS05 is a panel/touchscreen PMIC, often found in Samsung phones. We define regulator sub-device for which driver will be added in subsequent patch. The device also has ADC for power and current measurements. 
Signed-off-by: Dzmitry Sankouski Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240617-starqltechn_integration_upstream-v5-2-ea1109029ba5@gmail.com Signed-off-by: Lee Jones --- drivers/mfd/sec-core.c | 11 +++++++++++ include/linux/mfd/samsung/core.h | 1 + 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c index a6b0d7300b2d..cdfe738e1d76 100644 --- a/drivers/mfd/sec-core.c +++ b/drivers/mfd/sec-core.c @@ -34,6 +34,10 @@ static const struct mfd_cell s5m8767_devs[] = { }, }; +static const struct mfd_cell s2dos05_devs[] = { + { .name = "s2dos05-regulator", }, +}; + static const struct mfd_cell s2mps11_devs[] = { { .name = "s2mps11-regulator", }, { .name = "s2mps14-rtc", }, @@ -83,6 +87,9 @@ static const struct of_device_id sec_dt_match[] = { { .compatible = "samsung,s5m8767-pmic", .data = (void *)S5M8767X, + }, { + .compatible = "samsung,s2dos05", + .data = (void *)S2DOS05, }, { .compatible = "samsung,s2mps11-pmic", .data = (void *)S2MPS11X, @@ -339,6 +346,10 @@ static int sec_pmic_probe(struct i2c_client *i2c) sec_devs = s5m8767_devs; num_sec_devs = ARRAY_SIZE(s5m8767_devs); break; + case S2DOS05: + sec_devs = s2dos05_devs; + num_sec_devs = ARRAY_SIZE(s2dos05_devs); + break; case S2MPA01: sec_devs = s2mpa01_devs; num_sec_devs = ARRAY_SIZE(s2mpa01_devs); diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index a212b9f72bc9..750274d41fc0 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -37,6 +37,7 @@ struct gpio_desc; enum sec_device_type { S5M8767X, + S2DOS05, S2MPA01, S2MPS11X, S2MPS13X, -- cgit v1.2.3 From 2b627246c0cf2b54f1b17531dde964cf5b99b2a5 Mon Sep 17 00:00:00 2001 From: Pavan Holla Date: Tue, 10 Sep 2024 10:15:21 +0000 Subject: platform/chrome: Update EC feature flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define EC_FEATURE_UCSI_PPM to enable usage of the cros_ec_ucsi driver. Also, add any feature flags that are implemented by the EC but are missing in the kernel header. Signed-off-by: Pavan Holla Signed-off-by: Łukasz Bartosik Acked-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20240910101527.603452-3-ukaszb@chromium.org Signed-off-by: Lee Jones --- include/linux/platform_data/cros_ec_commands.h | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index e574b790be6f..b3c4993e656e 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1312,6 +1312,38 @@ enum ec_feature_code { * The EC supports the AP composing VDMs for us to send. */ EC_FEATURE_TYPEC_AP_VDM_SEND = 46, + /* + * The EC supports system safe mode panic recovery. + */ + EC_FEATURE_SYSTEM_SAFE_MODE = 47, + /* + * The EC will reboot on runtime assertion failures. + */ + EC_FEATURE_ASSERT_REBOOTS = 48, + /* + * The EC image is built with tokenized logging enabled. + */ + EC_FEATURE_TOKENIZED_LOGGING = 49, + /* + * The EC supports triggering an STB dump. + */ + EC_FEATURE_AMD_STB_DUMP = 50, + /* + * The EC supports memory dump commands. + */ + EC_FEATURE_MEMORY_DUMP = 51, + /* + * The EC supports DP2.1 capability + */ + EC_FEATURE_TYPEC_DP2_1 = 52, + /* + * The MCU is System Companion Processor Core 1 + */ + EC_FEATURE_SCP_C1 = 53, + /* + * The EC supports UCSI PPM. 
+ */ + EC_FEATURE_UCSI_PPM = 54, }; #define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32) -- cgit v1.2.3 From 7b5a58952fc3b51905c2963647485565df1e5e26 Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Fri, 27 Sep 2024 20:51:38 +0530 Subject: usb: gadget: uvc: configfs: Add frame-based frame format support Add support for frame-based frame format, which can be used to support multiple formats like H264 or H265, in addition to MJPEG and YUV frames. The frame-based format is set to H264 by default, but it can be updated to other formats by modifying the GUID through the guid configfs attribute. Different structures are used for all three formats, as H264 has a different structure compared to MJPEG and uncompressed formats. These structures will be passed to the frame make function based on the active format, using a common frame structure with additional parameters needed only for frame-based formats. These parameters are handled at runtime in the UVC driver. Signed-off-by: Akash Kumar Link: https://lore.kernel.org/r/20240927152138.31416-1-quic_akakum@quicinc.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/configfs-usb-gadget-uvc | 64 ++++ drivers/usb/gadget/function/uvc_configfs.c | 348 +++++++++++++++++++++- drivers/usb/gadget/function/uvc_configfs.h | 16 + drivers/usb/gadget/function/uvc_v4l2.c | 11 +- include/uapi/linux/usb/video.h | 58 ++++ 5 files changed, 485 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc index 4feb692c4c1d..b6720768d63d 100644 --- a/Documentation/ABI/testing/configfs-usb-gadget-uvc +++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc @@ -342,6 +342,70 @@ Description: Specific uncompressed frame descriptors support ========================= ===================================== +What: /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased +Date: Sept 2024 +KernelVersion: 5.15 +Description: Framebased format descriptors + +What: /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased/name +Date: Sept 2024 +KernelVersion: 5.15 +Description: Specific framebased format descriptors + + ================== ======================================= + bFormatIndex unique id for this format descriptor; + only defined after parent header is + linked into the streaming class; + read-only + bmaControls this format's data for bmaControls in + the streaming header + bmInterlaceFlags specifies interlace information, + read-only + bAspectRatioY the X dimension of the picture aspect + ratio, read-only + bAspectRatioX the Y dimension of the picture aspect + ratio, read-only + bDefaultFrameIndex optimum frame index for this stream + bBitsPerPixel number of bits per pixel used to + specify color in the decoded video + frame + guidFormat globally unique id used to identify + stream-encoding format + ================== ======================================= + +What: /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased/name/name +Date: Sept 2024 +KernelVersion: 5.15 +Description: Specific framebased frame descriptors + + ========================= ===================================== + bFrameIndex unique id for this framedescriptor; + only defined after parent format is + linked into the streaming header; + read-only + dwFrameInterval indicates how frame interval can be + programmed; a number of values + separated by newline can be specified + dwDefaultFrameInterval the frame interval the device would + like to 
use as default + dwBytesPerLine Specifies the number of bytes per line + of video for packed fixed frame size + formats, allowing the receiver to + perform stride alignment of the video. + If the bVariableSize value (above) is + TRUE (1), or if the format does not + permit such alignment, this value shall + be set to zero (0). + dwMaxBitRate the maximum bit rate at the shortest + frame interval in bps + dwMinBitRate the minimum bit rate at the longest + frame interval in bps + wHeight height of decoded bitmap frame in px + wWidth width of decoded bitmam frame in px + bmCapabilities still image support, fixed frame-rate + support + ========================= ===================================== + What: /config/usb-gadget/gadget/functions/uvc.name/streaming/header Date: Dec 2014 KernelVersion: 4.0 diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 6fac696ea846..f131943254a4 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -1566,11 +1566,13 @@ static const struct uvcg_config_group_type uvcg_control_grp_type = { /* ----------------------------------------------------------------------------- * streaming/uncompressed * streaming/mjpeg + * streaming/framebased */ static const char * const uvcg_format_names[] = { "uncompressed", "mjpeg", + "framebased", }; static struct uvcg_color_matching * @@ -1777,6 +1779,9 @@ static int uvcg_streaming_header_allow_link(struct config_item *src, target_fmt = container_of(to_config_group(target), struct uvcg_format, group); + if (!target_fmt) + goto out; + uvcg_format_set_indices(to_config_group(target)); format_ptr = kzalloc(sizeof(*format_ptr), GFP_KERNEL); @@ -1816,6 +1821,9 @@ static void uvcg_streaming_header_drop_link(struct config_item *src, target_fmt = container_of(to_config_group(target), struct uvcg_format, group); + if (!target_fmt) + goto out; + list_for_each_entry_safe(format_ptr, tmp, &src_hdr->formats, entry) if (format_ptr->fmt == target_fmt) { list_del(&format_ptr->entry); @@ -1826,6 +1834,7 @@ static void uvcg_streaming_header_drop_link(struct config_item *src, --target_fmt->linked; +out: mutex_unlock(&opts->lock); mutex_unlock(su_mutex); } @@ -2022,6 +2031,7 @@ UVCG_FRAME_ATTR(dw_min_bit_rate, dwMinBitRate, 32); UVCG_FRAME_ATTR(dw_max_bit_rate, dwMaxBitRate, 32); UVCG_FRAME_ATTR(dw_max_video_frame_buffer_size, dwMaxVideoFrameBufferSize, 32); UVCG_FRAME_ATTR(dw_default_frame_interval, dwDefaultFrameInterval, 32); +UVCG_FRAME_ATTR(dw_bytes_perline, dwBytesPerLine, 32); #undef UVCG_FRAME_ATTR @@ -2035,7 +2045,7 @@ static ssize_t uvcg_frame_dw_frame_interval_show(struct config_item *item, int result, i; char *pg = page; - mutex_lock(su_mutex); /* for navigating configfs hierarchy */ + mutex_lock(su_mutex); /* for navigating configfs hierarchy */ opts_item = frm->item.ci_parent->ci_parent->ci_parent->ci_parent; opts = to_f_uvc_opts(opts_item); @@ -2105,7 +2115,7 @@ end: UVC_ATTR(uvcg_frame_, dw_frame_interval, dwFrameInterval); -static struct configfs_attribute *uvcg_frame_attrs[] = { +static struct configfs_attribute *uvcg_frame_attrs1[] = { &uvcg_frame_attr_b_frame_index, &uvcg_frame_attr_bm_capabilities, &uvcg_frame_attr_w_width, @@ -2118,12 +2128,31 @@ static struct configfs_attribute *uvcg_frame_attrs[] = { NULL, }; -static const struct config_item_type uvcg_frame_type = { +static struct configfs_attribute *uvcg_frame_attrs2[] = { + &uvcg_frame_attr_b_frame_index, + &uvcg_frame_attr_bm_capabilities, + &uvcg_frame_attr_w_width, + 
&uvcg_frame_attr_w_height, + &uvcg_frame_attr_dw_min_bit_rate, + &uvcg_frame_attr_dw_max_bit_rate, + &uvcg_frame_attr_dw_default_frame_interval, + &uvcg_frame_attr_dw_frame_interval, + &uvcg_frame_attr_dw_bytes_perline, + NULL, +}; + +static const struct config_item_type uvcg_frame_type1 = { .ct_item_ops = &uvcg_config_item_ops, - .ct_attrs = uvcg_frame_attrs, + .ct_attrs = uvcg_frame_attrs1, .ct_owner = THIS_MODULE, }; +static const struct config_item_type uvcg_frame_type2 = { + .ct_item_ops = &uvcg_config_item_ops, + .ct_attrs = uvcg_frame_attrs2, + .ct_owner = THIS_MODULE, +}; + static struct config_item *uvcg_frame_make(struct config_group *group, const char *name) { @@ -2145,6 +2174,7 @@ static struct config_item *uvcg_frame_make(struct config_group *group, h->frame.dw_max_bit_rate = 55296000; h->frame.dw_max_video_frame_buffer_size = 460800; h->frame.dw_default_frame_interval = 666666; + h->frame.dw_bytes_perline = 0; opts_item = group->cg_item.ci_parent->ci_parent->ci_parent; opts = to_f_uvc_opts(opts_item); @@ -2157,6 +2187,9 @@ static struct config_item *uvcg_frame_make(struct config_group *group, } else if (fmt->type == UVCG_MJPEG) { h->frame.b_descriptor_subtype = UVC_VS_FRAME_MJPEG; h->fmt_type = UVCG_MJPEG; + } else if (fmt->type == UVCG_FRAMEBASED) { + h->frame.b_descriptor_subtype = UVC_VS_FRAME_FRAME_BASED; + h->fmt_type = UVCG_FRAMEBASED; } else { mutex_unlock(&opts->lock); kfree(h); @@ -2175,7 +2208,10 @@ static struct config_item *uvcg_frame_make(struct config_group *group, ++fmt->num_frames; mutex_unlock(&opts->lock); - config_item_init_type_name(&h->item, name, &uvcg_frame_type); + if (fmt->type == UVCG_FRAMEBASED) + config_item_init_type_name(&h->item, name, &uvcg_frame_type2); + else + config_item_init_type_name(&h->item, name, &uvcg_frame_type1); return &h->item; } @@ -2215,9 +2251,6 @@ static void uvcg_format_set_indices(struct config_group *fmt) list_for_each_entry(ci, &fmt->cg_children, ci_entry) { struct uvcg_frame *frm; - if (ci->ci_type != &uvcg_frame_type) - continue; - frm = to_uvcg_frame(ci); frm->frame.b_frame_index = i++; } @@ -2677,6 +2710,251 @@ static const struct uvcg_config_group_type uvcg_mjpeg_grp_type = { .name = "mjpeg", }; +/* ----------------------------------------------------------------------------- + * streaming/framebased/ + */ + +static struct configfs_group_operations uvcg_framebased_group_ops = { + .make_item = uvcg_frame_make, + .drop_item = uvcg_frame_drop, +}; + +#define UVCG_FRAMEBASED_ATTR_RO(cname, aname, bits) \ +static ssize_t uvcg_framebased_##cname##_show(struct config_item *item, \ + char *page) \ +{ \ + struct uvcg_framebased *u = to_uvcg_framebased(item); \ + struct f_uvc_opts *opts; \ + struct config_item *opts_item; \ + struct mutex *su_mutex = &u->fmt.group.cg_subsys->su_mutex; \ + int result; \ + \ + mutex_lock(su_mutex); /* for navigating configfs hierarchy */ \ + \ + opts_item = u->fmt.group.cg_item.ci_parent->ci_parent->ci_parent;\ + opts = to_f_uvc_opts(opts_item); \ + \ + mutex_lock(&opts->lock); \ + result = sprintf(page, "%u\n", le##bits##_to_cpu(u->desc.aname));\ + mutex_unlock(&opts->lock); \ + \ + mutex_unlock(su_mutex); \ + return result; \ +} \ + \ +UVC_ATTR_RO(uvcg_framebased_, cname, aname) + +#define UVCG_FRAMEBASED_ATTR(cname, aname, bits) \ +static ssize_t uvcg_framebased_##cname##_show(struct config_item *item, \ + char *page) \ +{ \ + struct uvcg_framebased *u = to_uvcg_framebased(item); \ + struct f_uvc_opts *opts; \ + struct config_item *opts_item; \ + struct mutex *su_mutex = 
&u->fmt.group.cg_subsys->su_mutex; \ + int result; \ + \ + mutex_lock(su_mutex); /* for navigating configfs hierarchy */ \ + \ + opts_item = u->fmt.group.cg_item.ci_parent->ci_parent->ci_parent;\ + opts = to_f_uvc_opts(opts_item); \ + \ + mutex_lock(&opts->lock); \ + result = sprintf(page, "%u\n", le##bits##_to_cpu(u->desc.aname));\ + mutex_unlock(&opts->lock); \ + \ + mutex_unlock(su_mutex); \ + return result; \ +} \ + \ +static ssize_t \ +uvcg_framebased_##cname##_store(struct config_item *item, \ + const char *page, size_t len) \ +{ \ + struct uvcg_framebased *u = to_uvcg_framebased(item); \ + struct f_uvc_opts *opts; \ + struct config_item *opts_item; \ + struct mutex *su_mutex = &u->fmt.group.cg_subsys->su_mutex; \ + int ret; \ + u8 num; \ + \ + mutex_lock(su_mutex); /* for navigating configfs hierarchy */ \ + \ + opts_item = u->fmt.group.cg_item.ci_parent->ci_parent->ci_parent;\ + opts = to_f_uvc_opts(opts_item); \ + \ + mutex_lock(&opts->lock); \ + if (u->fmt.linked || opts->refcnt) { \ + ret = -EBUSY; \ + goto end; \ + } \ + \ + ret = kstrtou8(page, 0, &num); \ + if (ret) \ + goto end; \ + \ + if (num > 255) { \ + ret = -EINVAL; \ + goto end; \ + } \ + u->desc.aname = num; \ + ret = len; \ +end: \ + mutex_unlock(&opts->lock); \ + mutex_unlock(su_mutex); \ + return ret; \ +} \ + \ +UVC_ATTR(uvcg_framebased_, cname, aname) + +UVCG_FRAMEBASED_ATTR_RO(b_format_index, bFormatIndex, 8); +UVCG_FRAMEBASED_ATTR_RO(b_bits_per_pixel, bBitsPerPixel, 8); +UVCG_FRAMEBASED_ATTR(b_default_frame_index, bDefaultFrameIndex, 8); +UVCG_FRAMEBASED_ATTR_RO(b_aspect_ratio_x, bAspectRatioX, 8); +UVCG_FRAMEBASED_ATTR_RO(b_aspect_ratio_y, bAspectRatioY, 8); +UVCG_FRAMEBASED_ATTR_RO(bm_interface_flags, bmInterfaceFlags, 8); + +#undef UVCG_FRAMEBASED_ATTR +#undef UVCG_FRAMEBASED_ATTR_RO + +static ssize_t uvcg_framebased_guid_format_show(struct config_item *item, + char *page) +{ + struct uvcg_framebased *ch = to_uvcg_framebased(item); + struct f_uvc_opts *opts; + struct config_item *opts_item; + struct mutex *su_mutex = &ch->fmt.group.cg_subsys->su_mutex; + + mutex_lock(su_mutex); /* for navigating configfs hierarchy */ + + opts_item = ch->fmt.group.cg_item.ci_parent->ci_parent->ci_parent; + opts = to_f_uvc_opts(opts_item); + + mutex_lock(&opts->lock); + memcpy(page, ch->desc.guidFormat, sizeof(ch->desc.guidFormat)); + mutex_unlock(&opts->lock); + + mutex_unlock(su_mutex); + + return sizeof(ch->desc.guidFormat); +} + +static ssize_t uvcg_framebased_guid_format_store(struct config_item *item, + const char *page, size_t len) +{ + struct uvcg_framebased *ch = to_uvcg_framebased(item); + struct f_uvc_opts *opts; + struct config_item *opts_item; + struct mutex *su_mutex = &ch->fmt.group.cg_subsys->su_mutex; + int ret; + + mutex_lock(su_mutex); /* for navigating configfs hierarchy */ + + opts_item = ch->fmt.group.cg_item.ci_parent->ci_parent->ci_parent; + opts = to_f_uvc_opts(opts_item); + + mutex_lock(&opts->lock); + if (ch->fmt.linked || opts->refcnt) { + ret = -EBUSY; + goto end; + } + + memcpy(ch->desc.guidFormat, page, + min(sizeof(ch->desc.guidFormat), len)); + ret = sizeof(ch->desc.guidFormat); + +end: + mutex_unlock(&opts->lock); + mutex_unlock(su_mutex); + return ret; +} + +UVC_ATTR(uvcg_framebased_, guid_format, guidFormat); + +static inline ssize_t +uvcg_framebased_bma_controls_show(struct config_item *item, char *page) +{ + struct uvcg_framebased *u = to_uvcg_framebased(item); + + return uvcg_format_bma_controls_show(&u->fmt, page); +} + +static inline ssize_t +uvcg_framebased_bma_controls_store(struct 
config_item *item, + const char *page, size_t len) +{ + struct uvcg_framebased *u = to_uvcg_framebased(item); + + return uvcg_format_bma_controls_store(&u->fmt, page, len); +} + +UVC_ATTR(uvcg_framebased_, bma_controls, bmaControls); + +static struct configfs_attribute *uvcg_framebased_attrs[] = { + &uvcg_framebased_attr_b_format_index, + &uvcg_framebased_attr_b_default_frame_index, + &uvcg_framebased_attr_b_bits_per_pixel, + &uvcg_framebased_attr_b_aspect_ratio_x, + &uvcg_framebased_attr_b_aspect_ratio_y, + &uvcg_framebased_attr_bm_interface_flags, + &uvcg_framebased_attr_bma_controls, + &uvcg_framebased_attr_guid_format, + NULL, +}; + +static const struct config_item_type uvcg_framebased_type = { + .ct_item_ops = &uvcg_config_item_ops, + .ct_group_ops = &uvcg_framebased_group_ops, + .ct_attrs = uvcg_framebased_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct config_group *uvcg_framebased_make(struct config_group *group, + const char *name) +{ + static char guid[] = { /*Declear frame based as H264 format*/ + 'H', '2', '6', '4', 0x00, 0x00, 0x10, 0x00, + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71 + }; + struct uvcg_framebased *h; + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return ERR_PTR(-ENOMEM); + + h->desc.bLength = UVC_DT_FORMAT_FRAMEBASED_SIZE; + h->desc.bDescriptorType = USB_DT_CS_INTERFACE; + h->desc.bDescriptorSubType = UVC_VS_FORMAT_FRAME_BASED; + memcpy(h->desc.guidFormat, guid, sizeof(guid)); + h->desc.bBitsPerPixel = 0; + h->desc.bDefaultFrameIndex = 1; + h->desc.bAspectRatioX = 0; + h->desc.bAspectRatioY = 0; + h->desc.bmInterfaceFlags = 0; + h->desc.bCopyProtect = 0; + h->desc.bVariableSize = 1; + + INIT_LIST_HEAD(&h->fmt.frames); + h->fmt.type = UVCG_FRAMEBASED; + config_group_init_type_name(&h->fmt.group, name, + &uvcg_framebased_type); + + return &h->fmt.group; +} + +static struct configfs_group_operations uvcg_framebased_grp_ops = { + .make_group = uvcg_framebased_make, +}; + +static const struct uvcg_config_group_type uvcg_framebased_grp_type = { + .type = { + .ct_item_ops = &uvcg_config_item_ops, + .ct_group_ops = &uvcg_framebased_grp_ops, + .ct_owner = THIS_MODULE, + }, + .name = "framebased", +}; + /* ----------------------------------------------------------------------------- * streaming/color_matching/default */ @@ -2912,6 +3190,7 @@ static int __uvcg_iter_strm_cls(struct uvcg_streaming_header *h, if (ret) return ret; grp = &f->fmt->group; + j = 0; list_for_each_entry(item, &grp->cg_children, ci_entry) { frm = to_uvcg_frame(item); ret = fun(frm, priv2, priv3, j++, UVCG_FRAME); @@ -2965,6 +3244,11 @@ static int __uvcg_cnt_strm(void *priv1, void *priv2, void *priv3, int n, container_of(fmt, struct uvcg_mjpeg, fmt); *size += sizeof(m->desc); + } else if (fmt->type == UVCG_FRAMEBASED) { + struct uvcg_framebased *f = + container_of(fmt, struct uvcg_framebased, fmt); + + *size += sizeof(f->desc); } else { return -EINVAL; } @@ -2975,6 +3259,11 @@ static int __uvcg_cnt_strm(void *priv1, void *priv2, void *priv3, int n, int sz = sizeof(frm->dw_frame_interval); *size += sizeof(frm->frame); + /* + * framebased has duplicate member with uncompressed and + * mjpeg, so minus it + */ + *size -= sizeof(u32); *size += frm->frame.b_frame_interval_type * sz; } break; @@ -2991,6 +3280,27 @@ static int __uvcg_cnt_strm(void *priv1, void *priv2, void *priv3, int n, return 0; } +static int __uvcg_copy_framebased_desc(void *dest, struct uvcg_frame *frm, + int sz) +{ + struct uvc_frame_framebased *desc = dest; + + desc->bLength = frm->frame.b_length; + 
desc->bDescriptorType = frm->frame.b_descriptor_type; + desc->bDescriptorSubType = frm->frame.b_descriptor_subtype; + desc->bFrameIndex = frm->frame.b_frame_index; + desc->bmCapabilities = frm->frame.bm_capabilities; + desc->wWidth = frm->frame.w_width; + desc->wHeight = frm->frame.w_height; + desc->dwMinBitRate = frm->frame.dw_min_bit_rate; + desc->dwMaxBitRate = frm->frame.dw_max_bit_rate; + desc->dwDefaultFrameInterval = frm->frame.dw_default_frame_interval; + desc->bFrameIntervalType = frm->frame.b_frame_interval_type; + desc->dwBytesPerLine = frm->frame.dw_bytes_perline; + + return 0; +} + /* * Fill an array of streaming descriptors. * @@ -3045,6 +3355,15 @@ static int __uvcg_fill_strm(void *priv1, void *priv2, void *priv3, int n, m->desc.bNumFrameDescriptors = fmt->num_frames; memcpy(*dest, &m->desc, sizeof(m->desc)); *dest += sizeof(m->desc); + } else if (fmt->type == UVCG_FRAMEBASED) { + struct uvcg_framebased *f = + container_of(fmt, struct uvcg_framebased, + fmt); + + f->desc.bFormatIndex = n + 1; + f->desc.bNumFrameDescriptors = fmt->num_frames; + memcpy(*dest, &f->desc, sizeof(f->desc)); + *dest += sizeof(f->desc); } else { return -EINVAL; } @@ -3054,8 +3373,11 @@ static int __uvcg_fill_strm(void *priv1, void *priv2, void *priv3, int n, struct uvcg_frame *frm = priv1; struct uvc_descriptor_header *h = *dest; - sz = sizeof(frm->frame); - memcpy(*dest, &frm->frame, sz); + sz = sizeof(frm->frame) - 4; + if (frm->fmt_type != UVCG_FRAMEBASED) + memcpy(*dest, &frm->frame, sz); + else + __uvcg_copy_framebased_desc(*dest, frm, sz); *dest += sz; sz = frm->frame.b_frame_interval_type * sizeof(*frm->dw_frame_interval); @@ -3066,7 +3388,10 @@ static int __uvcg_fill_strm(void *priv1, void *priv2, void *priv3, int n, frm->frame.b_frame_interval_type); else if (frm->fmt_type == UVCG_MJPEG) h->bLength = UVC_DT_FRAME_MJPEG_SIZE( - frm->frame.b_frame_interval_type); + frm->frame.b_frame_interval_type); + else if (frm->fmt_type == UVCG_FRAMEBASED) + h->bLength = UVC_DT_FRAME_FRAMEBASED_SIZE( + frm->frame.b_frame_interval_type); } break; case UVCG_COLOR_MATCHING: { @@ -3285,6 +3610,7 @@ static const struct uvcg_config_group_type uvcg_streaming_grp_type = { &uvcg_streaming_header_grp_type, &uvcg_uncompressed_grp_type, &uvcg_mjpeg_grp_type, + &uvcg_framebased_grp_type, &uvcg_color_matching_grp_type, &uvcg_streaming_class_grp_type, NULL, diff --git a/drivers/usb/gadget/function/uvc_configfs.h b/drivers/usb/gadget/function/uvc_configfs.h index c6a690158138..2f78cd4f396f 100644 --- a/drivers/usb/gadget/function/uvc_configfs.h +++ b/drivers/usb/gadget/function/uvc_configfs.h @@ -49,6 +49,7 @@ container_of(group_ptr, struct uvcg_color_matching, group) enum uvcg_format_type { UVCG_UNCOMPRESSED = 0, UVCG_MJPEG, + UVCG_FRAMEBASED, }; struct uvcg_format { @@ -105,6 +106,7 @@ struct uvcg_frame { u32 dw_max_video_frame_buffer_size; u32 dw_default_frame_interval; u8 b_frame_interval_type; + u32 dw_bytes_perline; } __attribute__((packed)) frame; u32 *dw_frame_interval; }; @@ -142,6 +144,20 @@ static inline struct uvcg_mjpeg *to_uvcg_mjpeg(struct config_item *item) return container_of(to_uvcg_format(item), struct uvcg_mjpeg, fmt); } +/* ----------------------------------------------------------------------------- + * streaming/framebased/ + */ + +struct uvcg_framebased { + struct uvcg_format fmt; + struct uvc_format_framebased desc; +}; + +static inline struct uvcg_framebased *to_uvcg_framebased(struct config_item *item) +{ + return container_of(to_uvcg_format(item), struct uvcg_framebased, fmt); +} + /* 
----------------------------------------------------------------------------- * control/extensions/ */ diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index de1736f834e6..836b91c73f18 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -31,13 +31,22 @@ static const struct uvc_format_desc *to_uvc_format(struct uvcg_format *uformat) { char guid[16] = UVC_GUID_FORMAT_MJPEG; const struct uvc_format_desc *format; - struct uvcg_uncompressed *unc; if (uformat->type == UVCG_UNCOMPRESSED) { + struct uvcg_uncompressed *unc; + unc = to_uvcg_uncompressed(&uformat->group.cg_item); if (!unc) return ERR_PTR(-EINVAL); + memcpy(guid, unc->desc.guidFormat, sizeof(guid)); + } else if (uformat->type == UVCG_FRAMEBASED) { + struct uvcg_framebased *unc; + + unc = to_uvcg_framebased(&uformat->group.cg_item); + if (!unc) + return ERR_PTR(-EINVAL); + memcpy(guid, unc->desc.guidFormat, sizeof(guid)); } diff --git a/include/uapi/linux/usb/video.h b/include/uapi/linux/usb/video.h index 2ff0e8a3a683..526b5155e23c 100644 --- a/include/uapi/linux/usb/video.h +++ b/include/uapi/linux/usb/video.h @@ -597,5 +597,63 @@ struct UVC_FRAME_MJPEG(n) { \ __le32 dwFrameInterval[n]; \ } __attribute__ ((packed)) +/* Frame Based Payload - 3.1.1. Frame Based Video Format Descriptor */ +struct uvc_format_framebased { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubType; + __u8 bFormatIndex; + __u8 bNumFrameDescriptors; + __u8 guidFormat[16]; + __u8 bBitsPerPixel; + __u8 bDefaultFrameIndex; + __u8 bAspectRatioX; + __u8 bAspectRatioY; + __u8 bmInterfaceFlags; + __u8 bCopyProtect; + __u8 bVariableSize; +} __attribute__((__packed__)); + +#define UVC_DT_FORMAT_FRAMEBASED_SIZE 28 + +/* Frame Based Payload - 3.1.2. Frame Based Video Frame Descriptor */ +struct uvc_frame_framebased { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubType; + __u8 bFrameIndex; + __u8 bmCapabilities; + __u16 wWidth; + __u16 wHeight; + __u32 dwMinBitRate; + __u32 dwMaxBitRate; + __u32 dwDefaultFrameInterval; + __u8 bFrameIntervalType; + __u32 dwBytesPerLine; + __u32 dwFrameInterval[]; +} __attribute__((__packed__)); + +#define UVC_DT_FRAME_FRAMEBASED_SIZE(n) (26+4*(n)) + +#define UVC_FRAME_FRAMEBASED(n) \ + uvc_frame_framebased_##n + +#define DECLARE_UVC_FRAME_FRAMEBASED(n) \ +struct UVC_FRAME_FRAMEBASED(n) { \ + __u8 bLength; \ + __u8 bDescriptorType; \ + __u8 bDescriptorSubType; \ + __u8 bFrameIndex; \ + __u8 bmCapabilities; \ + __u16 wWidth; \ + __u16 wHeight; \ + __u32 dwMinBitRate; \ + __u32 dwMaxBitRate; \ + __u32 dwDefaultFrameInterval; \ + __u8 bFrameIntervalType; \ + __u32 dwBytesPerLine; \ + __u32 dwFrameInterval[n]; \ +} __attribute__ ((packed)) + #endif /* __LINUX_USB_VIDEO_H */ -- cgit v1.2.3 From 522249f05c5551aec9ec0ba9b6438f1ec19c138d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 3 Oct 2024 16:29:22 +0200 Subject: fanotify: allow reporting errors on failure to open fd When working in "fd mode", fanotify_read() needs to open an fd from a dentry to report event->fd to userspace. Opening an fd from dentry can fail for several reasons. For example, when tasks are gone and we try to open their /proc files or we try to open a WRONLY file like in sysfs or when trying to open a file that was deleted on the remote network server. Add a new flag FAN_REPORT_FD_ERROR for fanotify_init(). For a group with FAN_REPORT_FD_ERROR, we will send the event with the error instead of the open fd, otherwise userspace may not get the error at all. 
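A minimal userspace sketch of a listener opting in to the new behaviour (assumes uapi headers that already define FAN_REPORT_FD_ERROR; error handling trimmed):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/fanotify.h>

int main(void)
{
	struct fanotify_event_metadata buf[64], *ev;
	ssize_t len;
	int fd;

	/* Ask for negative error codes instead of FAN_NOFD / dropped events */
	fd = fanotify_init(FAN_CLASS_NOTIF | FAN_REPORT_FD_ERROR, O_RDONLY);
	if (fd < 0)
		return 1;

	fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT,
		      FAN_OPEN | FAN_CLOSE_WRITE, AT_FDCWD, "/");

	len = read(fd, buf, sizeof(buf));
	for (ev = buf; FAN_EVENT_OK(ev, len); ev = FAN_EVENT_NEXT(ev, len)) {
		if (ev->fd < 0)		/* open failed; ev->fd holds -errno */
			fprintf(stderr, "event fd error: %s\n",
				strerror(-ev->fd));
		else
			close(ev->fd);
	}
	return 0;
}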
For an overflow event, we report -EBADF to avoid confusing FAN_NOFD with -EPERM. Similarly for pidfd open errors we report either -ESRCH or the open error instead of FAN_NOPIDFD and FAN_EPIDFD. In any case, userspace will not know which file failed to open, so add a debug print for further investigation. Reported-by: Krishna Vivek Vitta Link: https://lore.kernel.org/linux-fsdevel/SI2P153MB07182F3424619EDDD1F393EED46D2@SI2P153MB0718.APCP153.PROD.OUTLOOK.COM/ Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20241003142922.111539-1-amir73il@gmail.com --- fs/notify/fanotify/fanotify_user.c | 85 +++++++++++++++++++++----------------- include/linux/fanotify.h | 1 + include/uapi/linux/fanotify.h | 1 + 3 files changed, 50 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 9644bc72e457..8e2d43fc6f7c 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -266,13 +266,6 @@ static int create_fd(struct fsnotify_group *group, const struct path *path, group->fanotify_data.f_flags | __FMODE_NONOTIFY, current_cred()); if (IS_ERR(new_file)) { - /* - * we still send an event even if we can't open the file. this - * can happen when say tasks are gone and we try to open their - * /proc files or we try to open a WRONLY file like in sysfs - * we just send the errno to userspace since there isn't much - * else we can do. - */ put_unused_fd(client_fd); client_fd = PTR_ERR(new_file); } else { @@ -663,7 +656,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD; struct file *f = NULL, *pidfd_file = NULL; - int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD; + int ret, pidfd = -ESRCH, fd = -EBADF; pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -691,10 +684,39 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && path && path->mnt && path->dentry) { fd = create_fd(group, path, &f); - if (fd < 0) - return fd; + /* + * Opening an fd from dentry can fail for several reasons. + * For example, when tasks are gone and we try to open their + * /proc files or we try to open a WRONLY file like in sysfs + * or when trying to open a file that was deleted on the + * remote network server. + * + * For a group with FAN_REPORT_FD_ERROR, we will send the + * event with the error instead of the open fd, otherwise + * Userspace may not get the error at all. + * In any case, userspace will not know which file failed to + * open, so add a debug print for further investigation. + */ + if (fd < 0) { + pr_debug("fanotify: create_fd(%pd2) failed err=%d\n", + path->dentry, fd); + if (!FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR)) { + /* + * Historically, we've handled EOPENSTALE in a + * special way and silently dropped such + * events. Now we have to keep it to maintain + * backward compatibility... + */ + if (fd == -EOPENSTALE) + fd = 0; + return fd; + } + } } - metadata.fd = fd; + if (FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR)) + metadata.fd = fd; + else + metadata.fd = fd >= 0 ? 
fd : FAN_NOFD; if (pidfd_mode) { /* @@ -709,18 +731,16 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, * The PIDTYPE_TGID check for an event->pid is performed * preemptively in an attempt to catch out cases where the event * listener reads events after the event generating process has - * already terminated. Report FAN_NOPIDFD to the event listener - * in those cases, with all other pidfd creation errors being - * reported as FAN_EPIDFD. + * already terminated. Depending on flag FAN_REPORT_FD_ERROR, + * report either -ESRCH or FAN_NOPIDFD to the event listener in + * those cases with all other pidfd creation errors reported as + * the error code itself or as FAN_EPIDFD. */ - if (metadata.pid == 0 || - !pid_has_task(event->pid, PIDTYPE_TGID)) { - pidfd = FAN_NOPIDFD; - } else { + if (metadata.pid && pid_has_task(event->pid, PIDTYPE_TGID)) pidfd = pidfd_prepare(event->pid, 0, &pidfd_file); - if (pidfd < 0) - pidfd = FAN_EPIDFD; - } + + if (!FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR) && pidfd < 0) + pidfd = pidfd == -ESRCH ? FAN_NOPIDFD : FAN_EPIDFD; } ret = -EFAULT; @@ -737,9 +757,6 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, buf += FAN_EVENT_METADATA_LEN; count -= FAN_EVENT_METADATA_LEN; - if (fanotify_is_perm_event(event->mask)) - FANOTIFY_PERM(event)->fd = fd; - if (info_mode) { ret = copy_info_records_to_user(event, info, info_mode, pidfd, buf, count); @@ -753,15 +770,18 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (pidfd_file) fd_install(pidfd, pidfd_file); + if (fanotify_is_perm_event(event->mask)) + FANOTIFY_PERM(event)->fd = fd; + return metadata.event_len; out_close_fd: - if (fd != FAN_NOFD) { + if (f) { put_unused_fd(fd); fput(f); } - if (pidfd >= 0) { + if (pidfd_file) { put_unused_fd(pidfd); fput(pidfd_file); } @@ -828,15 +848,6 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, } ret = copy_event_to_user(group, event, buf, count); - if (unlikely(ret == -EOPENSTALE)) { - /* - * We cannot report events with stale fd so drop it. - * Setting ret to 0 will continue the event loop and - * do the right thing if there are no more events to - * read (i.e. return bytes read, -EAGAIN or wait). - */ - ret = 0; - } /* * Permission events get queued to wait for response. 
Other @@ -845,7 +856,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, if (!fanotify_is_perm_event(event->mask)) { fsnotify_destroy_event(group, &event->fse); } else { - if (ret <= 0) { + if (ret <= 0 || FANOTIFY_PERM(event)->fd < 0) { spin_lock(&group->notification_lock); finish_permission_event(group, FANOTIFY_PERM(event), FAN_DENY, NULL); @@ -1954,7 +1965,7 @@ static int __init fanotify_user_setup(void) FANOTIFY_DEFAULT_MAX_USER_MARKS); BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11); fanotify_mark_cache = KMEM_CACHE(fanotify_mark, diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 4f1c4f603118..89ff45bd6f01 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -36,6 +36,7 @@ #define FANOTIFY_ADMIN_INIT_FLAGS (FANOTIFY_PERM_CLASSES | \ FAN_REPORT_TID | \ FAN_REPORT_PIDFD | \ + FAN_REPORT_FD_ERROR | \ FAN_UNLIMITED_QUEUE | \ FAN_UNLIMITED_MARKS) diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index a37de58ca571..34f221d3a1b9 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -60,6 +60,7 @@ #define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ #define FAN_REPORT_NAME 0x00000800 /* Report events with name */ #define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ +#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) -- cgit v1.2.3 From d6083f040d5d8f8d748462c77e90547097df936e Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Tue, 15 Oct 2024 23:02:06 +0800 Subject: bpf: Prevent tailcall infinite loop caused by freplace There is a potential infinite loop issue that can occur when using a combination of tail calls and freplace. In an upcoming selftest, the attach target for entry_freplace of tailcall_freplace.c is subprog_tc of tc_bpf2bpf.c, while the tail call in entry_freplace leads to entry_tc. This results in an infinite loop: entry_tc -> subprog_tc -> entry_freplace --tailcall-> entry_tc. The problem arises because the tail_call_cnt in entry_freplace resets to zero each time entry_freplace is executed, causing the tail call mechanism to never terminate, eventually leading to a kernel panic. To fix this issue, the solution is twofold: 1. Prevent updating a program extended by an freplace program to a prog_array map. 2. Prevent extending a program that is already part of a prog_array map with an freplace program. This ensures that: * If a program or its subprogram has been extended by an freplace program, it can no longer be updated to a prog_array map. * If a program has been added to a prog_array map, neither it nor its subprograms can be extended by an freplace program. Moreover, an extension program should not be tailcalled. As such, return -EINVAL if the program has a type of BPF_PROG_TYPE_EXT when adding it to a prog_array map. Additionally, fix a minor code style issue by replacing eight spaces with a tab for proper formatting. 
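As a rough userspace illustration of the new restriction (a sketch, not the selftest itself), the snippet below uses the program names mentioned above: it first extends subprog_tc with the freplace program and then tries to add the extended program to a prog_array. The object handling, the "jmp_table" map name and the helper name are assumptions made for the example; only the expected EBUSY rejection comes from this change.

#include <errno.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

/* tgt_obj: loaded object built from tc_bpf2bpf.c, frep_obj: loaded object
 * built from tailcall_freplace.c (assumed names).  With this change the
 * map update below is expected to fail with EBUSY.
 */
static int try_extend_then_tailcall(struct bpf_object *tgt_obj,
				    struct bpf_object *frep_obj)
{
	struct bpf_program *entry_tc, *entry_freplace;
	struct bpf_link *link;
	int jmp_table_fd, tc_fd, key = 0, err;

	entry_tc = bpf_object__find_program_by_name(tgt_obj, "entry_tc");
	entry_freplace = bpf_object__find_program_by_name(frep_obj, "entry_freplace");
	jmp_table_fd = bpf_object__find_map_fd_by_name(frep_obj, "jmp_table");
	tc_fd = bpf_program__fd(entry_tc);

	/* Extend subprog_tc of the target program with the freplace program. */
	link = bpf_program__attach_freplace(entry_freplace, tc_fd, "subprog_tc");
	if (!link)
		return -errno;

	/* Try to make the extended program a tail-call target: the arraymap
	 * update is now rejected, so the loop
	 * entry_tc -> subprog_tc -> entry_freplace --tailcall-> entry_tc
	 * can never be constructed.
	 */
	err = bpf_map_update_elem(jmp_table_fd, &key, &tc_fd, BPF_ANY);

	bpf_link__destroy(link);
	return err;
}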
Reviewed-by: Eduard Zingerman Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20241015150207.70264-2-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 17 +++++++++++++---- kernel/bpf/arraymap.c | 26 ++++++++++++++++++++++++-- kernel/bpf/core.c | 1 + kernel/bpf/syscall.c | 7 ++++--- kernel/bpf/trampoline.c | 47 +++++++++++++++++++++++++++++++++++++++-------- 5 files changed, 81 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 19d8ca8ac960..0c216e71cec7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1292,8 +1292,12 @@ void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_JIT -int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); -int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog); +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog); struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); @@ -1374,12 +1378,14 @@ void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, - struct bpf_trampoline *tr) + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { return -ENOTSUPP; } static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, - struct bpf_trampoline *tr) + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { return -ENOTSUPP; } @@ -1483,6 +1489,9 @@ struct bpf_prog_aux { bool xdp_has_frags; bool exception_cb; bool exception_boundary; + bool is_extended; /* true if extended by freplace program */ + u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ + struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ struct bpf_arena *arena; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 79660e3fca4c..6cdbb4c33d31 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -947,22 +947,44 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map, struct file *map_file, int fd) { struct bpf_prog *prog = bpf_prog_get(fd); + bool is_extended; if (IS_ERR(prog)) return prog; - if (!bpf_prog_map_compatible(map, prog)) { + if (prog->type == BPF_PROG_TYPE_EXT || + !bpf_prog_map_compatible(map, prog)) { bpf_prog_put(prog); return ERR_PTR(-EINVAL); } + mutex_lock(&prog->aux->ext_mutex); + is_extended = prog->aux->is_extended; + if (!is_extended) + prog->aux->prog_array_member_cnt++; + mutex_unlock(&prog->aux->ext_mutex); + if (is_extended) { + /* Extended prog can not be tail callee. It's to prevent a + * potential infinite loop like: + * tail callee prog entry -> tail callee prog subprog -> + * freplace prog entry --tailcall-> tail callee prog entry. 
+ */ + bpf_prog_put(prog); + return ERR_PTR(-EBUSY); + } + return prog; } static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) { + struct bpf_prog *prog = ptr; + + mutex_lock(&prog->aux->ext_mutex); + prog->aux->prog_array_member_cnt--; + mutex_unlock(&prog->aux->ext_mutex); /* bpf_prog is freed after one RCU or tasks trace grace period */ - bpf_prog_put(ptr); + bpf_prog_put(prog); } static u32 prog_fd_array_sys_lookup_elem(void *ptr) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 5e77c58e0601..233ea78f8f1b 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -131,6 +131,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode); #endif mutex_init(&fp->aux->used_maps_mutex); + mutex_init(&fp->aux->ext_mutex); mutex_init(&fp->aux->dst_mutex); return fp; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a8f1808a1ca5..4d04d4d9c1f3 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3214,7 +3214,8 @@ static void bpf_tracing_link_release(struct bpf_link *link) container_of(link, struct bpf_tracing_link, link.link); WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, - tr_link->trampoline)); + tr_link->trampoline, + tr_link->tgt_prog)); bpf_trampoline_put(tr_link->trampoline); @@ -3354,7 +3355,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, * in prog->aux * * - if prog->aux->dst_trampoline is NULL, the program has already been - * attached to a target and its initial target was cleared (below) + * attached to a target and its initial target was cleared (below) * * - if tgt_prog != NULL, the caller specified tgt_prog_fd + * target_btf_id using the link_create API. @@ -3429,7 +3430,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, if (err) goto out_unlock; - err = bpf_trampoline_link_prog(&link->link, tr); + err = bpf_trampoline_link_prog(&link->link, tr, tgt_prog); if (err) { bpf_link_cleanup(&link_primer); link = NULL; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index f8302a5ca400..9f36c049f4c2 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -523,7 +523,27 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) } } -static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog) +{ + struct bpf_prog_aux *aux = tgt_prog->aux; + + guard(mutex)(&aux->ext_mutex); + if (aux->prog_array_member_cnt) + /* Program extensions can not extend target prog when the target + * prog has been updated to any prog_array map as tail callee. + * It's to prevent a potential infinite loop like: + * tgt prog entry -> tgt prog subprog -> freplace prog entry + * --tailcall-> tgt prog entry. + */ + return -EBUSY; + + aux->is_extended = true; + return 0; +} + +static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { enum bpf_tramp_prog_type kind; struct bpf_tramp_link *link_exiting; @@ -544,6 +564,9 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr /* Cannot attach extension if fentry/fexit are in use. 
*/ if (cnt) return -EBUSY; + err = bpf_freplace_check_tgt_prog(tgt_prog); + if (err) + return err; tr->extension_prog = link->link.prog; return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL, link->link.prog->bpf_func); @@ -570,17 +593,21 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr return err; } -int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { int err; mutex_lock(&tr->mutex); - err = __bpf_trampoline_link_prog(link, tr); + err = __bpf_trampoline_link_prog(link, tr, tgt_prog); mutex_unlock(&tr->mutex); return err; } -static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { enum bpf_tramp_prog_type kind; int err; @@ -591,6 +618,8 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_ err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, tr->extension_prog->bpf_func, NULL); tr->extension_prog = NULL; + guard(mutex)(&tgt_prog->aux->ext_mutex); + tgt_prog->aux->is_extended = false; return err; } hlist_del_init(&link->tramp_hlist); @@ -599,12 +628,14 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_ } /* bpf_trampoline_unlink_prog() should never fail. */ -int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { int err; mutex_lock(&tr->mutex); - err = __bpf_trampoline_unlink_prog(link, tr); + err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog); mutex_unlock(&tr->mutex); return err; } @@ -619,7 +650,7 @@ static void bpf_shim_tramp_link_release(struct bpf_link *link) if (!shim_link->trampoline) return; - WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline)); + WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline, NULL)); bpf_trampoline_put(shim_link->trampoline); } @@ -733,7 +764,7 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, goto err; } - err = __bpf_trampoline_link_prog(&shim_link->link, tr); + err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL); if (err) goto err; -- cgit v1.2.3 From ce1dfe6d328966b75821c1f043a940eb2569768a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 12 Oct 2024 20:32:43 +0900 Subject: PCI: endpoint: Introduce pci_epc_mem_map()/unmap() Some endpoint controllers have requirements on the alignment of the controller physical memory address that must be used to map a RC PCI address region. For instance, the endpoint controller of the RK3399 SoC uses at most the lower 20 bits of a physical memory address region as the lower bits of a RC PCI address region. For mapping a PCI address region of size bytes starting from pci_addr, the exact number of address bits used is the number of address bits changing in the address range [pci_addr..pci_addr + size - 1]. For this example, this creates the following constraints: 1) The offset into the controller physical memory allocated for a mapping depends on the mapping size *and* the starting PCI address for the mapping. 
2) A mapping size cannot exceed the controller window size (1MB) minus the offset needed into the allocated physical memory, which can end up being a smaller size than the desired mapping size. Handling these constraints independently of the controller being used in an endpoint function driver is not possible with the current EPC API as only the ->align field in struct pci_epc_features is provided but used for BAR (inbound ATU mappings) mapping only. A new API is needed for function drivers to discover mapping constraints and handle non-static requirements based on the RC PCI address range to access. Introduce the endpoint controller operation ->align_addr() to allow the EPC core functions to obtain the size and the offset into a controller address region that must be allocated and mapped to access a RC PCI address region. The size of the mapping provided by the align_addr() operation can then be used as the size argument for the function pci_epc_mem_alloc_addr() and the offset into the allocated controller memory provided can be used to correctly handle data transfers. For endpoint controllers that have PCI address alignment constraints, the align_addr() operation may indicate upon return an effective PCI address mapping size that is smaller (but not 0) than the requested PCI address region size. The controller ->align_addr() operation is optional: controllers that do not have any alignment constraints for mapping RC PCI address regions do not need to implement this operation. For such controllers, it is always assumed that the mapping size is equal to the requested size of the PCI region and that the mapping offset is 0. The function pci_epc_mem_map() is introduced to use this new controller operation (if it is defined) to handle controller memory allocation and mapping to a RC PCI address region in endpoint function drivers. This function first uses the ->align_addr() controller operation to determine the controller memory address size (and the offset into it) needed for mapping an RC PCI address region. The result of this operation is used to allocate a controller physical memory region using pci_epc_mem_alloc_addr() and then to map that memory to the RC PCI address space with pci_epc_map_addr(). Since ->align_addr() may indicate that not all of a RC PCI address region can be mapped, pci_epc_mem_map() may only partially map the RC PCI address region specified. It is the responsibility of the caller (an endpoint function driver) to handle such a smaller mapping by repeatedly using pci_epc_mem_map() over the desired PCI address range. The counterpart of pci_epc_mem_map() to unmap and free a mapped controller memory address region is pci_epc_mem_unmap(). Both functions operate using the new struct pci_epc_map data structure. This new structure represents a mapping PCI address, mapping effective size, the size of the controller memory needed for the mapping, as well as the physical and virtual CPU addresses of the mapping (phys_base and virt_base fields). For convenience, the physical and virtual CPU addresses within that mapping to use to access the target RC PCI address region are also provided (phys_addr and virt_addr fields). Endpoint function drivers can use struct pci_epc_map to access the mapped RC PCI address region using the ->virt_addr and ->pci_size fields.
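For illustration, a minimal sketch of how an endpoint function driver might consume the new API to write a buffer to an arbitrary RC PCI address, moving on to the next chunk whenever the mapping comes back smaller than requested. The epf_write_to_host() name and the use of memcpy_toio() are assumptions for the example; only pci_epc_mem_map()/pci_epc_mem_unmap() and the struct pci_epc_map fields are part of this change.

/* Assumed context: a pci_epf function driver with the usual
 * <linux/pci-epc.h> and <linux/pci-epf.h> includes.
 */
static int epf_write_to_host(struct pci_epf *epf, u64 pci_addr,
			     const void *buf, size_t len)
{
	struct pci_epc *epc = epf->epc;
	struct pci_epc_map map;
	int ret;

	while (len) {
		/* May map fewer than 'len' bytes if the controller has
		 * address alignment constraints; map.pci_size tells us
		 * how much of the requested range was actually mapped.
		 */
		ret = pci_epc_mem_map(epc, epf->func_no, epf->vfunc_no,
				      pci_addr, len, &map);
		if (ret)
			return ret;

		memcpy_toio(map.virt_addr, buf, map.pci_size);

		pci_epc_mem_unmap(epc, epf->func_no, epf->vfunc_no, &map);

		buf += map.pci_size;
		pci_addr += map.pci_size;
		len -= map.pci_size;
	}

	return 0;
}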
Co-developed-by: Rick Wertenbroek Signed-off-by: Rick Wertenbroek Signed-off-by: Damien Le Moal Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241012113246.95634-4-dlemoal@kernel.org [mani: squashed the patch that changed phy_addr_t to u64] Signed-off-by: Manivannan Sadhasivam --- drivers/pci/endpoint/pci-epc-core.c | 103 ++++++++++++++++++++++++++++++++++++ include/linux/pci-epc.h | 38 +++++++++++++ 2 files changed, 141 insertions(+) (limited to 'include') diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index b854f1bab26f..04a85d2f7e2a 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -466,6 +466,109 @@ int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no, } EXPORT_SYMBOL_GPL(pci_epc_map_addr); +/** + * pci_epc_mem_map() - allocate and map a PCI address to a CPU address + * @epc: the EPC device on which the CPU address is to be allocated and mapped + * @func_no: the physical endpoint function number in the EPC device + * @vfunc_no: the virtual endpoint function number in the physical function + * @pci_addr: PCI address to which the CPU address should be mapped + * @pci_size: the number of bytes to map starting from @pci_addr + * @map: where to return the mapping information + * + * Allocate a controller memory address region and map it to a RC PCI address + * region, taking into account the controller physical address mapping + * constraints using the controller operation align_addr(). If this operation is + * not defined, we assume that there are no alignment constraints for the + * mapping. + * + * The effective size of the PCI address range mapped from @pci_addr is + * indicated by @map->pci_size. This size may be less than the requested + * @pci_size. The local virtual CPU address for the mapping is indicated by + * @map->virt_addr (@map->phys_addr indicates the physical address). + * The size and CPU address of the controller memory allocated and mapped are + * respectively indicated by @map->map_size and @map->virt_base (and + * @map->phys_base for the physical address of @map->virt_base). + * + * Returns 0 on success and a negative error code in case of error. + */ +int pci_epc_mem_map(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + u64 pci_addr, size_t pci_size, struct pci_epc_map *map) +{ + size_t map_size = pci_size; + size_t map_offset = 0; + int ret; + + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) + return -EINVAL; + + if (!pci_size || !map) + return -EINVAL; + + /* + * Align the PCI address to map. If the controller defines the + * .align_addr() operation, use it to determine the PCI address to map + * and the size of the mapping. Otherwise, assume that the controller + * has no alignment constraint. 
+ */ + memset(map, 0, sizeof(*map)); + map->pci_addr = pci_addr; + if (epc->ops->align_addr) + map->map_pci_addr = + epc->ops->align_addr(epc, pci_addr, + &map_size, &map_offset); + else + map->map_pci_addr = pci_addr; + map->map_size = map_size; + if (map->map_pci_addr + map->map_size < pci_addr + pci_size) + map->pci_size = map->map_pci_addr + map->map_size - pci_addr; + else + map->pci_size = pci_size; + + map->virt_base = pci_epc_mem_alloc_addr(epc, &map->phys_base, + map->map_size); + if (!map->virt_base) + return -ENOMEM; + + map->phys_addr = map->phys_base + map_offset; + map->virt_addr = map->virt_base + map_offset; + + ret = pci_epc_map_addr(epc, func_no, vfunc_no, map->phys_base, + map->map_pci_addr, map->map_size); + if (ret) { + pci_epc_mem_free_addr(epc, map->phys_base, map->virt_base, + map->map_size); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(pci_epc_mem_map); + +/** + * pci_epc_mem_unmap() - unmap and free a CPU address region + * @epc: the EPC device on which the CPU address is allocated and mapped + * @func_no: the physical endpoint function number in the EPC device + * @vfunc_no: the virtual endpoint function number in the physical function + * @map: the mapping information + * + * Unmap and free a CPU address region that was allocated and mapped with + * pci_epc_mem_map(). + */ +void pci_epc_mem_unmap(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + struct pci_epc_map *map) +{ + if (!pci_epc_function_is_valid(epc, func_no, vfunc_no)) + return; + + if (!map || !map->virt_base) + return; + + pci_epc_unmap_addr(epc, func_no, vfunc_no, map->phys_base); + pci_epc_mem_free_addr(epc, map->phys_base, map->virt_base, + map->map_size); +} +EXPORT_SYMBOL_GPL(pci_epc_mem_unmap); + /** * pci_epc_clear_bar() - reset the BAR * @epc: the EPC device for which the BAR has to be cleared diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 42ef06136bd1..de8cc3658220 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -32,11 +32,43 @@ pci_epc_interface_string(enum pci_epc_interface_type type) } } +/** + * struct pci_epc_map - information about EPC memory for mapping a RC PCI + * address range + * @pci_addr: start address of the RC PCI address range to map + * @pci_size: size of the RC PCI address range mapped from @pci_addr + * @map_pci_addr: RC PCI address used as the first address mapped (may be lower + * than @pci_addr) + * @map_size: size of the controller memory needed for mapping the RC PCI address + * range @pci_addr..@pci_addr+@pci_size + * @phys_base: base physical address of the allocated EPC memory for mapping the + * RC PCI address range + * @phys_addr: physical address at which @pci_addr is mapped + * @virt_base: base virtual address of the allocated EPC memory for mapping the + * RC PCI address range + * @virt_addr: virtual address at which @pci_addr is mapped + */ +struct pci_epc_map { + u64 pci_addr; + size_t pci_size; + + u64 map_pci_addr; + size_t map_size; + + phys_addr_t phys_base; + phys_addr_t phys_addr; + void __iomem *virt_base; + void __iomem *virt_addr; +}; + /** * struct pci_epc_ops - set of function pointers for performing EPC operations * @write_header: ops to populate configuration space header * @set_bar: ops to configure the BAR * @clear_bar: ops to reset the BAR + * @align_addr: operation to get the mapping address, mapping size and offset + * into a controller memory window needed to map an RC PCI address + * region * @map_addr: ops to map CPU address to PCI address * @unmap_addr: ops to unmap CPU address and PCI 
address * @set_msi: ops to set the requested number of MSI interrupts in the MSI @@ -61,6 +93,8 @@ struct pci_epc_ops { struct pci_epf_bar *epf_bar); void (*clear_bar)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, struct pci_epf_bar *epf_bar); + u64 (*align_addr)(struct pci_epc *epc, u64 pci_addr, size_t *size, + size_t *offset); int (*map_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t addr, u64 pci_addr, size_t size); void (*unmap_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, @@ -278,6 +312,10 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, phys_addr_t *phys_addr, size_t size); void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr, void __iomem *virt_addr, size_t size); +int pci_epc_mem_map(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + u64 pci_addr, size_t pci_size, struct pci_epc_map *map); +void pci_epc_mem_unmap(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + struct pci_epc_map *map); #else static inline void pci_epc_init_notify(struct pci_epc *epc) -- cgit v1.2.3 From 5280a14a6079040205a1d968cd80f20448d047c7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 15 Oct 2024 12:09:32 -0700 Subject: genirq: Introduce irq_get_nr_irqs() and irq_set_nr_irqs() Prepare for changing 'nr_irqs' from an exported global variable into a variable with file scope. This will prevent accidental changes of assignments to a local variable 'nr_irqs' into assignments to the global 'nr_irqs' variable. Suppose that a patch would be submitted for review that removes a declaration of a local variable with the name 'nr_irqs' and that that patch does not remove all assignments to that local variable. Such a patch converts an assignment to a local variable into an assignment into a global variable. If the 'nr_irqs' assignment is more than three lines away from other changes, the assignment won't be included in the diff context lines and hence won't be visible without inspecting the modified file. With these abstraction series applied, such accidental conversions from assignments to a local variable into an assignment to a global variable are converted into a compilation error. Signed-off-by: Bart Van Assche Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241015190953.1266194-2-bvanassche@acm.org --- include/linux/irqnr.h | 2 ++ kernel/irq/irqdesc.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 3496baa0b07f..7419b807b71b 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -6,6 +6,8 @@ extern int nr_irqs; +unsigned int irq_get_nr_irqs(void) __pure; +unsigned int irq_set_nr_irqs(unsigned int nr); extern struct irq_desc *irq_to_desc(unsigned int irq); unsigned int irq_get_next_irq(unsigned int offset); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 1dee88ba0ae4..b0733959f8ae 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -141,6 +141,29 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, int nr_irqs = NR_IRQS; EXPORT_SYMBOL_GPL(nr_irqs); +/** + * irq_get_nr_irqs() - Number of interrupts supported by the system. + */ +unsigned int irq_get_nr_irqs(void) +{ + return nr_irqs; +} +EXPORT_SYMBOL_GPL(irq_get_nr_irqs); + +/** + * irq_set_nr_irqs() - Set the number of interrupts supported by the system. + * @nr: New number of interrupts. + * + * Return: @nr. 
+ */ +unsigned int irq_set_nr_irqs(unsigned int nr) +{ + nr_irqs = nr; + + return nr; +} +EXPORT_SYMBOL_GPL(irq_set_nr_irqs); + static DEFINE_MUTEX(sparse_irq_lock); static struct maple_tree sparse_irqs = MTREE_INIT_EXT(sparse_irqs, MT_FLAGS_ALLOC_RANGE | -- cgit v1.2.3 From 1ad2048bf7146efb83bc033147ca1611a7fe8494 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 15 Oct 2024 12:09:52 -0700 Subject: genirq: Switch to irq_get_nr_irqs() Use the irq_get_nr_irqs() function instead of the global variable 'nr_irqs'. Cache the result of this function in a local variable in order not to rely on CSE (common subexpression elimination). Prepare for changing 'nr_irqs' from an exported global variable into a variable with file scope. Signed-off-by: Bart Van Assche Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241015190953.1266194-22-bvanassche@acm.org --- include/linux/irqnr.h | 33 +++++++++++++++++++-------------- kernel/irq/irqdomain.c | 2 +- kernel/irq/proc.c | 3 ++- 3 files changed, 22 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 7419b807b71b..a33088d27c54 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -11,26 +11,31 @@ unsigned int irq_set_nr_irqs(unsigned int nr); extern struct irq_desc *irq_to_desc(unsigned int irq); unsigned int irq_get_next_irq(unsigned int offset); -# define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ - irq++, desc = irq_to_desc(irq)) \ - if (!desc) \ - ; \ - else - +#define for_each_irq_desc(irq, desc) \ + for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \ + __nr_irqs__ = 0) \ + for (irq = 0, desc = irq_to_desc(irq); irq < __nr_irqs__; \ + irq++, desc = irq_to_desc(irq)) \ + if (!desc) \ + ; \ + else # define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; \ - irq--, desc = irq_to_desc(irq)) \ + for (irq = irq_get_nr_irqs() - 1, desc = irq_to_desc(irq); \ + irq >= 0; irq--, desc = irq_to_desc(irq)) \ if (!desc) \ ; \ else -# define for_each_active_irq(irq) \ - for (irq = irq_get_next_irq(0); irq < nr_irqs; \ - irq = irq_get_next_irq(irq + 1)) +#define for_each_active_irq(irq) \ + for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \ + __nr_irqs__ = 0) \ + for (irq = irq_get_next_irq(0); irq < __nr_irqs__; \ + irq = irq_get_next_irq(irq + 1)) -#define for_each_irq_nr(irq) \ - for (irq = 0; irq < nr_irqs; irq++) +#define for_each_irq_nr(irq) \ + for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \ + __nr_irqs__ = 0) \ + for (irq = 0; irq < __nr_irqs__; irq++) #endif diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index e0bff21f30e0..ec6d8e72d980 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1225,7 +1225,7 @@ int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq, virq = __irq_alloc_descs(virq, virq, cnt, node, THIS_MODULE, affinity); } else { - hint = hwirq % nr_irqs; + hint = hwirq % irq_get_nr_irqs(); if (hint == 0) hint++; virq = __irq_alloc_descs(-1, hint, cnt, node, THIS_MODULE, diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 9081ada81c3d..d226282c5b66 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -457,11 +457,12 @@ int __weak arch_show_interrupts(struct seq_file *p, int prec) } #ifndef ACTUAL_NR_IRQS -# define ACTUAL_NR_IRQS nr_irqs +# define ACTUAL_NR_IRQS irq_get_nr_irqs() #endif int show_interrupts(struct seq_file *p, void *v) { + const 
unsigned int nr_irqs = irq_get_nr_irqs(); static int prec; int i = *(loff_t *) v, j; -- cgit v1.2.3 From ef4c675dc2961ee533bdc1ea20390761df0af5be Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 15 Oct 2024 12:09:53 -0700 Subject: genirq: Unexport nr_irqs Unexport nr_irqs and declare it static now that all code that reads or modifies nr_irqs has been converted to irq_get_nr_irqs() / irq_set_nr_irqs(). Change the type of 'nr_irqs' from 'int' into 'unsigned int' to match the return type and argument type of the irq_get_nr_irqs() / irq_set_nr_irqs() functions. Signed-off-by: Bart Van Assche Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241015190953.1266194-23-bvanassche@acm.org --- include/linux/irqnr.h | 1 - kernel/irq/irqdesc.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index a33088d27c54..e97206c721a0 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -5,7 +5,6 @@ #include -extern int nr_irqs; unsigned int irq_get_nr_irqs(void) __pure; unsigned int irq_set_nr_irqs(unsigned int nr); extern struct irq_desc *irq_to_desc(unsigned int irq); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index b0733959f8ae..479cf1caa610 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -138,8 +138,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, desc_smp_init(desc, node, affinity); } -int nr_irqs = NR_IRQS; -EXPORT_SYMBOL_GPL(nr_irqs); +static unsigned int nr_irqs = NR_IRQS; /** * irq_get_nr_irqs() - Number of interrupts supported by the system. -- cgit v1.2.3 From cf70da29c4993bf23df68b67a82dfa3da8234e75 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 5 Oct 2024 12:06:16 +0200 Subject: power: supply: core: unexport power_supply_property_is_writeable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit ("power: supply: Drop use_cnt check from power_supply_property_is_writeable()"), this function does not check use_cnt anymore, making it unsuitable for general usage. As it is only used by the psy core anyway, remove it from the public header and unexport it to avoid misusage.
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241005-power-supply-cleanups-v1-2-45303b2d0a4d@weissschuh.net Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply.h | 3 +++ drivers/power/supply/power_supply_core.c | 1 - drivers/power/supply/power_supply_hwmon.c | 1 + include/linux/power_supply.h | 2 -- 4 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/power/supply/power_supply.h b/drivers/power/supply/power_supply.h index 3cbafc58bdad..1bc57615ba68 100644 --- a/drivers/power/supply/power_supply.h +++ b/drivers/power/supply/power_supply.h @@ -13,6 +13,9 @@ struct device; struct device_type; struct power_supply; +extern int power_supply_property_is_writeable(struct power_supply *psy, + enum power_supply_property psp); + #ifdef CONFIG_SYSFS extern void power_supply_init_attrs(void); diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index e0c87a08bc26..16085eff0084 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -1231,7 +1231,6 @@ int power_supply_property_is_writeable(struct power_supply *psy, { return psy->desc->property_is_writeable && psy->desc->property_is_writeable(psy, psp); } -EXPORT_SYMBOL_GPL(power_supply_property_is_writeable); void power_supply_external_power_changed(struct power_supply *psy) { diff --git a/drivers/power/supply/power_supply_hwmon.c b/drivers/power/supply/power_supply_hwmon.c index 6fbbfb1c685e..01be04903d7d 100644 --- a/drivers/power/supply/power_supply_hwmon.c +++ b/drivers/power/supply/power_supply_hwmon.c @@ -7,6 +7,7 @@ #include #include #include +#include "power_supply.h" struct power_supply_hwmon { struct power_supply *psy; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 14eedefb3ac8..c1d6cb7f4c40 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -865,8 +865,6 @@ static inline int power_supply_set_property(struct power_supply *psy, const union power_supply_propval *val) { return 0; } #endif -extern int power_supply_property_is_writeable(struct power_supply *psy, - enum power_supply_property psp); extern void power_supply_external_power_changed(struct power_supply *psy); extern struct power_supply *__must_check -- cgit v1.2.3 From 8060bcb109f2d9b85451e84a7a08042da40368df Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 16 Oct 2024 16:18:31 +0300 Subject: usb: typec: Add attribute file showing the supported USB modes of the port This attribute file, named "usb_capability", will show the supported USB modes, which are USB 2.0, USB 3.2 and USB4. These modes are defined in the USB Type-C (R2.0) and USB Power Delivery (R3.0 V2.0) Specifications. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20241016131834.898599-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-class-typec | 13 +++++ drivers/usb/typec/class.c | 81 +++++++++++++++++++++++++++++ drivers/usb/typec/class.h | 1 + include/linux/usb/typec.h | 17 ++++++ 4 files changed, 112 insertions(+) (limited to 'include') diff --git a/Documentation/ABI/testing/sysfs-class-typec b/Documentation/ABI/testing/sysfs-class-typec index 281b995beb05..3ee757208122 100644 --- a/Documentation/ABI/testing/sysfs-class-typec +++ b/Documentation/ABI/testing/sysfs-class-typec @@ -149,6 +149,19 @@ Description: advertise to the partner. The currently used capabilities are in brackets. 
Selection happens by writing to the file. +What: /sys/class/typec//usb_capability +Date: November 2024 +Contact: Heikki Krogerus +Description: Lists the supported USB Modes. The default USB mode that is used + next time with the Enter_USB Message is in brackets. The default + mode can be changed by writing to the file when supported by the + driver. + + Valid values: + - usb2 (USB 2.0) + - usb3 (USB 3.2) + - usb4 (USB4) + USB Type-C partner devices (eg. /sys/class/typec/port0-partner/) What: /sys/class/typec/-partner/accessory_mode diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 9262fcd4144f..2f12269d1465 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -219,6 +219,13 @@ static ssize_t usb_power_delivery_revision_show(struct device *dev, char *buf); static DEVICE_ATTR_RO(usb_power_delivery_revision); +static const char * const usb_modes[] = { + [USB_MODE_NONE] = "none", + [USB_MODE_USB2] = "usb2", + [USB_MODE_USB3] = "usb3", + [USB_MODE_USB4] = "usb4" +}; + /* ------------------------------------------------------------------------- */ /* Alternate Modes */ @@ -1289,6 +1296,67 @@ EXPORT_SYMBOL_GPL(typec_unregister_cable); /* ------------------------------------------------------------------------- */ /* USB Type-C ports */ +/** + * typec_port_set_usb_mode - Set the operational USB mode for the port + * @port: USB Type-C port + * @mode: USB Mode (USB2, USB3 or USB4) + * + * @mode will be used with the next Enter_USB message. Existing connections are + * not affected. + */ +void typec_port_set_usb_mode(struct typec_port *port, enum usb_mode mode) +{ + port->usb_mode = mode; +} +EXPORT_SYMBOL_GPL(typec_port_set_usb_mode); + +static ssize_t +usb_capability_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct typec_port *port = to_typec_port(dev); + int len = 0; + int i; + + for (i = USB_MODE_USB2; i < USB_MODE_USB4 + 1; i++) { + if (!(BIT(i - 1) & port->cap->usb_capability)) + continue; + + if (i == port->usb_mode) + len += sysfs_emit_at(buf, len, "[%s] ", usb_modes[i]); + else + len += sysfs_emit_at(buf, len, "%s ", usb_modes[i]); + } + + sysfs_emit_at(buf, len - 1, "\n"); + + return len; +} + +static ssize_t +usb_capability_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct typec_port *port = to_typec_port(dev); + int ret = 0; + int mode; + + if (!port->ops || !port->ops->default_usb_mode_set) + return -EOPNOTSUPP; + + mode = sysfs_match_string(usb_modes, buf); + if (mode < 0) + return mode; + + ret = port->ops->default_usb_mode_set(port, mode); + if (ret) + return ret; + + port->usb_mode = mode; + + return size; +} +static DEVICE_ATTR_RW(usb_capability); + /** * typec_port_set_usb_power_delivery - Assign USB PD for port. * @port: USB Type-C port. 
@@ -1757,6 +1825,7 @@ static struct attribute *typec_attrs[] = { &dev_attr_vconn_source.attr, &dev_attr_port_type.attr, &dev_attr_orientation.attr, + &dev_attr_usb_capability.attr, NULL, }; @@ -1790,6 +1859,11 @@ static umode_t typec_attr_is_visible(struct kobject *kobj, if (port->cap->orientation_aware) return 0444; return 0; + } else if (attr == &dev_attr_usb_capability.attr) { + if (!port->cap->usb_capability) + return 0; + if (!port->ops || !port->ops->default_usb_mode_set) + return 0444; } return attr->mode; @@ -2428,6 +2502,13 @@ struct typec_port *typec_register_port(struct device *parent, port->con.attach = typec_partner_attach; port->con.deattach = typec_partner_deattach; + if (cap->usb_capability & USB_CAPABILITY_USB4) + port->usb_mode = USB_MODE_USB4; + else if (cap->usb_capability & USB_CAPABILITY_USB3) + port->usb_mode = USB_MODE_USB3; + else if (cap->usb_capability & USB_CAPABILITY_USB2) + port->usb_mode = USB_MODE_USB2; + device_initialize(&port->dev); port->dev.class = &typec_class; port->dev.parent = parent; diff --git a/drivers/usb/typec/class.h b/drivers/usb/typec/class.h index 7485cdb9dd20..85bc50aa54f7 100644 --- a/drivers/usb/typec/class.h +++ b/drivers/usb/typec/class.h @@ -55,6 +55,7 @@ struct typec_port { enum typec_role vconn_role; enum typec_pwr_opmode pwr_opmode; enum typec_port_type port_type; + enum usb_mode usb_mode; struct mutex port_type_lock; enum typec_orientation orientation; diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 549275f8ac1b..f7edced5b10b 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -87,6 +87,17 @@ enum typec_orientation { TYPEC_ORIENTATION_REVERSE, }; +enum usb_mode { + USB_MODE_NONE, + USB_MODE_USB2, + USB_MODE_USB3, + USB_MODE_USB4 +}; + +#define USB_CAPABILITY_USB2 BIT(0) +#define USB_CAPABILITY_USB3 BIT(1) +#define USB_CAPABILITY_USB4 BIT(2) + /* * struct enter_usb_data - Enter_USB Message details * @eudo: Enter_USB Data Object @@ -240,6 +251,7 @@ struct typec_partner_desc { * @port_type_set: Set port type * @pd_get: Get available USB Power Delivery Capabilities. * @pd_set: Set USB Power Delivery Capabilities. + * @default_usb_mode_set: USB Mode to be used by default with Enter_USB Message */ struct typec_operations { int (*try_role)(struct typec_port *port, int role); @@ -250,6 +262,7 @@ struct typec_operations { enum typec_port_type type); struct usb_power_delivery **(*pd_get)(struct typec_port *port); int (*pd_set)(struct typec_port *port, struct usb_power_delivery *pd); + int (*default_usb_mode_set)(struct typec_port *port, enum usb_mode mode); }; enum usb_pd_svdm_ver { @@ -267,6 +280,7 @@ enum usb_pd_svdm_ver { * @svdm_version: USB PD Structured VDM version if supported * @prefer_role: Initial role preference (DRP ports). 
* @accessory: Supported Accessory Modes + * @usb_capability: Supported USB Modes * @fwnode: Optional fwnode of the port * @driver_data: Private pointer for driver specific info * @pd: Optional USB Power Delivery Support @@ -283,6 +297,7 @@ struct typec_capability { int prefer_role; enum typec_accessory accessory[TYPEC_MAX_ACCESSORY]; unsigned int orientation_aware:1; + u8 usb_capability; struct fwnode_handle *fwnode; void *driver_data; @@ -350,6 +365,8 @@ int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_ int typec_partner_set_usb_power_delivery(struct typec_partner *partner, struct usb_power_delivery *pd); +void typec_port_set_usb_mode(struct typec_port *port, enum usb_mode mode); + /** * struct typec_connector - Representation of Type-C port for external drivers * @attach: notification about device removal -- cgit v1.2.3 From 2140a952c4e9c73993ae6d9c2cc674d263d4beab Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 16 Oct 2024 16:18:32 +0300 Subject: usb: typec: Add attribute file showing the USB Modes of the partner This attribute file shows the supported USB modes (USB 2.0, USB 3.0 and USB4) of the partner, and the currently active mode. The active mode is determined primarily by checking the speed of the enumerated USB device. When USB Power Delivery is supported, the active USB mode should be always the mode that was used with the Enter_USB Message, regardless of the result of the USB enumeration. The port drivers can separately assign the mode with a dedicated API. If USB Power Delivery Identity is supplied for the partner device, the supported modes are extracted from it. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20241016131834.898599-3-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-class-typec | 14 ++++ drivers/usb/typec/class.c | 124 +++++++++++++++++++++++++++- drivers/usb/typec/class.h | 2 + include/linux/usb/typec.h | 5 ++ 4 files changed, 141 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/Documentation/ABI/testing/sysfs-class-typec b/Documentation/ABI/testing/sysfs-class-typec index 3ee757208122..38e101c17a00 100644 --- a/Documentation/ABI/testing/sysfs-class-typec +++ b/Documentation/ABI/testing/sysfs-class-typec @@ -233,6 +233,20 @@ Description: directory exists, it will have an attribute file for every VDO in Discover Identity command result. +What: /sys/class/typec/-partner/usb_mode +Date: November 2024 +Contact: Heikki Krogerus +Description: The USB Modes that the partner device supports. The active mode + is displayed in brackets. The active USB mode can be changed by + writing to this file when the port driver is able to send Data + Reset Message to the partner. That requires USB Power Delivery + contract between the partner and the port. + + Valid values: + - usb2 (USB 2.0) + - usb3 (USB 3.2) + - usb4 (USB4) + USB Type-C cable devices (eg. 
/sys/class/typec/port0-cable/) Note: Electronically Marked Cables will have a device also for one cable plug diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 2f12269d1465..953cfb1b093e 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -618,6 +618,75 @@ EXPORT_SYMBOL_GPL(typec_unregister_altmode); /* ------------------------------------------------------------------------- */ /* Type-C Partners */ +/** + * typec_partner_set_usb_mode - Assign active USB Mode for the partner + * @partner: USB Type-C partner + * @mode: USB Mode (USB2, USB3 or USB4) + * + * The port drivers can use this function to assign the active USB Mode to + * @partner. The USB Mode can change for example due to Data Reset. + */ +void typec_partner_set_usb_mode(struct typec_partner *partner, enum usb_mode mode) +{ + if (!partner || partner->usb_mode == mode) + return; + + partner->usb_capability |= BIT(mode - 1); + partner->usb_mode = mode; + sysfs_notify(&partner->dev.kobj, NULL, "usb_mode"); +} +EXPORT_SYMBOL_GPL(typec_partner_set_usb_mode); + +static ssize_t +usb_mode_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct typec_partner *partner = to_typec_partner(dev); + int len = 0; + int i; + + for (i = USB_MODE_USB2; i < USB_MODE_USB4 + 1; i++) { + if (!(BIT(i - 1) & partner->usb_capability)) + continue; + + if (i == partner->usb_mode) + len += sysfs_emit_at(buf, len, "[%s] ", usb_modes[i]); + else + len += sysfs_emit_at(buf, len, "%s ", usb_modes[i]); + } + + sysfs_emit_at(buf, len - 1, "\n"); + + return len; +} + +static ssize_t usb_mode_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct typec_partner *partner = to_typec_partner(dev); + struct typec_port *port = to_typec_port(dev->parent); + int mode; + int ret; + + if (!port->ops || !port->ops->enter_usb_mode) + return -EOPNOTSUPP; + + mode = sysfs_match_string(usb_modes, buf); + if (mode < 0) + return mode; + + if (mode == partner->usb_mode) + return size; + + ret = port->ops->enter_usb_mode(port, mode); + if (ret) + return ret; + + typec_partner_set_usb_mode(partner, mode); + + return size; +} +static DEVICE_ATTR_RW(usb_mode); + static ssize_t accessory_mode_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -664,6 +733,7 @@ static struct attribute *typec_partner_attrs[] = { &dev_attr_supports_usb_power_delivery.attr, &dev_attr_number_of_alternate_modes.attr, &dev_attr_type.attr, + &dev_attr_usb_mode.attr, &dev_attr_usb_power_delivery_revision.attr, NULL }; @@ -671,6 +741,14 @@ static struct attribute *typec_partner_attrs[] = { static umode_t typec_partner_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n) { struct typec_partner *partner = to_typec_partner(kobj_to_dev(kobj)); + struct typec_port *port = to_typec_port(partner->dev.parent); + + if (attr == &dev_attr_usb_mode.attr) { + if (!partner->usb_capability) + return 0; + if (!port->ops || !port->ops->enter_usb_mode) + return 0444; + } if (attr == &dev_attr_number_of_alternate_modes.attr) { if (partner->num_altmodes < 0) @@ -744,10 +822,33 @@ static void typec_partner_unlink_device(struct typec_partner *partner, struct de */ int typec_partner_set_identity(struct typec_partner *partner) { - if (!partner->identity) + u8 usb_capability = partner->usb_capability; + struct device *dev = &partner->dev; + struct usb_pd_identity *id; + + id = get_pd_identity(dev); + if (!id) return -EINVAL; - typec_report_identity(&partner->dev); + if 
(to_typec_port(dev->parent)->data_role == TYPEC_HOST) { + u32 devcap = PD_VDO_UFP_DEVCAP(id->vdo[0]); + + if (devcap & (DEV_USB2_CAPABLE | DEV_USB2_BILLBOARD)) + usb_capability |= USB_CAPABILITY_USB2; + if (devcap & DEV_USB3_CAPABLE) + usb_capability |= USB_CAPABILITY_USB3; + if (devcap & DEV_USB4_CAPABLE) + usb_capability |= USB_CAPABILITY_USB4; + } else { + usb_capability = PD_VDO_DFP_HOSTCAP(id->vdo[0]); + } + + if (partner->usb_capability != usb_capability) { + partner->usb_capability = usb_capability; + sysfs_notify(&dev->kobj, NULL, "usb_mode"); + } + + typec_report_identity(dev); return 0; } EXPORT_SYMBOL_GPL(typec_partner_set_identity); @@ -917,6 +1018,7 @@ struct typec_partner *typec_register_partner(struct typec_port *port, partner->usb_pd = desc->usb_pd; partner->accessory = desc->accessory; partner->num_altmodes = -1; + partner->usb_capability = desc->usb_capability; partner->pd_revision = desc->pd_revision; partner->svdm_version = port->cap->svdm_version; partner->attach = desc->attach; @@ -936,6 +1038,15 @@ struct typec_partner *typec_register_partner(struct typec_port *port, partner->dev.type = &typec_partner_dev_type; dev_set_name(&partner->dev, "%s-partner", dev_name(&port->dev)); + if (port->usb2_dev) { + partner->usb_capability |= USB_CAPABILITY_USB2; + partner->usb_mode = USB_MODE_USB2; + } + if (port->usb3_dev) { + partner->usb_capability |= USB_CAPABILITY_USB2 | USB_CAPABILITY_USB3; + partner->usb_mode = USB_MODE_USB3; + } + ret = device_register(&partner->dev); if (ret) { dev_err(&port->dev, "failed to register partner (%d)\n", ret); @@ -1935,13 +2046,18 @@ static void typec_partner_attach(struct typec_connector *con, struct device *dev struct typec_port *port = container_of(con, struct typec_port, con); struct typec_partner *partner = typec_get_partner(port); struct usb_device *udev = to_usb_device(dev); + enum usb_mode usb_mode; - if (udev->speed < USB_SPEED_SUPER) + if (udev->speed < USB_SPEED_SUPER) { + usb_mode = USB_MODE_USB2; port->usb2_dev = dev; - else + } else { + usb_mode = USB_MODE_USB3; port->usb3_dev = dev; + } if (partner) { + typec_partner_set_usb_mode(partner, usb_mode); typec_partner_link_device(partner, dev); put_device(&partner->dev); } diff --git a/drivers/usb/typec/class.h b/drivers/usb/typec/class.h index 85bc50aa54f7..b3076a24ad2e 100644 --- a/drivers/usb/typec/class.h +++ b/drivers/usb/typec/class.h @@ -35,6 +35,8 @@ struct typec_partner { int num_altmodes; u16 pd_revision; /* 0300H = "3.0" */ enum usb_pd_svdm_ver svdm_version; + enum usb_mode usb_mode; + u8 usb_capability; struct usb_power_delivery *pd; diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index f7edced5b10b..d616b8807000 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -220,6 +220,7 @@ struct typec_cable_desc { * @accessory: Audio, Debug or none. * @identity: Discover Identity command data * @pd_revision: USB Power Delivery Specification Revision if supported + * @usb_capability: Supported USB Modes * @attach: Notification about attached USB device * @deattach: Notification about removed USB device * @@ -237,6 +238,7 @@ struct typec_partner_desc { enum typec_accessory accessory; struct usb_pd_identity *identity; u16 pd_revision; /* 0300H = "3.0" */ + u8 usb_capability; void (*attach)(struct typec_partner *partner, struct device *dev); void (*deattach)(struct typec_partner *partner, struct device *dev); @@ -252,6 +254,7 @@ struct typec_partner_desc { * @pd_get: Get available USB Power Delivery Capabilities. 
* @pd_set: Set USB Power Delivery Capabilities. * @default_usb_mode_set: USB Mode to be used by default with Enter_USB Message + * @enter_usb_mode: Change the active USB Mode */ struct typec_operations { int (*try_role)(struct typec_port *port, int role); @@ -263,6 +266,7 @@ struct typec_operations { struct usb_power_delivery **(*pd_get)(struct typec_port *port); int (*pd_set)(struct typec_port *port, struct usb_power_delivery *pd); int (*default_usb_mode_set)(struct typec_port *port, enum usb_mode mode); + int (*enter_usb_mode)(struct typec_port *port, enum usb_mode mode); }; enum usb_pd_svdm_ver { @@ -365,6 +369,7 @@ int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_ int typec_partner_set_usb_power_delivery(struct typec_partner *partner, struct usb_power_delivery *pd); +void typec_partner_set_usb_mode(struct typec_partner *partner, enum usb_mode usb_mode); void typec_port_set_usb_mode(struct typec_port *port, enum usb_mode mode); /** -- cgit v1.2.3 From 3d9b44d0972be1298400e449cfbcc436df2e988e Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 16 Oct 2024 11:03:41 +0800 Subject: ASoC: sdw_utils: Add support for exclusion DAI quirks The system contains a mechanism for certain DAI links to be included based on a quirk. Add support for certain DAI links to be excluded based on a quirk; this is useful in situations where the vast majority of SKUs utilise a feature so it is easier to quirk on those that don't.
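For clarity, the selection rule that the change below open-codes as a single expression can be restated as a small standalone predicate; this is a sketch with illustrative naming only, the field names simply mirror the driver structures.

/* Returns true when the DAI link should be used for this machine. */
static bool sdw_dai_link_is_used(unsigned long quirk, bool quirk_exclude,
				 unsigned long mc_quirk)
{
	bool quirk_set = quirk & mc_quirk;

	/* No quirk attached to the DAI: the link is always used. */
	if (!quirk)
		return true;

	/*
	 * Inclusion quirk (quirk_exclude == false): use the link only when
	 * the machine sets the quirk bit.
	 * Exclusion quirk (quirk_exclude == true): use the link only when
	 * the machine does not set the quirk bit.
	 */
	return quirk_exclude ^ quirk_set;
}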
Signed-off-by: Charles Keepax Reviewed-by: Liam Girdwood Signed-off-by: Bard Liao Link: https://patch.msgid.link/20241016030344.13535-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 1 + sound/soc/sdw_utils/soc_sdw_utils.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index e16fccd48e6b..a25f94d6eb67 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -28,6 +28,7 @@ * - SOC_SDW_CODEC_SPKR | SOF_SIDECAR_AMPS - Not currently supported */ #define SOC_SDW_SIDECAR_AMPS BIT(16) +#define SOC_SDW_CODEC_MIC BIT(17) #define SOC_SDW_UNUSED_DAI_ID -1 #define SOC_SDW_JACK_OUT_DAI_ID 0 diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index f86d55cdaaf6..6f6bff0f69fc 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -528,6 +528,8 @@ struct asoc_sdw_codec_info codec_info_list[] = { .rtd_init = asoc_sdw_cs42l43_dmic_rtd_init, .widgets = generic_dmic_widgets, .num_widgets = ARRAY_SIZE(generic_dmic_widgets), + .quirk = SOC_SDW_CODEC_MIC, + .quirk_exclude = true, }, { .direction = {false, true}, -- cgit v1.2.3 From 0d3039f4d2f4a79798d97f2ac1a9656b055b561f Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 16 Oct 2024 01:35:27 +0000 Subject: ASoC: makes snd_soc_set_runtime_hwparams() inline snd_soc_set_runtime_hwparams() is now doing very simple things. We can make it a simple inline function, without needing EXPORT_SYMBOL_GPL(). Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87cyk0eso0.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc.h | 9 +++++++-- sound/soc/soc-pcm.c | 16 ---------------- 2 files changed, 7 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index e6e359c1a2ac..8a1db45988ba 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -541,8 +541,13 @@ int snd_soc_tdm_params_to_bclk(const struct snd_pcm_hw_params *params, int tdm_width, int tdm_slots, int slot_multiple); /* set runtime hw params */ -int snd_soc_set_runtime_hwparams(struct snd_pcm_substream *substream, - const struct snd_pcm_hardware *hw); +static inline int snd_soc_set_runtime_hwparams(struct snd_pcm_substream *substream, + const struct snd_pcm_hardware *hw) +{ + substream->runtime->hw = *hw; + + return 0; +} struct snd_ac97 *snd_soc_alloc_ac97_component(struct snd_soc_component *component); struct snd_ac97 *snd_soc_new_ac97_component(struct snd_soc_component *component, diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 7a59121fc323..e2c5300df0f2 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -416,22 +416,6 @@ bool snd_soc_runtime_ignore_pmdown_time(struct snd_soc_pcm_runtime *rtd) return true; } -/** - * snd_soc_set_runtime_hwparams - set the runtime hardware parameters - * @substream: the pcm substream - * @hw: the hardware parameters - * - * Sets the substream runtime hardware parameters. - */ -int snd_soc_set_runtime_hwparams(struct snd_pcm_substream *substream, - const struct snd_pcm_hardware *hw) -{ - substream->runtime->hw = *hw; - - return 0; -} -EXPORT_SYMBOL_GPL(snd_soc_set_runtime_hwparams); - /* DPCM stream event, send event to FE and all active BEs.
*/ int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir, int event) -- cgit v1.2.3 From f35533a0e60946ee3fb8adccf8a36024c6f1fe40 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 16 Oct 2024 18:23:23 +0800 Subject: soundwire: sdw_intel: include linux/acpi.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the acpi_handle stuff. Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Acked-by: Vinod Koul Link: https://patch.msgid.link/20241016102333.294448-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw_intel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 37ae69365fe2..fae345987b8c 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -4,6 +4,7 @@ #ifndef __SDW_INTEL_H #define __SDW_INTEL_H +#include #include #include -- cgit v1.2.3 From 4b224ff80d6609811ec6ab5406a16c92825cfb1a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 16 Oct 2024 18:23:24 +0800 Subject: ASoC/soundwire: remove sdw_slave_extended_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This structure is used to copy information from the 'sdw_slave' structures, it's better to create a flexible array of 'sdw_slave' pointers and directly access the information. This will also help access additional information stored in the 'sdw_slave' structure, such as an SDCA context. This patch does not add new functionality, it only modified how the information is retrieved. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://patch.msgid.link/20241016102333.294448-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- drivers/soundwire/amd_init.c | 12 ++++++------ drivers/soundwire/intel_init.c | 13 ++++++------- include/linux/soundwire/sdw.h | 6 +++--- include/linux/soundwire/sdw_amd.h | 7 ++----- include/linux/soundwire/sdw_intel.h | 7 ++----- include/sound/soc-acpi.h | 3 +-- sound/soc/amd/ps/pci-ps.c | 3 +-- sound/soc/soc-acpi.c | 30 ++++++++++++++++-------------- sound/soc/sof/amd/acp-common.c | 3 +-- sound/soc/sof/intel/hda.c | 16 +++++++++------- 10 files changed, 47 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/drivers/soundwire/amd_init.c b/drivers/soundwire/amd_init.c index db040f435059..53d1d707ca1a 100644 --- a/drivers/soundwire/amd_init.c +++ b/drivers/soundwire/amd_init.c @@ -177,7 +177,7 @@ EXPORT_SYMBOL_NS(sdw_amd_probe, SOUNDWIRE_AMD_INIT); void sdw_amd_exit(struct sdw_amd_ctx *ctx) { sdw_amd_cleanup(ctx); - kfree(ctx->ids); + kfree(ctx->peripherals); kfree(ctx); } EXPORT_SYMBOL_NS(sdw_amd_exit, SOUNDWIRE_AMD_INIT); @@ -204,10 +204,11 @@ int sdw_amd_get_slave_info(struct sdw_amd_ctx *ctx) num_slaves++; } - ctx->ids = kcalloc(num_slaves, sizeof(*ctx->ids), GFP_KERNEL); - if (!ctx->ids) + ctx->peripherals = kmalloc(struct_size(ctx->peripherals, array, num_slaves), + GFP_KERNEL); + if (!ctx->peripherals) return -ENOMEM; - ctx->num_slaves = num_slaves; + ctx->peripherals->num_peripherals = num_slaves; for (index = 0; index < ctx->count; index++) { if (!(ctx->link_mask & BIT(index))) continue; @@ -215,8 +216,7 @@ int sdw_amd_get_slave_info(struct sdw_amd_ctx *ctx) if (amd_manager) { bus = &amd_manager->bus; list_for_each_entry(slave, &bus->slaves, node) { - ctx->ids[i].id = slave->id; - ctx->ids[i].link_id = bus->link_id; + 
ctx->peripherals->array[i] = slave; i++; } } diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c index a09134b97cd6..12e7a98f319f 100644 --- a/drivers/soundwire/intel_init.c +++ b/drivers/soundwire/intel_init.c @@ -252,17 +252,16 @@ static struct sdw_intel_ctx num_slaves++; } - ctx->ids = kcalloc(num_slaves, sizeof(*ctx->ids), GFP_KERNEL); - if (!ctx->ids) + ctx->peripherals = kmalloc(struct_size(ctx->peripherals, array, num_slaves), + GFP_KERNEL); + if (!ctx->peripherals) goto err; - - ctx->num_slaves = num_slaves; + ctx->peripherals->num_peripherals = num_slaves; i = 0; list_for_each_entry(link, &ctx->link_list, list) { bus = &link->cdns->bus; list_for_each_entry(slave, &bus->slaves, node) { - ctx->ids[i].id = slave->id; - ctx->ids[i].link_id = bus->link_id; + ctx->peripherals->array[i] = slave; i++; } } @@ -371,7 +370,7 @@ void sdw_intel_exit(struct sdw_intel_ctx *ctx) } sdw_intel_cleanup(ctx); - kfree(ctx->ids); + kfree(ctx->peripherals); kfree(ctx->ldev); kfree(ctx); } diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 5e0dd47a0412..283c8bfdde49 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -488,9 +488,9 @@ struct sdw_slave_id { __u8 sdw_version:4; }; -struct sdw_extended_slave_id { - int link_id; - struct sdw_slave_id id; +struct sdw_peripherals { + int num_peripherals; + struct sdw_slave *array[]; }; /* diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index 28a4eb77717f..585b4c58a8a6 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -115,19 +115,16 @@ struct sdw_amd_acpi_info { * struct sdw_amd_ctx - context allocated by the controller driver probe * * @count: link count - * @num_slaves: total number of devices exposed across all enabled links * @link_mask: bit-wise mask listing SoundWire links reported by the * Controller - * @ids: array of slave_id, representing Slaves exposed across all enabled - * links * @pdev: platform device structure + * @peripherals: array representing Peripherals exposed across all enabled links */ struct sdw_amd_ctx { int count; - int num_slaves; u32 link_mask; - struct sdw_extended_slave_id *ids; struct platform_device *pdev[AMD_SDW_MAX_MANAGER_COUNT]; + struct sdw_peripherals *peripherals; }; /** diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index fae345987b8c..491ddd27270f 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -287,31 +287,28 @@ struct hdac_bus; * hardware capabilities after all power dependencies are settled. * @link_mask: bit-wise mask listing SoundWire links reported by the * Controller - * @num_slaves: total number of devices exposed across all enabled links * @handle: ACPI parent handle * @ldev: information for each link (controller-specific and kept * opaque here) - * @ids: array of slave_id, representing Slaves exposed across all enabled - * links * @link_list: list to handle interrupts across all links * @shim_lock: mutex to handle concurrent rmw access to shared SHIM registers. * @shim_mask: flags to track initialization of SHIM shared registers * @shim_base: sdw shim base. * @alh_base: sdw alh base. 
+ * @peripherals: array representing Peripherals exposed across all enabled links */ struct sdw_intel_ctx { int count; void __iomem *mmio_base; u32 link_mask; - int num_slaves; acpi_handle handle; struct sdw_intel_link_dev **ldev; - struct sdw_extended_slave_id *ids; struct list_head link_list; struct mutex shim_lock; /* lock for access to shared SHIM registers */ u32 shim_mask; u32 shim_base; u32 alh_base; + struct sdw_peripherals *peripherals; }; /** diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index 60d3b86a4660..ac7f9e791ee1 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -233,7 +233,6 @@ static inline bool snd_soc_acpi_sof_parent(struct device *dev) bool snd_soc_acpi_sdw_link_slaves_found(struct device *dev, const struct snd_soc_acpi_link_adr *link, - struct sdw_extended_slave_id *ids, - int num_slaves); + struct sdw_peripherals *peripherals); #endif diff --git a/sound/soc/amd/ps/pci-ps.c b/sound/soc/amd/ps/pci-ps.c index c72d666d51bd..4365499c8f82 100644 --- a/sound/soc/amd/ps/pci-ps.c +++ b/sound/soc/amd/ps/pci-ps.c @@ -302,8 +302,7 @@ static struct snd_soc_acpi_mach *acp63_sdw_machine_select(struct device *dev) link = mach->links; for (i = 0; i < acp_data->info.count && link->num_adr; link++, i++) { if (!snd_soc_acpi_sdw_link_slaves_found(dev, link, - acp_data->sdw->ids, - acp_data->sdw->num_slaves)) + acp_data->sdw->peripherals)) break; } if (i == acp_data->info.count || !link->num_adr) diff --git a/sound/soc/soc-acpi.c b/sound/soc/soc-acpi.c index 6d693b2ad5a3..270f9777942f 100644 --- a/sound/soc/soc-acpi.c +++ b/sound/soc/soc-acpi.c @@ -131,8 +131,7 @@ EXPORT_SYMBOL_GPL(snd_soc_acpi_codec_list); /* Check if all Slaves defined on the link can be found */ bool snd_soc_acpi_sdw_link_slaves_found(struct device *dev, const struct snd_soc_acpi_link_adr *link, - struct sdw_extended_slave_id *ids, - int num_slaves) + struct sdw_peripherals *peripherals) { unsigned int part_id, link_id, unique_id, mfg_id, version; int i, j, k; @@ -146,22 +145,25 @@ bool snd_soc_acpi_sdw_link_slaves_found(struct device *dev, link_id = SDW_DISCO_LINK_ID(adr); version = SDW_VERSION(adr); - for (j = 0; j < num_slaves; j++) { + for (j = 0; j < peripherals->num_peripherals; j++) { + struct sdw_slave *peripheral = peripherals->array[j]; + /* find out how many identical parts were reported on that link */ - if (ids[j].link_id == link_id && - ids[j].id.part_id == part_id && - ids[j].id.mfg_id == mfg_id && - ids[j].id.sdw_version == version) + if (peripheral->bus->link_id == link_id && + peripheral->id.part_id == part_id && + peripheral->id.mfg_id == mfg_id && + peripheral->id.sdw_version == version) reported_part_count++; } - for (j = 0; j < num_slaves; j++) { + for (j = 0; j < peripherals->num_peripherals; j++) { + struct sdw_slave *peripheral = peripherals->array[j]; int expected_part_count = 0; - if (ids[j].link_id != link_id || - ids[j].id.part_id != part_id || - ids[j].id.mfg_id != mfg_id || - ids[j].id.sdw_version != version) + if (peripheral->bus->link_id != link_id || + peripheral->id.part_id != part_id || + peripheral->id.mfg_id != mfg_id || + peripheral->id.sdw_version != version) continue; /* find out how many identical parts are expected */ @@ -180,7 +182,7 @@ bool snd_soc_acpi_sdw_link_slaves_found(struct device *dev, */ unique_id = SDW_UNIQUE_ID(adr); if (reported_part_count == 1 || - ids[j].id.unique_id == unique_id) { + peripheral->id.unique_id == unique_id) { dev_dbg(dev, "found part_id %#x at link %d\n", part_id, link_id); break; } @@ -189,7 +191,7 @@ 
bool snd_soc_acpi_sdw_link_slaves_found(struct device *dev, part_id, reported_part_count, expected_part_count, link_id); } } - if (j == num_slaves) { + if (j == peripherals->num_peripherals) { dev_dbg(dev, "Slave part_id %#x not found\n", part_id); return false; } diff --git a/sound/soc/sof/amd/acp-common.c b/sound/soc/sof/amd/acp-common.c index dbcaac84cb73..fc792956bb97 100644 --- a/sound/soc/sof/amd/acp-common.c +++ b/sound/soc/sof/amd/acp-common.c @@ -145,8 +145,7 @@ static struct snd_soc_acpi_mach *amd_sof_sdw_machine_select(struct snd_sof_dev * link = mach->links; for (i = 0; i < acp_data->info.count && link->num_adr; link++, i++) { if (!snd_soc_acpi_sdw_link_slaves_found(sdev->dev, link, - acp_data->sdw->ids, - acp_data->sdw->num_slaves)) + acp_data->sdw->peripherals)) break; } if (i == acp_data->info.count || !link->num_adr) diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index e4cb4ffc7270..9abc4c071ae5 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -1064,7 +1064,7 @@ static struct snd_soc_acpi_mach *hda_sdw_machine_select(struct snd_sof_dev *sdev { struct snd_sof_pdata *pdata = sdev->pdata; const struct snd_soc_acpi_link_adr *link; - struct sdw_extended_slave_id *ids; + struct sdw_peripherals *peripherals; struct snd_soc_acpi_mach *mach; struct sof_intel_hda_dev *hdev; u32 link_mask; @@ -1083,7 +1083,7 @@ static struct snd_soc_acpi_mach *hda_sdw_machine_select(struct snd_sof_dev *sdev return NULL; } - if (!hdev->sdw->num_slaves) { + if (!hdev->sdw->peripherals || !hdev->sdw->peripherals->num_peripherals) { dev_warn(sdev->dev, "No SoundWire peripheral detected in ACPI tables\n"); return NULL; } @@ -1119,8 +1119,7 @@ static struct snd_soc_acpi_mach *hda_sdw_machine_select(struct snd_sof_dev *sdev * are not found on this link. */ if (!snd_soc_acpi_sdw_link_slaves_found(sdev->dev, link, - hdev->sdw->ids, - hdev->sdw->num_slaves)) + hdev->sdw->peripherals)) break; } /* Found if all Slaves are checked */ @@ -1136,10 +1135,13 @@ static struct snd_soc_acpi_mach *hda_sdw_machine_select(struct snd_sof_dev *sdev } dev_info(sdev->dev, "No SoundWire machine driver found for the ACPI-reported configuration:\n"); - ids = hdev->sdw->ids; - for (i = 0; i < hdev->sdw->num_slaves; i++) + peripherals = hdev->sdw->peripherals; + for (i = 0; i < peripherals->num_peripherals; i++) dev_info(sdev->dev, "link %d mfg_id 0x%04x part_id 0x%04x version %#x\n", - ids[i].link_id, ids[i].id.mfg_id, ids[i].id.part_id, ids[i].id.sdw_version); + peripherals->array[i]->bus->link_id, + peripherals->array[i]->id.mfg_id, + peripherals->array[i]->id.part_id, + peripherals->array[i]->id.sdw_version); return NULL; } -- cgit v1.2.3 From 3a513da1ae33972e59efeef7908061f1f24af480 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 16 Oct 2024 18:23:25 +0800 Subject: ASoC: SDCA: add initial module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add new module for SDCA (SoundWire Device Class for Audio) support. For now just add a parser to identify the SDCA revision and the function mask. Note that the SDCA definitions and related MIPI DisCo properties are defined only for ACPI platforms and extracted with _DSD helpers. There is currently no support for Device Tree in the specification, the 'depends on ACPI' reflects this design limitation. This might change in a future revision of the specification but for SDCA 1.0 ACPI is the only supported type of platform firmware. 
The SDCA library is defined with static inline fallbacks, which will allow for unconditional addition of SDCA support in common parts of the code. The design follows a four-step process: 1) Basic information related to Functions is extracted from MIPI DisCo tables and stored in the 'struct sdw_slave'. Devm_ based memory allocation is not allowed at this point prior to a driver probe, so we only store the function node, address and type. 2) When a codec driver probes, it will register subdevices for each Function identified in phase 1). 3) A driver will probe for each subdevice, and additional parsing/memory allocation takes place at this level. devm_ based allocation is highly encouraged to make error handling manageable. 4) Before the peripheral device becomes physically attached, register access is not permitted and the regmaps are cache-only. When the peripheral device is enumerated, the bus level uses the 'update_status' notification; after optional device-level initialization, the codec driver will notify each of the subdevices so that they can start interacting with the hardware. Note that the context extracted in 1) should arguably be handled completely in the codec driver probe. That would however make it difficult to use the ACPI information for machine quirks, and e.g. select a different machine driver and topologies, as done for the RT712_VB handling later in the series. To make the implementation of quirks simpler, this patchset extracts a minimal amount of context (interface revision and number/type of Functions) before the codec driver probe, and stores this context in the scope of the 'struct sdw_slave'. The SDCA library can also be used in a vendor-specific driver without creating subdevices, e.g. to retrieve the 'initialization-table' values to write platform-specific values as needed.
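
As an illustration of steps 1) and 2), a codec driver could use the helpers added below roughly as follows. This is a sketch only: the helper calls and the sdca_data fields come from this patch, while where this runs and how each Function becomes a subdevice are assumptions, since that part is not defined here.

#include <linux/device.h>
#include <linux/soundwire/sdw.h>
#include <sound/sdca.h>

static void example_sdca_scan(struct sdw_slave *slave)
{
	int i;

	/* step 1: cache the interface revision and the Function list */
	sdca_lookup_interface_revision(slave);
	sdca_lookup_functions(slave);

	/* step 2: walk the discovered Functions */
	for (i = 0; i < slave->sdca_data.num_functions; i++) {
		struct sdca_function_desc *func = &slave->sdca_data.sdca_func[i];

		dev_dbg(&slave->dev, "SDCA Function %s (type %u) at %#llx\n",
			func->name, func->type, (unsigned long long)func->adr);
		/* a subdevice would be registered for 'func' here */
	}
}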
For more technical details, the SDCA specification is available for public downloads at https://www.mipi.org/mipi-sdca-v1-0-download Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://patch.msgid.link/20241016102333.294448-4-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw.h | 3 + include/sound/sdca.h | 54 +++++++++++++ include/sound/sdca_function.h | 55 +++++++++++++ sound/soc/Kconfig | 1 + sound/soc/Makefile | 1 + sound/soc/sdca/Kconfig | 11 +++ sound/soc/sdca/Makefile | 5 ++ sound/soc/sdca/sdca_device.c | 24 ++++++ sound/soc/sdca/sdca_functions.c | 173 ++++++++++++++++++++++++++++++++++++++++ 9 files changed, 327 insertions(+) create mode 100644 include/sound/sdca.h create mode 100644 include/sound/sdca_function.h create mode 100644 sound/soc/sdca/Kconfig create mode 100644 sound/soc/sdca/Makefile create mode 100644 sound/soc/sdca/sdca_device.c create mode 100644 sound/soc/sdca/sdca_functions.c (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 283c8bfdde49..49d690f3d29a 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -10,6 +10,7 @@ #include #include #include +#include struct sdw_bus; struct sdw_slave; @@ -663,6 +664,7 @@ struct sdw_slave_ops { * @is_mockup_device: status flag used to squelch errors in the command/control * protocol for SoundWire mockup devices * @sdw_dev_lock: mutex used to protect callbacks/remove races + * @sdca_data: structure containing all device data for SDCA helpers */ struct sdw_slave { struct sdw_slave_id id; @@ -686,6 +688,7 @@ struct sdw_slave { bool first_interrupt_done; bool is_mockup_device; struct mutex sdw_dev_lock; /* protect callbacks/remove races */ + struct sdca_device_data sdca_data; }; #define dev_to_sdw_dev(_dev) container_of(_dev, struct sdw_slave, dev) diff --git a/include/sound/sdca.h b/include/sound/sdca.h new file mode 100644 index 000000000000..34473ca4c789 --- /dev/null +++ b/include/sound/sdca.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ +/* + * The MIPI SDCA specification is available for public downloads at + * https://www.mipi.org/mipi-sdca-v1-0-download + * + * Copyright(c) 2024 Intel Corporation + */ + +#ifndef __SDCA_H__ +#define __SDCA_H__ + +struct sdw_slave; + +#define SDCA_MAX_FUNCTION_COUNT 8 + +/** + * sdca_device_desc - short descriptor for an SDCA Function + * @adr: ACPI address (used for SDCA register access) + * @type: Function topology type + * @name: human-readable string + */ +struct sdca_function_desc { + u64 adr; + u32 type; + const char *name; +}; + +/** + * sdca_device_data - structure containing all SDCA related information + * @sdca_interface_revision: value read from _DSD property, mainly to check + * for changes between silicon versions + * @num_functions: total number of supported SDCA functions. Invalid/unsupported + * functions will be skipped. 
+ * @sdca_func: array of function descriptors + */ +struct sdca_device_data { + u32 interface_revision; + int num_functions; + struct sdca_function_desc sdca_func[SDCA_MAX_FUNCTION_COUNT]; +}; + +#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SDCA) + +void sdca_lookup_functions(struct sdw_slave *slave); +void sdca_lookup_interface_revision(struct sdw_slave *slave); + +#else + +static inline void sdca_lookup_functions(struct sdw_slave *slave) {} +static inline void sdca_lookup_interface_revision(struct sdw_slave *slave) {} + +#endif + +#endif diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h new file mode 100644 index 000000000000..a01eec86b9a6 --- /dev/null +++ b/include/sound/sdca_function.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ +/* + * The MIPI SDCA specification is available for public downloads at + * https://www.mipi.org/mipi-sdca-v1-0-download + * + * Copyright(c) 2024 Intel Corporation + */ + +#ifndef __SDCA_FUNCTION_H__ +#define __SDCA_FUNCTION_H__ + +/* + * SDCA Function Types from SDCA specification v1.0a Section 5.1.2 + * all Function types not described are reserved + * Note that SIMPLE_AMP, SIMPLE_MIC and SIMPLE_JACK Function Types + * are NOT defined in SDCA 1.0a, but they were defined in earlier + * drafts and are planned for 1.1. + */ + +enum sdca_function_type { + SDCA_FUNCTION_TYPE_SMART_AMP = 0x01, /* Amplifier with protection features */ + SDCA_FUNCTION_TYPE_SIMPLE_AMP = 0x02, /* subset of SmartAmp */ + SDCA_FUNCTION_TYPE_SMART_MIC = 0x03, /* Smart microphone with acoustic triggers */ + SDCA_FUNCTION_TYPE_SIMPLE_MIC = 0x04, /* subset of SmartMic */ + SDCA_FUNCTION_TYPE_SPEAKER_MIC = 0x05, /* Combination of SmartMic and SmartAmp */ + SDCA_FUNCTION_TYPE_UAJ = 0x06, /* 3.5mm Universal Audio jack */ + SDCA_FUNCTION_TYPE_RJ = 0x07, /* Retaskable jack */ + SDCA_FUNCTION_TYPE_SIMPLE_JACK = 0x08, /* Subset of UAJ */ + SDCA_FUNCTION_TYPE_HID = 0x0A, /* Human Interface Device, for e.g. 
buttons */ + SDCA_FUNCTION_TYPE_IMP_DEF = 0x1F, /* Implementation-defined function */ +}; + +/* Human-readable names used for kernel logs and Function device registration/bind */ +#define SDCA_FUNCTION_TYPE_SMART_AMP_NAME "SmartAmp" +#define SDCA_FUNCTION_TYPE_SIMPLE_AMP_NAME "SimpleAmp" +#define SDCA_FUNCTION_TYPE_SMART_MIC_NAME "SmartMic" +#define SDCA_FUNCTION_TYPE_SIMPLE_MIC_NAME "SimpleMic" +#define SDCA_FUNCTION_TYPE_SPEAKER_MIC_NAME "SpeakerMic" +#define SDCA_FUNCTION_TYPE_UAJ_NAME "UAJ" +#define SDCA_FUNCTION_TYPE_RJ_NAME "RJ" +#define SDCA_FUNCTION_TYPE_SIMPLE_NAME "SimpleJack" +#define SDCA_FUNCTION_TYPE_HID_NAME "HID" + +enum sdca_entity0_controls { + SDCA_CONTROL_ENTITY_0_COMMIT_GROUP_MASK = 0x01, + SDCA_CONTROL_ENTITY_0_INTSTAT_CLEAR = 0x02, + SDCA_CONTROL_ENTITY_0_INT_ENABLE = 0x03, + SDCA_CONTROL_ENTITY_0_FUNCTION_SDCA_VERSION = 0x04, + SDCA_CONTROL_ENTITY_0_FUNCTION_TOPOLOGY = 0x05, + SDCA_CONTROL_ENTITY_0_FUNCTION_MANUFACTURER_ID = 0x06, + SDCA_CONTROL_ENTITY_0_FUNCTION_ID = 0x07, + SDCA_CONTROL_ENTITY_0_FUNCTION_VERSION = 0x08 +}; + +#endif diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig index e87bd15a8b43..8e01b421fe8d 100644 --- a/sound/soc/Kconfig +++ b/sound/soc/Kconfig @@ -108,6 +108,7 @@ source "sound/soc/pxa/Kconfig" source "sound/soc/qcom/Kconfig" source "sound/soc/rockchip/Kconfig" source "sound/soc/samsung/Kconfig" +source "sound/soc/sdca/Kconfig" source "sound/soc/sh/Kconfig" source "sound/soc/sof/Kconfig" source "sound/soc/spear/Kconfig" diff --git a/sound/soc/Makefile b/sound/soc/Makefile index 775bb38c2ed4..5307b0b62a93 100644 --- a/sound/soc/Makefile +++ b/sound/soc/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_SND_SOC) += pxa/ obj-$(CONFIG_SND_SOC) += qcom/ obj-$(CONFIG_SND_SOC) += rockchip/ obj-$(CONFIG_SND_SOC) += samsung/ +obj-$(CONFIG_SND_SOC) += sdca/ obj-$(CONFIG_SND_SOC) += sh/ obj-$(CONFIG_SND_SOC) += sof/ obj-$(CONFIG_SND_SOC) += spear/ diff --git a/sound/soc/sdca/Kconfig b/sound/soc/sdca/Kconfig new file mode 100644 index 000000000000..07f6822fa614 --- /dev/null +++ b/sound/soc/sdca/Kconfig @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config SND_SOC_SDCA + tristate "ASoC SDCA library" + depends on ACPI + help + This option enables support for the MIPI SoundWire Device + Class for Audio (SDCA). + +config SND_SOC_SDCA_OPTIONAL + def_tristate SND_SOC_SDCA || !SND_SOC_SDCA diff --git a/sound/soc/sdca/Makefile b/sound/soc/sdca/Makefile new file mode 100644 index 000000000000..c296bd5a0a7c --- /dev/null +++ b/sound/soc/sdca/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only + +snd-soc-sdca-objs := sdca_functions.o sdca_device.o + +obj-$(CONFIG_SND_SOC_SDCA) += snd-soc-sdca.o diff --git a/sound/soc/sdca/sdca_device.c b/sound/soc/sdca/sdca_device.c new file mode 100644 index 000000000000..58f5f6f0f723 --- /dev/null +++ b/sound/soc/sdca/sdca_device.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +// Copyright(c) 2024 Intel Corporation + +/* + * The MIPI SDCA specification is available for public downloads at + * https://www.mipi.org/mipi-sdca-v1-0-download + */ + +#include +#include +#include + +void sdca_lookup_interface_revision(struct sdw_slave *slave) +{ + struct fwnode_handle *fwnode = slave->dev.fwnode; + + /* + * if this property is not present, then the sdca_interface_revision will + * remain zero, which will be considered as 'not defined' or 'invalid'. 
+ */ + fwnode_property_read_u32(fwnode, "mipi-sdw-sdca-interface-revision", + &slave->sdca_data.interface_revision); +} +EXPORT_SYMBOL_NS(sdca_lookup_interface_revision, SND_SOC_SDCA); diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c new file mode 100644 index 000000000000..a6ad57430dd4 --- /dev/null +++ b/sound/soc/sdca/sdca_functions.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +// Copyright(c) 2024 Intel Corporation + +/* + * The MIPI SDCA specification is available for public downloads at + * https://www.mipi.org/mipi-sdca-v1-0-download + */ + +#include +#include +#include +#include + +static int patch_sdca_function_type(struct device *dev, + u32 interface_revision, + u32 *function_type, + const char **function_name) +{ + unsigned long function_type_patch = 0; + + /* + * Unfortunately early SDCA specifications used different indices for Functions, + * for backwards compatibility we have to reorder the values found + */ + if (interface_revision >= 0x0801) + goto skip_early_draft_order; + + switch (*function_type) { + case 1: + function_type_patch = SDCA_FUNCTION_TYPE_SMART_AMP; + break; + case 2: + function_type_patch = SDCA_FUNCTION_TYPE_SMART_MIC; + break; + case 3: + function_type_patch = SDCA_FUNCTION_TYPE_SPEAKER_MIC; + break; + case 4: + function_type_patch = SDCA_FUNCTION_TYPE_UAJ; + break; + case 5: + function_type_patch = SDCA_FUNCTION_TYPE_RJ; + break; + case 6: + function_type_patch = SDCA_FUNCTION_TYPE_HID; + break; + default: + dev_warn(dev, "%s: SDCA version %#x unsupported function type %d, skipped\n", + __func__, interface_revision, *function_type); + return -EINVAL; + } + +skip_early_draft_order: + if (function_type_patch) + *function_type = function_type_patch; + + /* now double-check the values */ + switch (*function_type) { + case SDCA_FUNCTION_TYPE_SMART_AMP: + *function_name = SDCA_FUNCTION_TYPE_SMART_AMP_NAME; + break; + case SDCA_FUNCTION_TYPE_SMART_MIC: + *function_name = SDCA_FUNCTION_TYPE_SMART_MIC_NAME; + break; + case SDCA_FUNCTION_TYPE_UAJ: + *function_name = SDCA_FUNCTION_TYPE_UAJ_NAME; + break; + case SDCA_FUNCTION_TYPE_HID: + *function_name = SDCA_FUNCTION_TYPE_HID_NAME; + break; + case SDCA_FUNCTION_TYPE_SIMPLE_AMP: + case SDCA_FUNCTION_TYPE_SIMPLE_MIC: + case SDCA_FUNCTION_TYPE_SPEAKER_MIC: + case SDCA_FUNCTION_TYPE_RJ: + case SDCA_FUNCTION_TYPE_IMP_DEF: + dev_warn(dev, "%s: found unsupported SDCA function type %d, skipped\n", + __func__, *function_type); + return -EINVAL; + default: + dev_err(dev, "%s: found invalid SDCA function type %d, skipped\n", + __func__, *function_type); + return -EINVAL; + } + + dev_info(dev, "%s: found SDCA function %s (type %d)\n", + __func__, *function_name, *function_type); + + return 0; +} + +static int find_sdca_function(struct acpi_device *adev, void *data) +{ + struct fwnode_handle *function_node = acpi_fwnode_handle(adev); + struct sdca_device_data *sdca_data = data; + struct device *dev = &adev->dev; + struct fwnode_handle *control5; /* used to identify function type */ + const char *function_name; + u32 function_type; + int func_index; + u64 addr; + int ret; + + if (sdca_data->num_functions >= SDCA_MAX_FUNCTION_COUNT) { + dev_err(dev, "%s: maximum number of functions exceeded\n", __func__); + return -EINVAL; + } + + /* + * The number of functions cannot exceed 8, we could use + * acpi_get_local_address() but the value is stored as u64 so + * we might as well avoid casts and intermediate levels + */ + ret = acpi_get_local_u64_address(adev->handle, &addr); + 
if (ret < 0) + return ret; + + if (!addr) { + dev_err(dev, "%s: no addr\n", __func__); + return -ENODEV; + } + + /* + * Extracting the topology type for an SDCA function is a + * convoluted process. + * The Function type is only visible as a result of a read + * from a control. In theory this would mean reading from the hardware, + * but the SDCA/DisCo specs defined the notion of "DC value" - a constant + * represented with a DSD subproperty. + * Drivers have to query the properties for the control + * SDCA_CONTROL_ENTITY_0_FUNCTION_TOPOLOGY (0x05) + */ + control5 = fwnode_get_named_child_node(function_node, + "mipi-sdca-control-0x5-subproperties"); + if (!control5) + return -ENODEV; + + ret = fwnode_property_read_u32(control5, "mipi-sdca-control-dc-value", + &function_type); + + fwnode_handle_put(control5); + + if (ret < 0) { + dev_err(dev, "%s: the function type can only be determined from ACPI information\n", + __func__); + return ret; + } + + ret = patch_sdca_function_type(dev, sdca_data->interface_revision, + &function_type, &function_name); + if (ret < 0) + return ret; + + /* store results */ + func_index = sdca_data->num_functions; + sdca_data->sdca_func[func_index].adr = addr; + sdca_data->sdca_func[func_index].type = function_type; + sdca_data->sdca_func[func_index].name = function_name; + sdca_data->num_functions++; + + return 0; +} + +void sdca_lookup_functions(struct sdw_slave *slave) +{ + struct device *dev = &slave->dev; + struct acpi_device *adev = to_acpi_device_node(dev->fwnode); + + acpi_dev_for_each_child(adev, find_sdca_function, &slave->sdca_data); +} +EXPORT_SYMBOL_NS(sdca_lookup_functions, SND_SOC_SDCA); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("SDCA library"); -- cgit v1.2.3 From fdb220399177177917dce52063b326a191a35a02 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 16 Oct 2024 18:23:27 +0800 Subject: ASoC: SDCA: add quirk function for RT712_VB match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a generic match function for quirks, chances are we are going to have lots of those... 
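
A caller is expected to branch on the result, roughly as in the sketch below. Where exactly the check happens (codec driver, machine selection code) is an assumption at this point in the series; only sdca_device_quirk_match() and SDCA_QUIRKS_RT712_VB come from this patch.

#include <linux/device.h>
#include <linux/soundwire/sdw.h>
#include <sound/sdca.h>

static void example_select_rt712_variant(struct sdw_slave *slave)
{
	if (sdca_device_quirk_match(slave, SDCA_QUIRKS_RT712_VB))
		dev_dbg(&slave->dev, "RT712 VB detected, using the VB configuration\n");
	else
		dev_dbg(&slave->dev, "RT712 VA assumed, keeping the existing configuration\n");
}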
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://patch.msgid.link/20241016102333.294448-6-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sdca.h | 10 +++++++++- sound/soc/sdca/sdca_device.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/sdca.h b/include/sound/sdca.h index 34473ca4c789..7e138229e8f3 100644 --- a/include/sound/sdca.h +++ b/include/sound/sdca.h @@ -39,16 +39,24 @@ struct sdca_device_data { struct sdca_function_desc sdca_func[SDCA_MAX_FUNCTION_COUNT]; }; +enum sdca_quirk { + SDCA_QUIRKS_RT712_VB, +}; + #if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SDCA) void sdca_lookup_functions(struct sdw_slave *slave); void sdca_lookup_interface_revision(struct sdw_slave *slave); +bool sdca_device_quirk_match(struct sdw_slave *slave, enum sdca_quirk quirk); #else static inline void sdca_lookup_functions(struct sdw_slave *slave) {} static inline void sdca_lookup_interface_revision(struct sdw_slave *slave) {} - +static inline bool sdca_device_quirk_match(struct sdw_slave *slave, enum sdca_quirk quirk) +{ + return false; +} #endif #endif diff --git a/sound/soc/sdca/sdca_device.c b/sound/soc/sdca/sdca_device.c index 58f5f6f0f723..c44dc21cb634 100644 --- a/sound/soc/sdca/sdca_device.c +++ b/sound/soc/sdca/sdca_device.c @@ -9,6 +9,7 @@ #include #include #include +#include void sdca_lookup_interface_revision(struct sdw_slave *slave) { @@ -22,3 +23,45 @@ void sdca_lookup_interface_revision(struct sdw_slave *slave) &slave->sdca_data.interface_revision); } EXPORT_SYMBOL_NS(sdca_lookup_interface_revision, SND_SOC_SDCA); + +static bool sdca_device_quirk_rt712_vb(struct sdw_slave *slave) +{ + struct sdw_slave_id *id = &slave->id; + int i; + + /* + * The RT712_VA relies on the v06r04 draft, and the + * RT712_VB on a more recent v08r01 draft. + */ + if (slave->sdca_data.interface_revision < 0x0801) + return false; + + if (id->mfg_id != 0x025d) + return false; + + if (id->part_id != 0x712 && + id->part_id != 0x713 && + id->part_id != 0x716 && + id->part_id != 0x717) + return false; + + for (i = 0; i < slave->sdca_data.num_functions; i++) { + if (slave->sdca_data.sdca_func[i].type == + SDCA_FUNCTION_TYPE_SMART_MIC) + return true; + } + + return false; +} + +bool sdca_device_quirk_match(struct sdw_slave *slave, enum sdca_quirk quirk) +{ + switch (quirk) { + case SDCA_QUIRKS_RT712_VB: + return sdca_device_quirk_rt712_vb(slave); + default: + break; + } + return false; +} +EXPORT_SYMBOL_NS(sdca_device_quirk_match, SND_SOC_SDCA); -- cgit v1.2.3 From dcf4694f200a67784e053eb5d1d70a191761ff4f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 16 Oct 2024 18:23:29 +0800 Subject: ASoC: soc-acpi: introduce new 'machine check' callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing machine_quirk() returns a pointer to a soc_acpi_mach structure. For SoundWire/SDCA support, we need a slightly different functionality where a quirk function either validates or NACKs an initial selection, based on additional firmware/DMI information. 
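
A minimal sketch of how a machine table entry might supply the new callback is shown below. The callback body, the .drv_name value and what 'arg' points to are assumptions; only the machine_check member itself is added by this patch.

#include <sound/soc-acpi.h>

static bool example_machine_check(void *arg)
{
	/* inspect additional firmware/DMI details reachable through 'arg' here */
	return true;	/* true: keep the initially selected machine */
}

static struct snd_soc_acpi_mach example_mach = {
	.drv_name	= "example-audio",
	.machine_check	= example_machine_check,
};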
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://patch.msgid.link/20241016102333.294448-8-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index ac7f9e791ee1..72e371a21767 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -185,6 +185,10 @@ struct snd_soc_acpi_link_adr { * ACPI ID alone is not sufficient, wrong or misleading * @quirk_data: data used to uniquely identify a machine, usually a list of * audio codecs whose presence if checked with ACPI + * @machine_check: pointer to quirk function. The functionality is similar to + * the use of @machine_quirk, except that the return value is a boolean: the intent + * is to skip a machine if the additional hardware/firmware verification invalidates + * the initial selection in the snd_soc_acpi_mach table. * @pdata: intended for platform data or machine specific-ops. This structure * is not constant since this field may be updated at run-time * @sof_tplg_filename: Sound Open Firmware topology file name, if enabled @@ -203,6 +207,7 @@ struct snd_soc_acpi_mach { const char *board; struct snd_soc_acpi_mach * (*machine_quirk)(void *arg); const void *quirk_data; + bool (*machine_check)(void *arg); void *pdata; struct snd_soc_acpi_mach_params mach_params; const char *sof_tplg_filename; -- cgit v1.2.3 From bae7aff5818b89d5d72f6ab0135c8faf3fa54318 Mon Sep 17 00:00:00 2001 From: Théo Lebrun Date: Mon, 7 Oct 2024 15:49:17 +0200 Subject: dt-bindings: clock: add Mobileye EyeQ6L/EyeQ6H clock indexes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add #defines for Mobileye EyeQ6L and EyeQ6H SoC clocks. Constant prefixes are: - EQ6LC_PLL_: EyeQ6L clock PLLs - EQ6HC_SOUTH_PLL_: EyeQ6H south OLB PLLs - EQ6HC_SOUTH_DIV_: EyeQ6H south OLB divider clocks - EQ6HC_ACC_PLL_: EyeQ6H accelerator OLB PLLs Acked-by: Krzysztof Kozlowski Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20241007-mbly-clk-v5-2-e9d8994269cb@bootlin.com Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/mobileye,eyeq5-clk.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/mobileye,eyeq5-clk.h b/include/dt-bindings/clock/mobileye,eyeq5-clk.h index 26d8930335e4..b433c1772c28 100644 --- a/include/dt-bindings/clock/mobileye,eyeq5-clk.h +++ b/include/dt-bindings/clock/mobileye,eyeq5-clk.h @@ -19,4 +19,25 @@ #define EQ5C_DIV_OSPI 10 +#define EQ6LC_PLL_DDR 0 +#define EQ6LC_PLL_CPU 1 +#define EQ6LC_PLL_PER 2 +#define EQ6LC_PLL_VDI 3 + +#define EQ6HC_SOUTH_PLL_VDI 0 +#define EQ6HC_SOUTH_PLL_PCIE 1 +#define EQ6HC_SOUTH_PLL_PER 2 +#define EQ6HC_SOUTH_PLL_ISP 3 + +#define EQ6HC_SOUTH_DIV_EMMC 4 +#define EQ6HC_SOUTH_DIV_OSPI_REF 5 +#define EQ6HC_SOUTH_DIV_OSPI_SYS 6 +#define EQ6HC_SOUTH_DIV_TSU 7 + +#define EQ6HC_ACC_PLL_XNN 0 +#define EQ6HC_ACC_PLL_VMP 1 +#define EQ6HC_ACC_PLL_PMA 2 +#define EQ6HC_ACC_PLL_MPC 3 +#define EQ6HC_ACC_PLL_NOC 4 + #endif -- cgit v1.2.3 From 6a136805e3c1532d2414b298c024429943cfd482 Mon Sep 17 00:00:00 2001 From: Théo Lebrun Date: Mon, 7 Oct 2024 15:49:18 +0200 Subject: clk: divider: Introduce CLK_DIVIDER_EVEN_INTEGERS flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add CLK_DIVIDER_EVEN_INTEGERS flag to support divisor of 2, 4, 6, etc. 
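
For reference, the register-value to divisor mapping selected by the new flag works out as in the sketch below (standalone helpers mirroring the _get_div()/_get_val() changes in the diff; the function names are made up).

/* CLK_DIVIDER_EVEN_INTEGERS register/divisor mapping */
static unsigned int even_div_from_val(unsigned int val)
{
	return 2 * (val + 1);	/* 0 -> /2, 1 -> /4, 2 -> /6, ... */
}

static unsigned int even_val_from_div(unsigned int div)
{
	return (div >> 1) - 1;	/* inverse mapping, used when setting a rate */
}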
The same divisor can be done using a table, which would be big and wasteful for a clock dividor of width 8 (256 entries). Require increasing flags size from u8 to u16 because CLK_DIVIDER_EVEN_INTEGERS is the eighth flag. u16 is used inside struct clk_divider; `unsigned long` is used for function arguments. Signed-off-by: Théo Lebrun Link: https://lore.kernel.org/r/20241007-mbly-clk-v5-3-e9d8994269cb@bootlin.com Signed-off-by: Stephen Boyd --- drivers/clk/clk-divider.c | 16 ++++++++++++---- include/linux/clk-provider.h | 15 ++++++++++----- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index a2c2b5203b0a..c1f426b8a504 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -72,6 +72,8 @@ static unsigned int _get_maxdiv(const struct clk_div_table *table, u8 width, return clk_div_mask(width); if (flags & CLK_DIVIDER_POWER_OF_TWO) return 1 << clk_div_mask(width); + if (flags & CLK_DIVIDER_EVEN_INTEGERS) + return 2 * (clk_div_mask(width) + 1); if (table) return _get_table_maxdiv(table, width); return clk_div_mask(width) + 1; @@ -97,6 +99,8 @@ static unsigned int _get_div(const struct clk_div_table *table, return 1 << val; if (flags & CLK_DIVIDER_MAX_AT_ZERO) return val ? val : clk_div_mask(width) + 1; + if (flags & CLK_DIVIDER_EVEN_INTEGERS) + return 2 * (val + 1); if (table) return _get_table_div(table, val); return val + 1; @@ -122,6 +126,8 @@ static unsigned int _get_val(const struct clk_div_table *table, return __ffs(div); if (flags & CLK_DIVIDER_MAX_AT_ZERO) return (div == clk_div_mask(width) + 1) ? 0 : div; + if (flags & CLK_DIVIDER_EVEN_INTEGERS) + return (div >> 1) - 1; if (table) return _get_table_val(table, div); return div - 1; @@ -538,7 +544,8 @@ struct clk_hw *__clk_hw_register_divider(struct device *dev, struct device_node *np, const char *name, const char *parent_name, const struct clk_hw *parent_hw, const struct clk_parent_data *parent_data, unsigned long flags, - void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, + void __iomem *reg, u8 shift, u8 width, + unsigned long clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock) { struct clk_divider *div; @@ -610,8 +617,8 @@ EXPORT_SYMBOL_GPL(__clk_hw_register_divider); struct clk *clk_register_divider_table(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, - u8 clk_divider_flags, const struct clk_div_table *table, - spinlock_t *lock) + unsigned long clk_divider_flags, + const struct clk_div_table *table, spinlock_t *lock) { struct clk_hw *hw; @@ -664,7 +671,8 @@ struct clk_hw *__devm_clk_hw_register_divider(struct device *dev, struct device_node *np, const char *name, const char *parent_name, const struct clk_hw *parent_hw, const struct clk_parent_data *parent_data, unsigned long flags, - void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, + void __iomem *reg, u8 shift, u8 width, + unsigned long clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock) { struct clk_hw **ptr, *hw; diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 7e43caabb54b..dbe793964c24 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -689,13 +689,15 @@ struct clk_div_table { * CLK_DIVIDER_BIG_ENDIAN - By default little endian register accesses are used * for the divider register. Setting this flag makes the register accesses * big endian. 
+ * CLK_DIVIDER_EVEN_INTEGERS - clock divisor is 2, 4, 6, 8, 10, etc. + * Formula is 2 * (value read from hardware + 1). */ struct clk_divider { struct clk_hw hw; void __iomem *reg; u8 shift; u8 width; - u8 flags; + u16 flags; const struct clk_div_table *table; spinlock_t *lock; }; @@ -711,6 +713,7 @@ struct clk_divider { #define CLK_DIVIDER_READ_ONLY BIT(5) #define CLK_DIVIDER_MAX_AT_ZERO BIT(6) #define CLK_DIVIDER_BIG_ENDIAN BIT(7) +#define CLK_DIVIDER_EVEN_INTEGERS BIT(8) extern const struct clk_ops clk_divider_ops; extern const struct clk_ops clk_divider_ro_ops; @@ -740,19 +743,21 @@ struct clk_hw *__clk_hw_register_divider(struct device *dev, struct device_node *np, const char *name, const char *parent_name, const struct clk_hw *parent_hw, const struct clk_parent_data *parent_data, unsigned long flags, - void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, + void __iomem *reg, u8 shift, u8 width, + unsigned long clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); struct clk_hw *__devm_clk_hw_register_divider(struct device *dev, struct device_node *np, const char *name, const char *parent_name, const struct clk_hw *parent_hw, const struct clk_parent_data *parent_data, unsigned long flags, - void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, + void __iomem *reg, u8 shift, u8 width, + unsigned long clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); struct clk *clk_register_divider_table(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, - u8 clk_divider_flags, const struct clk_div_table *table, - spinlock_t *lock); + unsigned long clk_divider_flags, + const struct clk_div_table *table, spinlock_t *lock); /** * clk_register_divider - register a divider clock with the clock framework * @dev: device registering this clock -- cgit v1.2.3 From ea1cca026842bc83af92785e61a4433bcc71ea70 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Thu, 17 Oct 2024 10:17:05 +0300 Subject: dt-bindings: clock: Add MediaTek MT6735 clock and reset bindings Add clock definitions for the main clock and reset controllers of MT6735 (apmixedsys, topckgen, infracfg and pericfg). 
Signed-off-by: Yassine Oudjana Reviewed-by: Conor Dooley Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20241017071708.38663-2-y.oudjana@protonmail.com Signed-off-by: Stephen Boyd --- .../bindings/clock/mediatek,apmixedsys.yaml | 4 +- .../bindings/clock/mediatek,infracfg.yaml | 8 ++- .../bindings/clock/mediatek,pericfg.yaml | 1 + .../bindings/clock/mediatek,topckgen.yaml | 4 +- MAINTAINERS | 12 ++++ .../dt-bindings/clock/mediatek,mt6735-apmixedsys.h | 16 +++++ .../dt-bindings/clock/mediatek,mt6735-infracfg.h | 25 +++++++ .../dt-bindings/clock/mediatek,mt6735-pericfg.h | 37 ++++++++++ .../dt-bindings/clock/mediatek,mt6735-topckgen.h | 79 ++++++++++++++++++++++ .../dt-bindings/reset/mediatek,mt6735-infracfg.h | 27 ++++++++ .../dt-bindings/reset/mediatek,mt6735-pericfg.h | 31 +++++++++ 11 files changed, 239 insertions(+), 5 deletions(-) create mode 100644 include/dt-bindings/clock/mediatek,mt6735-apmixedsys.h create mode 100644 include/dt-bindings/clock/mediatek,mt6735-infracfg.h create mode 100644 include/dt-bindings/clock/mediatek,mt6735-pericfg.h create mode 100644 include/dt-bindings/clock/mediatek,mt6735-topckgen.h create mode 100644 include/dt-bindings/reset/mediatek,mt6735-infracfg.h create mode 100644 include/dt-bindings/reset/mediatek,mt6735-pericfg.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/mediatek,apmixedsys.yaml b/Documentation/devicetree/bindings/clock/mediatek,apmixedsys.yaml index db5f48e4dd15..591a9e862c7d 100644 --- a/Documentation/devicetree/bindings/clock/mediatek,apmixedsys.yaml +++ b/Documentation/devicetree/bindings/clock/mediatek,apmixedsys.yaml @@ -12,7 +12,8 @@ maintainers: description: The Mediatek apmixedsys controller provides PLLs to the system. - The clock values can be found in . + The clock values can be found in + and . properties: compatible: @@ -34,6 +35,7 @@ properties: - enum: - mediatek,mt2701-apmixedsys - mediatek,mt2712-apmixedsys + - mediatek,mt6735-apmixedsys - mediatek,mt6765-apmixedsys - mediatek,mt6779-apmixed - mediatek,mt6795-apmixedsys diff --git a/Documentation/devicetree/bindings/clock/mediatek,infracfg.yaml b/Documentation/devicetree/bindings/clock/mediatek,infracfg.yaml index 252c46d316ee..d1d30700d9b0 100644 --- a/Documentation/devicetree/bindings/clock/mediatek,infracfg.yaml +++ b/Documentation/devicetree/bindings/clock/mediatek,infracfg.yaml @@ -11,9 +11,10 @@ maintainers: description: The Mediatek infracfg controller provides various clocks and reset outputs - to the system. The clock values can be found in , - and reset values in and - . + to the system. The clock values can be found in + and , and reset values in + , and + . 
properties: compatible: @@ -22,6 +23,7 @@ properties: - enum: - mediatek,mt2701-infracfg - mediatek,mt2712-infracfg + - mediatek,mt6735-infracfg - mediatek,mt6765-infracfg - mediatek,mt6795-infracfg - mediatek,mt6779-infracfg_ao diff --git a/Documentation/devicetree/bindings/clock/mediatek,pericfg.yaml b/Documentation/devicetree/bindings/clock/mediatek,pericfg.yaml index 2f06baecfd23..b98cf45efe2f 100644 --- a/Documentation/devicetree/bindings/clock/mediatek,pericfg.yaml +++ b/Documentation/devicetree/bindings/clock/mediatek,pericfg.yaml @@ -20,6 +20,7 @@ properties: - enum: - mediatek,mt2701-pericfg - mediatek,mt2712-pericfg + - mediatek,mt6735-pericfg - mediatek,mt6765-pericfg - mediatek,mt6795-pericfg - mediatek,mt7622-pericfg diff --git a/Documentation/devicetree/bindings/clock/mediatek,topckgen.yaml b/Documentation/devicetree/bindings/clock/mediatek,topckgen.yaml index bdf3b55bd56f..c080fb0a1618 100644 --- a/Documentation/devicetree/bindings/clock/mediatek,topckgen.yaml +++ b/Documentation/devicetree/bindings/clock/mediatek,topckgen.yaml @@ -12,7 +12,8 @@ maintainers: description: The Mediatek topckgen controller provides various clocks to the system. - The clock values can be found in . + The clock values can be found in and + . properties: compatible: @@ -31,6 +32,7 @@ properties: - enum: - mediatek,mt2701-topckgen - mediatek,mt2712-topckgen + - mediatek,mt6735-topckgen - mediatek,mt6765-topckgen - mediatek,mt6779-topckgen - mediatek,mt6795-topckgen diff --git a/MAINTAINERS b/MAINTAINERS index c27f3190737f..c5fdf3cccc9f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14528,6 +14528,18 @@ S: Maintained F: Documentation/devicetree/bindings/mmc/mtk-sd.yaml F: drivers/mmc/host/mtk-sd.c +MEDIATEK MT6735 CLOCK & RESET DRIVERS +M: Yassine Oudjana +L: linux-clk@vger.kernel.org +L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: include/dt-bindings/clock/mediatek,mt6735-apmixedsys.h +F: include/dt-bindings/clock/mediatek,mt6735-infracfg.h +F: include/dt-bindings/clock/mediatek,mt6735-pericfg.h +F: include/dt-bindings/clock/mediatek,mt6735-topckgen.h +F: include/dt-bindings/reset/mediatek,mt6735-infracfg.h +F: include/dt-bindings/reset/mediatek,mt6735-pericfg.h + MEDIATEK MT76 WIRELESS LAN DRIVER M: Felix Fietkau M: Lorenzo Bianconi diff --git a/include/dt-bindings/clock/mediatek,mt6735-apmixedsys.h b/include/dt-bindings/clock/mediatek,mt6735-apmixedsys.h new file mode 100644 index 000000000000..b4705204409c --- /dev/null +++ b/include/dt-bindings/clock/mediatek,mt6735-apmixedsys.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_CLK_MT6735_APMIXEDSYS_H +#define _DT_BINDINGS_CLK_MT6735_APMIXEDSYS_H + +#define CLK_APMIXED_ARMPLL 0 +#define CLK_APMIXED_MAINPLL 1 +#define CLK_APMIXED_UNIVPLL 2 +#define CLK_APMIXED_MMPLL 3 +#define CLK_APMIXED_MSDCPLL 4 +#define CLK_APMIXED_VENCPLL 5 +#define CLK_APMIXED_TVDPLL 6 +#define CLK_APMIXED_APLL1 7 +#define CLK_APMIXED_APLL2 8 + +#endif diff --git a/include/dt-bindings/clock/mediatek,mt6735-infracfg.h b/include/dt-bindings/clock/mediatek,mt6735-infracfg.h new file mode 100644 index 000000000000..d8dd51e15637 --- /dev/null +++ b/include/dt-bindings/clock/mediatek,mt6735-infracfg.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_CLK_MT6735_INFRACFG_H +#define _DT_BINDINGS_CLK_MT6735_INFRACFG_H + +#define CLK_INFRA_DBG 0 +#define CLK_INFRA_GCE 1 +#define CLK_INFRA_TRBG 2 +#define CLK_INFRA_CPUM 3 +#define 
CLK_INFRA_DEVAPC 4 +#define CLK_INFRA_AUDIO 5 +#define CLK_INFRA_GCPU 6 +#define CLK_INFRA_L2C_SRAM 7 +#define CLK_INFRA_M4U 8 +#define CLK_INFRA_CLDMA 9 +#define CLK_INFRA_CONNMCU_BUS 10 +#define CLK_INFRA_KP 11 +#define CLK_INFRA_APXGPT 12 +#define CLK_INFRA_SEJ 13 +#define CLK_INFRA_CCIF0_AP 14 +#define CLK_INFRA_CCIF1_AP 15 +#define CLK_INFRA_PMIC_SPI 16 +#define CLK_INFRA_PMIC_WRAP 17 + +#endif diff --git a/include/dt-bindings/clock/mediatek,mt6735-pericfg.h b/include/dt-bindings/clock/mediatek,mt6735-pericfg.h new file mode 100644 index 000000000000..16bc21bbd95b --- /dev/null +++ b/include/dt-bindings/clock/mediatek,mt6735-pericfg.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_CLK_MT6735_PERICFG_H +#define _DT_BINDINGS_CLK_MT6735_PERICFG_H + +#define CLK_PERI_DISP_PWM 0 +#define CLK_PERI_THERM 1 +#define CLK_PERI_PWM1 2 +#define CLK_PERI_PWM2 3 +#define CLK_PERI_PWM3 4 +#define CLK_PERI_PWM4 5 +#define CLK_PERI_PWM5 6 +#define CLK_PERI_PWM6 7 +#define CLK_PERI_PWM7 8 +#define CLK_PERI_PWM 9 +#define CLK_PERI_USB0 10 +#define CLK_PERI_IRDA 11 +#define CLK_PERI_APDMA 12 +#define CLK_PERI_MSDC30_0 13 +#define CLK_PERI_MSDC30_1 14 +#define CLK_PERI_MSDC30_2 15 +#define CLK_PERI_MSDC30_3 16 +#define CLK_PERI_UART0 17 +#define CLK_PERI_UART1 18 +#define CLK_PERI_UART2 19 +#define CLK_PERI_UART3 20 +#define CLK_PERI_UART4 21 +#define CLK_PERI_BTIF 22 +#define CLK_PERI_I2C0 23 +#define CLK_PERI_I2C1 24 +#define CLK_PERI_I2C2 25 +#define CLK_PERI_I2C3 26 +#define CLK_PERI_AUXADC 27 +#define CLK_PERI_SPI0 28 +#define CLK_PERI_IRTX 29 + +#endif diff --git a/include/dt-bindings/clock/mediatek,mt6735-topckgen.h b/include/dt-bindings/clock/mediatek,mt6735-topckgen.h new file mode 100644 index 000000000000..d4b1e113cc0a --- /dev/null +++ b/include/dt-bindings/clock/mediatek,mt6735-topckgen.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_CLK_MT6735_TOPCKGEN_H +#define _DT_BINDINGS_CLK_MT6735_TOPCKGEN_H + +#define CLK_TOP_AD_SYS_26M_CK 0 +#define CLK_TOP_CLKPH_MCK_O 1 +#define CLK_TOP_DMPLL 2 +#define CLK_TOP_DPI_CK 3 +#define CLK_TOP_WHPLL_AUDIO_CK 4 + +#define CLK_TOP_SYSPLL_D2 5 +#define CLK_TOP_SYSPLL_D3 6 +#define CLK_TOP_SYSPLL_D5 7 +#define CLK_TOP_SYSPLL1_D2 8 +#define CLK_TOP_SYSPLL1_D4 9 +#define CLK_TOP_SYSPLL1_D8 10 +#define CLK_TOP_SYSPLL1_D16 11 +#define CLK_TOP_SYSPLL2_D2 12 +#define CLK_TOP_SYSPLL2_D4 13 +#define CLK_TOP_SYSPLL3_D2 14 +#define CLK_TOP_SYSPLL3_D4 15 +#define CLK_TOP_SYSPLL4_D2 16 +#define CLK_TOP_SYSPLL4_D4 17 +#define CLK_TOP_UNIVPLL_D2 18 +#define CLK_TOP_UNIVPLL_D3 19 +#define CLK_TOP_UNIVPLL_D5 20 +#define CLK_TOP_UNIVPLL_D26 21 +#define CLK_TOP_UNIVPLL1_D2 22 +#define CLK_TOP_UNIVPLL1_D4 23 +#define CLK_TOP_UNIVPLL1_D8 24 +#define CLK_TOP_UNIVPLL2_D2 25 +#define CLK_TOP_UNIVPLL2_D4 26 +#define CLK_TOP_UNIVPLL2_D8 27 +#define CLK_TOP_UNIVPLL3_D2 28 +#define CLK_TOP_UNIVPLL3_D4 29 +#define CLK_TOP_MSDCPLL_D2 30 +#define CLK_TOP_MSDCPLL_D4 31 +#define CLK_TOP_MSDCPLL_D8 32 +#define CLK_TOP_MSDCPLL_D16 33 +#define CLK_TOP_VENCPLL_D3 34 +#define CLK_TOP_TVDPLL_D2 35 +#define CLK_TOP_TVDPLL_D4 36 +#define CLK_TOP_DMPLL_D2 37 +#define CLK_TOP_DMPLL_D4 38 +#define CLK_TOP_DMPLL_D8 39 +#define CLK_TOP_AD_SYS_26M_D2 40 + +#define CLK_TOP_AXI_SEL 41 +#define CLK_TOP_MEM_SEL 42 +#define CLK_TOP_DDRPHY_SEL 43 +#define CLK_TOP_MM_SEL 44 +#define CLK_TOP_PWM_SEL 45 +#define CLK_TOP_VDEC_SEL 46 +#define CLK_TOP_MFG_SEL 47 +#define CLK_TOP_CAMTG_SEL 48 +#define 
CLK_TOP_UART_SEL 49 +#define CLK_TOP_SPI_SEL 50 +#define CLK_TOP_USB20_SEL 51 +#define CLK_TOP_MSDC50_0_SEL 52 +#define CLK_TOP_MSDC30_0_SEL 53 +#define CLK_TOP_MSDC30_1_SEL 54 +#define CLK_TOP_MSDC30_2_SEL 55 +#define CLK_TOP_MSDC30_3_SEL 56 +#define CLK_TOP_AUDIO_SEL 57 +#define CLK_TOP_AUDINTBUS_SEL 58 +#define CLK_TOP_PMICSPI_SEL 59 +#define CLK_TOP_SCP_SEL 60 +#define CLK_TOP_ATB_SEL 61 +#define CLK_TOP_DPI0_SEL 62 +#define CLK_TOP_SCAM_SEL 63 +#define CLK_TOP_MFG13M_SEL 64 +#define CLK_TOP_AUD1_SEL 65 +#define CLK_TOP_AUD2_SEL 66 +#define CLK_TOP_IRDA_SEL 67 +#define CLK_TOP_IRTX_SEL 68 +#define CLK_TOP_DISPPWM_SEL 69 + +#endif diff --git a/include/dt-bindings/reset/mediatek,mt6735-infracfg.h b/include/dt-bindings/reset/mediatek,mt6735-infracfg.h new file mode 100644 index 000000000000..9df969090377 --- /dev/null +++ b/include/dt-bindings/reset/mediatek,mt6735-infracfg.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_RESET_MT6735_INFRACFG_H +#define _DT_BINDINGS_RESET_MT6735_INFRACFG_H + +#define MT6735_INFRA_RST0_EMI_REG 0 +#define MT6735_INFRA_RST0_DRAMC0_AO 1 +#define MT6735_INFRA_RST0_AP_CIRQ_EINT 2 +#define MT6735_INFRA_RST0_APXGPT 3 +#define MT6735_INFRA_RST0_SCPSYS 4 +#define MT6735_INFRA_RST0_KP 5 +#define MT6735_INFRA_RST0_PMIC_WRAP 6 +#define MT6735_INFRA_RST0_CLDMA_AO_TOP 7 +#define MT6735_INFRA_RST0_USBSIF_TOP 8 +#define MT6735_INFRA_RST0_EMI 9 +#define MT6735_INFRA_RST0_CCIF 10 +#define MT6735_INFRA_RST0_DRAMC0 11 +#define MT6735_INFRA_RST0_EMI_AO_REG 12 +#define MT6735_INFRA_RST0_CCIF_AO 13 +#define MT6735_INFRA_RST0_TRNG 14 +#define MT6735_INFRA_RST0_SYS_CIRQ 15 +#define MT6735_INFRA_RST0_GCE 16 +#define MT6735_INFRA_RST0_M4U 17 +#define MT6735_INFRA_RST0_CCIF1 18 +#define MT6735_INFRA_RST0_CLDMA_TOP_PD 19 + +#endif diff --git a/include/dt-bindings/reset/mediatek,mt6735-pericfg.h b/include/dt-bindings/reset/mediatek,mt6735-pericfg.h new file mode 100644 index 000000000000..a62bb192835a --- /dev/null +++ b/include/dt-bindings/reset/mediatek,mt6735-pericfg.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_RESET_MT6735_PERICFG_H +#define _DT_BINDINGS_RESET_MT6735_PERICFG_H + +#define MT6735_PERI_RST0_UART0 0 +#define MT6735_PERI_RST0_UART1 1 +#define MT6735_PERI_RST0_UART2 2 +#define MT6735_PERI_RST0_UART3 3 +#define MT6735_PERI_RST0_UART4 4 +#define MT6735_PERI_RST0_BTIF 5 +#define MT6735_PERI_RST0_DISP_PWM_PERI 6 +#define MT6735_PERI_RST0_PWM 7 +#define MT6735_PERI_RST0_AUXADC 8 +#define MT6735_PERI_RST0_DMA 9 +#define MT6735_PERI_RST0_IRDA 10 +#define MT6735_PERI_RST0_IRTX 11 +#define MT6735_PERI_RST0_THERM 12 +#define MT6735_PERI_RST0_MSDC2 13 +#define MT6735_PERI_RST0_MSDC3 14 +#define MT6735_PERI_RST0_MSDC0 15 +#define MT6735_PERI_RST0_MSDC1 16 +#define MT6735_PERI_RST0_I2C0 17 +#define MT6735_PERI_RST0_I2C1 18 +#define MT6735_PERI_RST0_I2C2 19 +#define MT6735_PERI_RST0_I2C3 20 +#define MT6735_PERI_RST0_USB 21 + +#define MT6735_PERI_RST1_SPI0 22 + +#endif -- cgit v1.2.3 From 44fcc479a574a4055fb6aed1f786d39999466383 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 17 Oct 2024 06:37:33 +0200 Subject: power: supply: hwmon: move interface to private header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The interface of power_supply_hwmon.c is only meant to be used by the psy core. Remove it from the public header file and use the private one instead. 
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241017-power-supply-cleanups-v2-1-cb0f5deab088@weissschuh.net Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply.h | 17 +++++++++++++++++ include/linux/power_supply.h | 13 ------------- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/power/supply/power_supply.h b/drivers/power/supply/power_supply.h index 4c558fb3db72..7434a6f24775 100644 --- a/drivers/power/supply/power_supply.h +++ b/drivers/power/supply/power_supply.h @@ -44,3 +44,20 @@ static inline int power_supply_create_triggers(struct power_supply *psy) static inline void power_supply_remove_triggers(struct power_supply *psy) {} #endif /* CONFIG_LEDS_TRIGGERS */ + +#ifdef CONFIG_POWER_SUPPLY_HWMON + +int power_supply_add_hwmon_sysfs(struct power_supply *psy); +void power_supply_remove_hwmon_sysfs(struct power_supply *psy); + +#else + +static inline int power_supply_add_hwmon_sysfs(struct power_supply *psy) +{ + return 0; +} + +static inline +void power_supply_remove_hwmon_sysfs(struct power_supply *psy) {} + +#endif /* CONFIG_POWER_SUPPLY_HWMON */ diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index c1d6cb7f4c40..b98106e1a90f 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -937,19 +937,6 @@ static inline bool power_supply_is_watt_property(enum power_supply_property psp) return false; } -#ifdef CONFIG_POWER_SUPPLY_HWMON -int power_supply_add_hwmon_sysfs(struct power_supply *psy); -void power_supply_remove_hwmon_sysfs(struct power_supply *psy); -#else -static inline int power_supply_add_hwmon_sysfs(struct power_supply *psy) -{ - return 0; -} - -static inline -void power_supply_remove_hwmon_sysfs(struct power_supply *psy) {} -#endif - #ifdef CONFIG_SYSFS ssize_t power_supply_charge_behaviour_show(struct device *dev, unsigned int available_behaviours, -- cgit v1.2.3 From 22823157d90c4631a951920090686c20c459b36f Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Thu, 12 Sep 2024 22:10:37 +0300 Subject: reset: npcm: register npcm8xx clock auxiliary bus device Add NPCM8xx clock controller auxiliary bus device registration. The NPCM8xx clock controller is registered as an aux device because the reset and the clock controller share the same register region. Signed-off-by: Tomer Maimon Tested-by: Benjamin Fair Reviewed-by: Philipp Zabel Link: https://lore.kernel.org/r/20240912191038.981105-3-tmaimon77@gmail.com Signed-off-by: Stephen Boyd --- drivers/reset/Kconfig | 1 + drivers/reset/reset-npcm.c | 78 ++++++++++++++++++++++++++++++++++++- include/soc/nuvoton/clock-npcm8xx.h | 18 +++++++++ 3 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 include/soc/nuvoton/clock-npcm8xx.h (limited to 'include') diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig index 5484a65f66b9..ace9c842d7bb 100644 --- a/drivers/reset/Kconfig +++ b/drivers/reset/Kconfig @@ -170,6 +170,7 @@ config RESET_MESON_AUDIO_ARB config RESET_NPCM bool "NPCM BMC Reset Driver" if COMPILE_TEST default ARCH_NPCM + select AUXILIARY_BUS help This enables the reset controller driver for Nuvoton NPCM BMC SoCs. diff --git a/drivers/reset/reset-npcm.c b/drivers/reset/reset-npcm.c index 8935ef95a2d1..4f3b9fc58de0 100644 --- a/drivers/reset/reset-npcm.c +++ b/drivers/reset/reset-npcm.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Nuvoton Technology corporation. 
+#include #include #include #include @@ -10,11 +11,14 @@ #include #include #include +#include #include #include #include #include +#include + /* NPCM7xx GCR registers */ #define NPCM_MDLR_OFFSET 0x7C #define NPCM7XX_MDLR_USBD0 BIT(9) @@ -89,6 +93,7 @@ struct npcm_rc_data { const struct npcm_reset_info *info; struct regmap *gcr_regmap; u32 sw_reset_number; + struct device *dev; void __iomem *base; spinlock_t lock; }; @@ -372,6 +377,67 @@ static const struct reset_control_ops npcm_rc_ops = { .status = npcm_rc_status, }; +static void npcm_clock_unregister_adev(void *_adev) +{ + struct auxiliary_device *adev = _adev; + + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); +} + +static void npcm_clock_adev_release(struct device *dev) +{ + struct auxiliary_device *adev = to_auxiliary_dev(dev); + struct npcm_clock_adev *rdev = to_npcm_clock_adev(adev); + + kfree(rdev); +} + +static struct auxiliary_device *npcm_clock_adev_alloc(struct npcm_rc_data *rst_data, char *clk_name) +{ + struct npcm_clock_adev *rdev; + struct auxiliary_device *adev; + int ret; + + rdev = kzalloc(sizeof(*rdev), GFP_KERNEL); + if (!rdev) + return ERR_PTR(-ENOMEM); + + rdev->base = rst_data->base; + + adev = &rdev->adev; + adev->name = clk_name; + adev->dev.parent = rst_data->dev; + adev->dev.release = npcm_clock_adev_release; + adev->id = 555u; + + ret = auxiliary_device_init(adev); + if (ret) { + kfree(rdev); + return ERR_PTR(ret); + } + + return adev; +} + +static int npcm8xx_clock_controller_register(struct npcm_rc_data *rst_data, char *clk_name) +{ + struct auxiliary_device *adev; + int ret; + + adev = npcm_clock_adev_alloc(rst_data, clk_name); + if (IS_ERR(adev)) + return PTR_ERR(adev); + + ret = auxiliary_device_add(adev); + if (ret) { + auxiliary_device_uninit(adev); + return ret; + } + + return devm_add_action_or_reset(rst_data->dev, npcm_clock_unregister_adev, adev); +} + static int npcm_rc_probe(struct platform_device *pdev) { struct npcm_rc_data *rc; @@ -392,6 +458,7 @@ static int npcm_rc_probe(struct platform_device *pdev) rc->rcdev.of_node = pdev->dev.of_node; rc->rcdev.of_reset_n_cells = 2; rc->rcdev.of_xlate = npcm_reset_xlate; + rc->dev = &pdev->dev; ret = devm_reset_controller_register(&pdev->dev, &rc->rcdev); if (ret) { @@ -408,12 +475,19 @@ static int npcm_rc_probe(struct platform_device *pdev) rc->restart_nb.priority = 192, rc->restart_nb.notifier_call = npcm_rc_restart, ret = register_restart_handler(&rc->restart_nb); - if (ret) + if (ret) { dev_warn(&pdev->dev, "failed to register restart handler\n"); + return ret; + } } } - return ret; + switch (rc->info->bmc_id) { + case BMC_NPCM8XX: + return npcm8xx_clock_controller_register(rc, "clk-npcm8xx"); + default: + return 0; + } } static struct platform_driver npcm_rc_driver = { diff --git a/include/soc/nuvoton/clock-npcm8xx.h b/include/soc/nuvoton/clock-npcm8xx.h new file mode 100644 index 000000000000..1d974e89d8a8 --- /dev/null +++ b/include/soc/nuvoton/clock-npcm8xx.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SOC_NPCM8XX_CLOCK_H +#define __SOC_NPCM8XX_CLOCK_H + +#include +#include + +struct npcm_clock_adev { + void __iomem *base; + struct auxiliary_device adev; +}; + +static inline struct npcm_clock_adev *to_npcm_clock_adev(struct auxiliary_device *_adev) +{ + return container_of(_adev, struct npcm_clock_adev, adev); +} + +#endif -- cgit v1.2.3 From 0784181b44af831a3fa52e1e5ff77c388d699dba Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 26 Sep 2024 16:17:37 +0100 Subject: lockdep: Add 
lockdep_cleanup_dead_cpu() Add a function to check that an offline CPU has left the tracing infrastructure in a sane state. Commit 9bb69ba4c177 ("ACPI: processor_idle: use raw_safe_halt() in acpi_idle_play_dead()") fixed an issue where the acpi_idle_play_dead() function called safe_halt() instead of raw_safe_halt(), which had the side-effect of setting the hardirqs_enabled flag for the offline CPU. On x86 this triggered warnings from lockdep_assert_irqs_disabled() when the CPU was brought back online again later. These warnings were too early for the exception to be handled correctly, leading to a triple-fault. Add lockdep_cleanup_dead_cpu() to check for this kind of failure mode, print the events leading up to it, and correct it so that the CPU can come online again correctly. Re-introducing the original bug now merely results in this warning instead: [ 61.556652] smpboot: CPU 1 is now offline [ 61.556769] CPU 1 left hardirqs enabled! [ 61.556915] irq event stamp: 128149 [ 61.556965] hardirqs last enabled at (128149): [] acpi_idle_play_dead+0x46/0x70 [ 61.557055] hardirqs last disabled at (128148): [] do_idle+0x90/0xe0 [ 61.557117] softirqs last enabled at (128078): [] __do_softirq+0x31c/0x423 [ 61.557199] softirqs last disabled at (128065): [] __irq_exit_rcu+0x91/0x100 [boqun: Capitalize the title and reword the message a bit] Signed-off-by: David Woodhouse Reviewed-by: Thomas Gleixner Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/f7bd2b3b999051bb3ef4be34526a9262008285f5.camel@infradead.org --- include/linux/irqflags.h | 6 ++++++ kernel/cpu.c | 1 + kernel/locking/lockdep.c | 24 ++++++++++++++++++++++++ 3 files changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 3f003d5fde53..57b074e0cfbb 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -18,6 +18,8 @@ #include #include +struct task_struct; + /* Currently lockdep_softirqs_on/off is used only by lockdep */ #ifdef CONFIG_PROVE_LOCKING extern void lockdep_softirqs_on(unsigned long ip); @@ -25,12 +27,16 @@ extern void lockdep_hardirqs_on_prepare(void); extern void lockdep_hardirqs_on(unsigned long ip); extern void lockdep_hardirqs_off(unsigned long ip); + extern void lockdep_cleanup_dead_cpu(unsigned int cpu, + struct task_struct *idle); #else static inline void lockdep_softirqs_on(unsigned long ip) { } static inline void lockdep_softirqs_off(unsigned long ip) { } static inline void lockdep_hardirqs_on_prepare(void) { } static inline void lockdep_hardirqs_on(unsigned long ip) { } static inline void lockdep_hardirqs_off(unsigned long ip) { } + static inline void lockdep_cleanup_dead_cpu(unsigned int cpu, + struct task_struct *idle) {} #endif #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/kernel/cpu.c b/kernel/cpu.c index d293d52a3e00..c4aaf73dec9e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1338,6 +1338,7 @@ static int takedown_cpu(unsigned int cpu) cpuhp_bp_sync_dead(cpu); + lockdep_cleanup_dead_cpu(cpu, idle_thread_get(cpu)); tick_cleanup_dead_cpu(cpu); /* diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 536bd471557f..6fd4af217e71 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4586,6 +4586,30 @@ void lockdep_softirqs_off(unsigned long ip) debug_atomic_inc(redundant_softirqs_off); } +/** + * lockdep_cleanup_dead_cpu - Ensure CPU lockdep state is cleanly stopped + * + * @cpu: index of offlined CPU + * @idle: task pointer for offlined CPU's idle thread + * + * Invoked after the CPU is dead. 
Ensures that the tracing infrastructure + * is left in a suitable state for the CPU to be subsequently brought + * online again. + */ +void lockdep_cleanup_dead_cpu(unsigned int cpu, struct task_struct *idle) +{ + if (unlikely(!debug_locks)) + return; + + if (unlikely(per_cpu(hardirqs_enabled, cpu))) { + pr_warn("CPU %u left hardirqs enabled!", cpu); + if (idle) + print_irqtrace_events(idle); + /* Clean it up for when the CPU comes online again. */ + per_cpu(hardirqs_enabled, cpu) = 0; + } +} + static int mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) { -- cgit v1.2.3 From d7fe143cb115076fed0126ad8cf5ba6c3e575e43 Mon Sep 17 00:00:00 2001 From: Ahmed Ehab Date: Sun, 25 Aug 2024 01:10:30 +0300 Subject: locking/lockdep: Avoid creating new name string literals in lockdep_set_subclass() Syzbot reports a problem that a warning will be triggered while searching a lock class in look_up_lock_class(). The cause of the issue is that a new name is created and used by lockdep_set_subclass() instead of using the existing one. This results in a lock instance has a different name pointer than previous registered one stored in lock class, and WARN_ONCE() is triggered because of that in look_up_lock_class(). To fix this, change lockdep_set_subclass() to use the existing name instead of a new one. Hence, no new name will be created by lockdep_set_subclass(). Hence, the warning is avoided. [boqun: Reword the commit log to state the correct issue] Reported-by: Fixes: de8f5e4f2dc1f ("lockdep: Introduce wait-type checks") Cc: stable@vger.kernel.org Signed-off-by: Ahmed Ehab Signed-off-by: Boqun Feng Link: https://lore.kernel.org/lkml/20240824221031.7751-1-bottaawesome633@gmail.com/ --- include/linux/lockdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 217f7abf2cbf..67964dc4db95 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -173,7 +173,7 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name, (lock)->dep_map.lock_type) #define lockdep_set_subclass(lock, sub) \ - lockdep_init_map_type(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ + lockdep_init_map_type(&(lock)->dep_map, (lock)->dep_map.name, (lock)->dep_map.key, sub,\ (lock)->dep_map.wait_type_inner, \ (lock)->dep_map.wait_type_outer, \ (lock)->dep_map.lock_type) -- cgit v1.2.3 From 6eaa83ec229b1e99130456c4f6658430d509c76c Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 17 Oct 2024 17:11:08 +0300 Subject: PCI: Remove unused PCI_SUBTRACTIVE_DECODE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2fe2abf896c1 ("PCI: augment bus resource table with a list") added PCI_SUBTRACTIVE_DECODE which is put into the struct pci_bus_resource flags field but is never read. There seems to never have been users for it. Remove both PCI_SUBTRACTIVE_DECODE and the flags field from the struct pci_bus_resource. 
Link: https://lore.kernel.org/r/20241017141111.44612-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- arch/s390/pci/pci_bus.c | 2 +- arch/x86/pci/fixup.c | 2 +- drivers/pci/bus.c | 4 +--- drivers/pci/probe.c | 5 ++--- include/linux/pci.h | 12 +----------- 5 files changed, 6 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index daa5d7450c7d..5630af5deb8b 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -53,7 +53,7 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev) zpci_setup_bus_resources(zdev); for (i = 0; i < PCI_STD_NUM_BARS; i++) { if (zdev->bars[i].res) - pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res, 0); + pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res); } } diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 98a9bb92d75c..0681ecfe3430 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -757,7 +757,7 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n", res); add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - pci_bus_add_resource(dev->bus, res, 0); + pci_bus_add_resource(dev->bus, res); } base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) | diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 55c853686051..9cf6d0f3ab2b 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -46,8 +46,7 @@ void pci_free_resource_list(struct list_head *resources) } EXPORT_SYMBOL(pci_free_resource_list); -void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, - unsigned int flags) +void pci_bus_add_resource(struct pci_bus *bus, struct resource *res) { struct pci_bus_resource *bus_res; @@ -58,7 +57,6 @@ void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, } bus_res->res = res; - bus_res->flags = flags; list_add_tail(&bus_res->list, &bus->resources); } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4f68414c3086..4243b1e6ece2 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -546,8 +546,7 @@ void pci_read_bridge_bases(struct pci_bus *child) if (dev->transparent) { pci_bus_for_each_resource(child->parent, res) { if (res && res->flags) { - pci_bus_add_resource(child, res, - PCI_SUBTRACTIVE_DECODE); + pci_bus_add_resource(child, res); pci_info(dev, " bridge window %pR (subtractive decode)\n", res); } @@ -1032,7 +1031,7 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) if (res->flags & IORESOURCE_BUS) pci_bus_insert_busn_res(bus, bus->number, res->end); else - pci_bus_add_resource(bus, res, 0); + pci_bus_add_resource(bus, res); if (offset) { if (resource_type(res) == IORESOURCE_IO) diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..6a9cf80d0d4b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -633,18 +633,9 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge); * Use pci_bus_for_each_resource() to iterate through all the resources. */ -/* - * PCI_SUBTRACTIVE_DECODE means the bridge forwards the window implicitly - * and there's no way to program the bridge with the details of the window. - * This does not apply to ACPI _CRS windows, even with the _DEC subtractive- - * decode bit set, because they are explicit and can be programmed with _SRS. 
- */ -#define PCI_SUBTRACTIVE_DECODE 0x1 - struct pci_bus_resource { struct list_head list; struct resource *res; - unsigned int flags; }; #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ @@ -1498,8 +1489,7 @@ void pci_add_resource(struct list_head *resources, struct resource *res); void pci_add_resource_offset(struct list_head *resources, struct resource *res, resource_size_t offset); void pci_free_resource_list(struct list_head *resources); -void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, - unsigned int flags); +void pci_bus_add_resource(struct pci_bus *bus, struct resource *res); struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n); void pci_bus_remove_resources(struct pci_bus *bus); void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res); -- cgit v1.2.3 From 469c9cb941480718db52876bbdb95a7a79b34889 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 17 Oct 2024 17:11:09 +0300 Subject: PCI: Move struct pci_bus_resource into bus.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct pci_bus_resource is only used in bus.c, so move it there. Link: https://lore.kernel.org/r/20241017141111.44612-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/bus.c | 12 ++++++++++++ include/linux/pci.h | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 9cf6d0f3ab2b..e0a2441be6d3 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -18,6 +18,18 @@ #include "pci.h" +/* + * The first PCI_BRIDGE_RESOURCE_NUM PCI bus resources (those that correspond + * to P2P or CardBus bridge windows) go in a table. Additional ones (for + * buses below host bridges or subtractive decode bridges) go in the list. + * Use pci_bus_for_each_resource() to iterate through all the resources. + */ + +struct pci_bus_resource { + struct list_head list; + struct resource *res; +}; + void pci_add_resource_offset(struct list_head *resources, struct resource *res, resource_size_t offset) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 6a9cf80d0d4b..5a9d849b28ef 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -626,18 +626,6 @@ void pci_set_host_bridge_release(struct pci_host_bridge *bridge, int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge); -/* - * The first PCI_BRIDGE_RESOURCE_NUM PCI bus resources (those that correspond - * to P2P or CardBus bridge windows) go in a table. Additional ones (for - * buses below host bridges or subtractive decode bridges) go in the list. - * Use pci_bus_for_each_resource() to iterate through all the resources. - */ - -struct pci_bus_resource { - struct list_head list; - struct resource *res; -}; - #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ struct pci_bus { -- cgit v1.2.3 From 08ef26ea9ab315b895d57f8fbad41e02ff345bb9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Oct 2024 16:23:58 +0200 Subject: fs: add file_ref As atomic_inc_not_zero() is implemented with a try_cmpxchg() loop it has O(N^2) behaviour under contention with N concurrent operations and it is in a hot path in __fget_files_rcu(). 
The rcuref infrastructures remedies this problem by using an unconditional increment relying on safe- and dead zones to make this work and requiring rcu protection for the data structure in question. This not just scales better it also introduces overflow protection. However, in contrast to generic rcuref, files require a memory barrier and thus cannot rely on *_relaxed() atomic operations and also require to be built on atomic_long_t as having massive amounts of reference isn't unheard of even if it is just an attack. As suggested by Linus, add a file specific variant instead of making this a generic library. Files are SLAB_TYPESAFE_BY_RCU and thus don't have "regular" rcu protection. In short, freeing of files isn't delayed until a grace period has elapsed. Instead, they are freed immediately and thus can be reused (multiple times) within the same grace period. So when picking a file from the file descriptor table via its file descriptor number it is thus possible to see an elevated reference count on file->f_count even though the file has already been recycled possibly multiple times by another task. To guard against this the vfs will pick the file from the file descriptor table twice. Once before the refcount increment and once after to compare the pointers (grossly simplified). If they match then the file is still valid. If not the caller needs to fput() it. The unconditional increment makes the following race possible as illustrated by rcuref: > Deconstruction race > =================== > > The release operation must be protected by prohibiting a grace period in > order to prevent a possible use after free: > > T1 T2 > put() get() > // ref->refcnt = ONEREF > if (!atomic_add_negative(-1, &ref->refcnt)) > return false; <- Not taken > > // ref->refcnt == NOREF > --> preemption > // Elevates ref->refcnt to ONEREF > if (!atomic_add_negative(1, &ref->refcnt)) > return true; <- taken > > if (put(&p->ref)) { <-- Succeeds > remove_pointer(p); > kfree_rcu(p, rcu); > } > > RCU grace period ends, object is freed > > atomic_cmpxchg(&ref->refcnt, NOREF, DEAD); <- UAF > > [...] it prevents the grace period which keeps the object alive until > all put() operations complete. Having files by SLAB_TYPESAFE_BY_RCU shouldn't cause any problems for this deconstruction race. Afaict, the only interesting case would be someone freeing the file and someone immediately recycling it within the same grace period and reinitializing file->f_count to ONEREF while a concurrent fput() is doing atomic_cmpxchg(&ref->refcnt, NOREF, DEAD) as in the race above. But this is safe from SLAB_TYPESAFE_BY_RCU's perspective and it should be safe from rcuref's perspective. T1 T2 T3 fput() fget() // f_count->refcnt = ONEREF if (!atomic_add_negative(-1, &f_count->refcnt)) return false; <- Not taken // f_count->refcnt == NOREF --> preemption // Elevates f_count->refcnt to ONEREF if (!atomic_add_negative(1, &f_count->refcnt)) return true; <- taken if (put(&f_count)) { <-- Succeeds remove_pointer(p); /* * Cache is SLAB_TYPESAFE_BY_RCU * so this is freed without a grace period. */ kmem_cache_free(p); } kmem_cache_alloc() init_file() { // Sets f_count->refcnt to ONEREF rcuref_long_init(&f->f_count, 1); } Object has been reused within the same grace period via kmem_cache_alloc()'s SLAB_TYPESAFE_BY_RCU. /* * With SLAB_TYPESAFE_BY_RCU this would be a safe UAF access and * it would work correctly because the atomic_cmpxchg() * will fail because the refcount has been reset to ONEREF by T3. 
*/ atomic_cmpxchg(&ref->refcnt, NOREF, DEAD); <- UAF However, there are other cases to consider: (1) Benign race due to multiple atomic_long_read() CPU1 CPU2 file_ref_put() // last reference // => count goes negative/FILE_REF_NOREF atomic_long_add_negative_release(-1, &ref->refcnt) -> __file_ref_put() file_ref_get() // goes back from negative/FILE_REF_NOREF to 0 // and file_ref_get() succeeds atomic_long_add_negative(1, &ref->refcnt) // This is immediately followed by file_ref_put() // managing to set FILE_REF_DEAD file_ref_put() // __file_ref_put() continues and sees // cnt > FILE_REF_RELEASED // and splats with // "imbalanced put on file reference count" cnt = atomic_long_read(&ref->refcnt); The race however is benign and the problem is the atomic_long_read(). Instead of performing a separate read this uses atomic_long_dec_return() and pass the value to __file_ref_put(). Thanks to Linus for pointing out that braino. (2) SLAB_TYPESAFE_BY_RCU may cause recycled files to be marked dead When a file is recycled the following race exists: CPU1 CPU2 // @file is already dead and thus // cnt >= FILE_REF_RELEASED. file_ref_get(file) atomic_long_add_negative(1, &ref->refcnt) // We thus call into __file_ref_get() -> __file_ref_get() // which sees cnt >= FILE_REF_RELEASED cnt = atomic_long_read(&ref->refcnt); // In the meantime @file gets freed kmem_cache_free() // and is immediately recycled file = kmem_cache_zalloc() // and the reference count is reinitialized // and the file alive again in someone // else's file descriptor table file_ref_init(&ref->refcnt, 1); // the __file_ref_get() slowpath now continues // and as it saw earlier that cnt >= FILE_REF_RELEASED // it wants to ensure that we're staying in the middle // of the deadzone and unconditionally sets // FILE_REF_DEAD. // This marks @file dead for CPU2... atomic_long_set(&ref->refcnt, FILE_REF_DEAD); // Caller issues a close() system call to close @file close(fd) file = file_close_fd_locked() filp_flush() // The caller sees that cnt >= FILE_REF_RELEASED // and warns the first time... CHECK_DATA_CORRUPTION(file_count(file) == 0) // and then splats a second time because // __file_ref_put() sees cnt >= FILE_REF_RELEASED file_ref_put(&ref->refcnt); -> __file_ref_put() My initial inclination was to replace the unconditional atomic_long_set() with an atomic_long_try_cmpxchg() but Linus pointed out that: > I think we should just make file_ref_get() do a simple > > return !atomic_long_add_negative(1, &ref->refcnt)); > > and nothing else. Yes, multiple CPU's can race, and you can increment > more than once, but the gap - even on 32-bit - between DEAD and > becoming close to REF_RELEASED is so big that we simply don't care. > That's the point of having a gap. I've been testing this with will-it-scale using fstat() on a machine that Jens gave me access (thank you very much!): processor : 511 vendor_id : AuthenticAMD cpu family : 25 model : 160 model name : AMD EPYC 9754 128-Core Processor and I consistently get a 3-5% improvement on 256+ threads. 
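As an illustration only (not part of this patch): a minimal sketch of how a hypothetical SLAB_TYPESAFE_BY_RCU-style consumer could drive the new file_ref API described above. Everything except the file_ref_* calls (struct demo_obj, demo_alloc, demo_get, demo_put) is made up for the example, and the fget()-style double lookup and pointer re-check that files themselves need is omitted for brevity:

	#include <linux/file_ref.h>
	#include <linux/slab.h>

	/* Hypothetical refcounted object guarded by a file_ref_t. */
	struct demo_obj {
		file_ref_t ref;
		int payload;
	};

	static struct demo_obj *demo_alloc(void)
	{
		struct demo_obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

		if (o)
			file_ref_init(&o->ref, 1);	/* one initial reference */
		return o;
	}

	/* Lookup path: succeed only if a reference could still be taken. */
	static struct demo_obj *demo_get(struct demo_obj *o)
	{
		return file_ref_get(&o->ref) ? o : NULL;
	}

	static void demo_put(struct demo_obj *o)
	{
		/* True only for the final put; the object may then be freed. */
		if (file_ref_put(&o->ref))
			kfree(o);
	}
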
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202410151043.5d224a27-oliver.sang@intel.com Closes: https://lore.kernel.org/all/202410151611.f4cd71f2-oliver.sang@intel.com Link: https://lore.kernel.org/r/20241007-brauner-file-rcuref-v2-2-387e24dc9163@kernel.org Signed-off-by: Christian Brauner --- fs/file.c | 63 +++++++++++++++++ include/linux/file_ref.h | 177 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 240 insertions(+) create mode 100644 include/linux/file_ref.h (limited to 'include') diff --git a/fs/file.c b/fs/file.c index eb093e736972..ab15e2a3d280 100644 --- a/fs/file.c +++ b/fs/file.c @@ -20,10 +20,73 @@ #include #include #include +#include #include #include "internal.h" +/** + * __file_ref_put - Slowpath of file_ref_put() + * @ref: Pointer to the reference count + * @cnt: Current reference count + * + * Invoked when the reference count is outside of the valid zone. + * + * Return: + * True if this was the last reference with no future references + * possible. This signals the caller that it can safely schedule the + * object, which is protected by the reference counter, for + * deconstruction. + * + * False if there are still active references or the put() raced + * with a concurrent get()/put() pair. Caller is not allowed to + * deconstruct the protected object. + */ +bool __file_ref_put(file_ref_t *ref, unsigned long cnt) +{ + /* Did this drop the last reference? */ + if (likely(cnt == FILE_REF_NOREF)) { + /* + * Carefully try to set the reference count to FILE_REF_DEAD. + * + * This can fail if a concurrent get() operation has + * elevated it again or the corresponding put() even marked + * it dead already. Both are valid situations and do not + * require a retry. If this fails the caller is not + * allowed to deconstruct the object. + */ + if (!atomic_long_try_cmpxchg_release(&ref->refcnt, &cnt, FILE_REF_DEAD)) + return false; + + /* + * The caller can safely schedule the object for + * deconstruction. Provide acquire ordering. + */ + smp_acquire__after_ctrl_dep(); + return true; + } + + /* + * If the reference count was already in the dead zone, then this + * put() operation is imbalanced. Warn, put the reference count back to + * DEAD and tell the caller to not deconstruct the object. + */ + if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) { + atomic_long_set(&ref->refcnt, FILE_REF_DEAD); + return false; + } + + /* + * This is a put() operation on a saturated refcount. Restore the + * mean saturation value and tell the caller to not deconstruct the + * object. + */ + if (cnt > FILE_REF_MAXREF) + atomic_long_set(&ref->refcnt, FILE_REF_SATURATED); + return false; +} +EXPORT_SYMBOL_GPL(__file_ref_put); + unsigned int sysctl_nr_open __read_mostly = 1024*1024; unsigned int sysctl_nr_open_min = BITS_PER_LONG; /* our min() is unusable in constant expressions ;-/ */ diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h new file mode 100644 index 000000000000..9b3a8d9b17ab --- /dev/null +++ b/include/linux/file_ref.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _LINUX_FILE_REF_H +#define _LINUX_FILE_REF_H + +#include +#include +#include + +/* + * file_ref is a reference count implementation specifically for use by + * files. It takes inspiration from rcuref but differs in key aspects + * such as support for SLAB_TYPESAFE_BY_RCU type caches. 
+ * + * FILE_REF_ONEREF FILE_REF_MAXREF + * 0x0000000000000000UL 0x7FFFFFFFFFFFFFFFUL + * <-------------------valid -------------------> + * + * FILE_REF_SATURATED + * 0x8000000000000000UL 0xA000000000000000UL 0xBFFFFFFFFFFFFFFFUL + * <-----------------------saturation zone----------------------> + * + * FILE_REF_RELEASED FILE_REF_DEAD + * 0xC000000000000000UL 0xE000000000000000UL + * <-------------------dead zone-------------------> + * + * FILE_REF_NOREF + * 0xFFFFFFFFFFFFFFFFUL + */ + +#ifdef CONFIG_64BIT +#define FILE_REF_ONEREF 0x0000000000000000UL +#define FILE_REF_MAXREF 0x7FFFFFFFFFFFFFFFUL +#define FILE_REF_SATURATED 0xA000000000000000UL +#define FILE_REF_RELEASED 0xC000000000000000UL +#define FILE_REF_DEAD 0xE000000000000000UL +#define FILE_REF_NOREF 0xFFFFFFFFFFFFFFFFUL +#else +#define FILE_REF_ONEREF 0x00000000U +#define FILE_REF_MAXREF 0x7FFFFFFFU +#define FILE_REF_SATURATED 0xA0000000U +#define FILE_REF_RELEASED 0xC0000000U +#define FILE_REF_DEAD 0xE0000000U +#define FILE_REF_NOREF 0xFFFFFFFFU +#endif + +typedef struct { +#ifdef CONFIG_64BIT + atomic64_t refcnt; +#else + atomic_t refcnt; +#endif +} file_ref_t; + +/** + * file_ref_init - Initialize a file reference count + * @ref: Pointer to the reference count + * @cnt: The initial reference count typically '1' + */ +static inline void file_ref_init(file_ref_t *ref, unsigned long cnt) +{ + atomic_long_set(&ref->refcnt, cnt - 1); +} + +bool __file_ref_put(file_ref_t *ref, unsigned long cnt); + +/** + * file_ref_get - Acquire one reference on a file + * @ref: Pointer to the reference count + * + * Similar to atomic_inc_not_zero() but saturates at FILE_REF_MAXREF. + * + * Provides full memory ordering. + * + * Return: False if the attempt to acquire a reference failed. This happens + * when the last reference has been put already. True if a reference + * was successfully acquired + */ +static __always_inline __must_check bool file_ref_get(file_ref_t *ref) +{ + /* + * Unconditionally increase the reference count with full + * ordering. The saturation and dead zones provide enough + * tolerance for this. + * + * If this indicates negative the file in question the fail can + * be freed and immediately reused due to SLAB_TYPSAFE_BY_RCU. + * Hence, unconditionally altering the file reference count to + * e.g., reset the file reference count back to the middle of + * the deadzone risk end up marking someone else's file as dead + * behind their back. + * + * It would be possible to do a careful: + * + * cnt = atomic_long_inc_return(); + * if (likely(cnt >= 0)) + * return true; + * + * and then something like: + * + * if (cnt >= FILE_REF_RELEASE) + * atomic_long_try_cmpxchg(&ref->refcnt, &cnt, FILE_REF_DEAD), + * + * to set the value back to the middle of the deadzone. But it's + * practically impossible to go from FILE_REF_DEAD to + * FILE_REF_ONEREF. It would need 2305843009213693952/2^61 + * file_ref_get()s to resurrect such a dead file. + */ + return !atomic_long_add_negative(1, &ref->refcnt); +} + +/** + * file_ref_inc - Acquire one reference on a file + * @ref: Pointer to the reference count + * + * Acquire an additional reference on a file. Warns if the caller didn't + * already hold a reference. 
+ */ +static __always_inline void file_ref_inc(file_ref_t *ref) +{ + long prior = atomic_long_fetch_inc_relaxed(&ref->refcnt); + WARN_ONCE(prior < 0, "file_ref_inc() on a released file reference"); +} + +/** + * file_ref_put -- Release a file reference + * @ref: Pointer to the reference count + * + * Provides release memory ordering, such that prior loads and stores + * are done before, and provides an acquire ordering on success such + * that free() must come after. + * + * Return: True if this was the last reference with no future references + * possible. This signals the caller that it can safely release + * the object which is protected by the reference counter. + * False if there are still active references or the put() raced + * with a concurrent get()/put() pair. Caller is not allowed to + * release the protected object. + */ +static __always_inline __must_check bool file_ref_put(file_ref_t *ref) +{ + long cnt; + + /* + * While files are SLAB_TYPESAFE_BY_RCU and thus file_ref_put() + * calls don't risk UAFs when a file is recyclyed, it is still + * vulnerable to UAFs caused by freeing the whole slab page once + * it becomes unused. Prevent file_ref_put() from being + * preempted protects against this. + */ + guard(preempt)(); + /* + * Unconditionally decrease the reference count. The saturation + * and dead zones provide enough tolerance for this. If this + * fails then we need to handle the last reference drop and + * cases inside the saturation and dead zones. + */ + cnt = atomic_long_dec_return(&ref->refcnt); + if (cnt >= 0) + return false; + return __file_ref_put(ref, cnt); +} + +/** + * file_ref_read - Read the number of file references + * @ref: Pointer to the reference count + * + * Return: The number of held references (0 ... N) + */ +static inline unsigned long file_ref_read(file_ref_t *ref) +{ + unsigned long c = atomic_long_read(&ref->refcnt); + + /* Return 0 if within the DEAD zone. */ + return c >= FILE_REF_RELEASED ? 0 : c + 1; +} + +#endif -- cgit v1.2.3 From 9a8dbdadae509e5717ff6e5aa572ca0974d2101d Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 19 Oct 2024 12:51:06 +0000 Subject: block/fs: Pass an iocb to generic_atomic_write_valid() Darrick and Hannes both thought it better that generic_atomic_write_valid() should be passed a struct iocb, and not just the member of that struct which is referenced; see [0] and [1]. I think that makes a more generic and clean API, so make that change. [0] https://lore.kernel.org/linux-block/680ce641-729b-4150-b875-531a98657682@suse.de/ [1] https://lore.kernel.org/linux-xfs/20240620212401.GA3058325@frogsfrogsfrogs/ Fixes: c34fc6f26ab8 ("fs: Initial atomic write support") Suggested-by: Darrick J. Wong Suggested-by: Hannes Reinecke Reviewed-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Link: https://lore.kernel.org/r/20241019125113.369994-2-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- block/fops.c | 8 ++++---- fs/read_write.c | 4 ++-- include/linux/fs.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/block/fops.c b/block/fops.c index e696ae53bf1e..968b47b615c4 100644 --- a/block/fops.c +++ b/block/fops.c @@ -35,13 +35,13 @@ static blk_opf_t dio_bio_write_op(struct kiocb *iocb) return opf; } -static bool blkdev_dio_invalid(struct block_device *bdev, loff_t pos, +static bool blkdev_dio_invalid(struct block_device *bdev, struct kiocb *iocb, struct iov_iter *iter, bool is_atomic) { - if (is_atomic && !generic_atomic_write_valid(iter, pos)) + if (is_atomic && !generic_atomic_write_valid(iocb, iter)) return true; - return pos & (bdev_logical_block_size(bdev) - 1) || + return iocb->ki_pos & (bdev_logical_block_size(bdev) - 1) || !bdev_iter_is_aligned(bdev, iter); } @@ -374,7 +374,7 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (!iov_iter_count(iter)) return 0; - if (blkdev_dio_invalid(bdev, iocb->ki_pos, iter, is_atomic)) + if (blkdev_dio_invalid(bdev, iocb, iter, is_atomic)) return -EINVAL; nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); diff --git a/fs/read_write.c b/fs/read_write.c index 64dc24afdb3a..2c3263530828 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1830,7 +1830,7 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) return 0; } -bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos) +bool generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter) { size_t len = iov_iter_count(iter); @@ -1840,7 +1840,7 @@ bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos) if (!is_power_of_2(len)) return false; - if (!IS_ALIGNED(pos, len)) + if (!IS_ALIGNED(iocb->ki_pos, len)) return false; return true; diff --git a/include/linux/fs.h b/include/linux/fs.h index e3c603d01337..fbfa032d1d90 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3721,6 +3721,6 @@ static inline bool vfs_empty_path(int dfd, const char __user *path) return !c; } -bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos); +bool generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter); #endif /* _LINUX_FS_H */ -- cgit v1.2.3 From c3be7ebbbce5201e151f17e28a6c807602f369c9 Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 19 Oct 2024 12:51:07 +0000 Subject: fs/block: Check for IOCB_DIRECT in generic_atomic_write_valid() Currently FMODE_CAN_ATOMIC_WRITE is set if the bdev can atomic write and the file is open for direct IO. This does not work if the file is not opened for direct IO, yet fcntl(O_DIRECT) is used on the fd later. Change to check for direct IO on a per-IO basis in generic_atomic_write_valid(). Since we want to report -EOPNOTSUPP for non-direct IO for an atomic write, change to return an error code. Relocate the block fops atomic write checks to the common write path, as to catch non-direct IO. Fixes: c34fc6f26ab8 ("fs: Initial atomic write support") Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong Signed-off-by: John Garry Link: https://lore.kernel.org/r/20241019125113.369994-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- block/fops.c | 18 ++++++++++-------- fs/read_write.c | 13 ++++++++----- include/linux/fs.h | 2 +- 3 files changed, 19 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/block/fops.c b/block/fops.c index 968b47b615c4..2d01c9007681 100644 --- a/block/fops.c +++ b/block/fops.c @@ -36,11 +36,8 @@ static blk_opf_t dio_bio_write_op(struct kiocb *iocb) } static bool blkdev_dio_invalid(struct block_device *bdev, struct kiocb *iocb, - struct iov_iter *iter, bool is_atomic) + struct iov_iter *iter) { - if (is_atomic && !generic_atomic_write_valid(iocb, iter)) - return true; - return iocb->ki_pos & (bdev_logical_block_size(bdev) - 1) || !bdev_iter_is_aligned(bdev, iter); } @@ -368,13 +365,12 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); - bool is_atomic = iocb->ki_flags & IOCB_ATOMIC; unsigned int nr_pages; if (!iov_iter_count(iter)) return 0; - if (blkdev_dio_invalid(bdev, iocb, iter, is_atomic)) + if (blkdev_dio_invalid(bdev, iocb, iter)) return -EINVAL; nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); @@ -383,7 +379,7 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) return __blkdev_direct_IO_simple(iocb, iter, bdev, nr_pages); return __blkdev_direct_IO_async(iocb, iter, bdev, nr_pages); - } else if (is_atomic) { + } else if (iocb->ki_flags & IOCB_ATOMIC) { return -EINVAL; } return __blkdev_direct_IO(iocb, iter, bdev, bio_max_segs(nr_pages)); @@ -625,7 +621,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) if (!bdev) return -ENXIO; - if (bdev_can_atomic_write(bdev) && filp->f_flags & O_DIRECT) + if (bdev_can_atomic_write(bdev)) filp->f_mode |= FMODE_CAN_ATOMIC_WRITE; ret = bdev_open(bdev, mode, filp->private_data, NULL, filp); @@ -700,6 +696,12 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT) return -EOPNOTSUPP; + if (iocb->ki_flags & IOCB_ATOMIC) { + ret = generic_atomic_write_valid(iocb, from); + if (ret) + return ret; + } + size -= iocb->ki_pos; if (iov_iter_count(from) > size) { shorted = iov_iter_count(from) - size; diff --git a/fs/read_write.c b/fs/read_write.c index 2c3263530828..befec0b5c537 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1830,18 +1830,21 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) return 0; } -bool generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter) +int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter) { size_t len = iov_iter_count(iter); if (!iter_is_ubuf(iter)) - return false; + return -EINVAL; if (!is_power_of_2(len)) - return false; + return -EINVAL; if (!IS_ALIGNED(iocb->ki_pos, len)) - return false; + return -EINVAL; - return true; + if (!(iocb->ki_flags & IOCB_DIRECT)) + return -EOPNOTSUPP; + + return 0; } diff --git a/include/linux/fs.h b/include/linux/fs.h index fbfa032d1d90..ba47fb283730 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3721,6 +3721,6 @@ static inline bool vfs_empty_path(int dfd, const char __user *path) return !c; } -bool generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter); +int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter); #endif /* _LINUX_FS_H */ -- 
cgit v1.2.3 From 1eadb157947163ca72ba8963b915fdc099ce6cca Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 19 Oct 2024 12:51:08 +0000 Subject: block: Add bdev atomic write limits helpers Add helpers to get atomic write limits for a bdev, so that we don't access request_queue helpers outside the block layer. We check if the bdev can actually atomic write in these helpers, so we can avoid users missing using this check. Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: John Garry Link: https://lore.kernel.org/r/20241019125113.369994-4-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50c3b959da28..c2cc3c146d74 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1674,6 +1674,22 @@ static inline bool bdev_can_atomic_write(struct block_device *bdev) return true; } +static inline unsigned int +bdev_atomic_write_unit_min_bytes(struct block_device *bdev) +{ + if (!bdev_can_atomic_write(bdev)) + return 0; + return queue_atomic_write_unit_min_bytes(bdev_get_queue(bdev)); +} + +static inline unsigned int +bdev_atomic_write_unit_max_bytes(struct block_device *bdev) +{ + if (!bdev_can_atomic_write(bdev)) + return 0; + return queue_atomic_write_unit_max_bytes(bdev_get_queue(bdev)); +} + #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From e896474fe4851ffc4dd860c92daa906783090346 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 7 Oct 2024 23:08:34 -0400 Subject: getname_maybe_null() - the third variant of pathname copy-in Semantics used by statx(2) (and later *xattrat(2)): without AT_EMPTY_PATH it's standard getname() (i.e. ERR_PTR(-ENOENT) on empty string, ERR_PTR(-EFAULT) on NULL), with AT_EMPTY_PATH both empty string and NULL are accepted. Calling conventions: getname_maybe_null(user_pointer, flags) returns * pointer to struct filename when non-empty string had been successfully read * ERR_PTR(...) on error * NULL if an empty string or NULL pointer had been given with AT_EMPTY_PATH in the flags argument. It tries to avoid allocation in the last case; it's not always able to do so, in which case the temporary struct filename instance is freed and NULL returned anyway. Fast path is inlined. Signed-off-by: Al Viro --- fs/namei.c | 30 +++++++++++++++++++++++------- fs/stat.c | 28 ++++------------------------ include/linux/fs.h | 10 ++++++++++ 3 files changed, 37 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index aaf3cd6c802f..2bfe476c3bd0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -211,22 +211,38 @@ getname_flags(const char __user *filename, int flags) return result; } -struct filename * -getname_uflags(const char __user *filename, int uflags) +struct filename *getname_uflags(const char __user *filename, int uflags) { int flags = (uflags & AT_EMPTY_PATH) ? 
LOOKUP_EMPTY : 0; return getname_flags(filename, flags); } -struct filename * -getname(const char __user * filename) +struct filename *getname(const char __user * filename) { return getname_flags(filename, 0); } -struct filename * -getname_kernel(const char * filename) +struct filename *__getname_maybe_null(const char __user *pathname) +{ + struct filename *name; + char c; + + /* try to save on allocations; loss on um, though */ + if (get_user(c, pathname)) + return ERR_PTR(-EFAULT); + if (!c) + return NULL; + + name = getname_flags(pathname, LOOKUP_EMPTY); + if (!IS_ERR(name) && !(name->name[0])) { + putname(name); + name = NULL; + } + return name; +} + +struct filename *getname_kernel(const char * filename) { struct filename *result; int len = strlen(filename) + 1; @@ -264,7 +280,7 @@ EXPORT_SYMBOL(getname_kernel); void putname(struct filename *name) { - if (IS_ERR(name)) + if (IS_ERR_OR_NULL(name)) return; if (WARN_ON_ONCE(!atomic_read(&name->refcnt))) diff --git a/fs/stat.c b/fs/stat.c index 41e598376d7e..b74831dc7ae6 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -326,18 +326,11 @@ int vfs_fstatat(int dfd, const char __user *filename, { int ret; int statx_flags = flags | AT_NO_AUTOMOUNT; - struct filename *name; + struct filename *name = getname_maybe_null(filename, flags); - /* - * Work around glibc turning fstat() into fstatat(AT_EMPTY_PATH) - * - * If AT_EMPTY_PATH is set, we expect the common case to be that - * empty path, and avoid doing all the extra pathname work. - */ - if (flags == AT_EMPTY_PATH && vfs_empty_path(dfd, filename)) + if (!name && dfd >= 0) return vfs_fstat(dfd, stat); - name = getname_flags(filename, getname_statx_lookup_flags(statx_flags)); ret = vfs_statx(dfd, name, statx_flags, stat, STATX_BASIC_STATS); putname(name); @@ -774,24 +767,11 @@ SYSCALL_DEFINE5(statx, struct statx __user *, buffer) { int ret; - unsigned lflags; - struct filename *name; + struct filename *name = getname_maybe_null(filename, flags); - /* - * Short-circuit handling of NULL and "" paths. - * - * For a NULL path we require and accept only the AT_EMPTY_PATH flag - * (possibly |'d with AT_STATX flags). - * - * However, glibc on 32-bit architectures implements fstatat as statx - * with the "" pathname and AT_NO_AUTOMOUNT | AT_EMPTY_PATH flags. - * Supporting this results in the uglification below. 
- */ - lflags = flags & ~(AT_NO_AUTOMOUNT | AT_STATX_SYNC_TYPE); - if (lflags == AT_EMPTY_PATH && vfs_empty_path(dfd, filename)) + if (!name && dfd >= 0) return do_statx_fd(dfd, flags & ~AT_NO_AUTOMOUNT, mask, buffer); - name = getname_flags(filename, getname_statx_lookup_flags(flags)); ret = do_statx(dfd, name, flags, mask, buffer); putname(name); diff --git a/include/linux/fs.h b/include/linux/fs.h index e3c603d01337..403258ac2ea2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2766,6 +2766,16 @@ extern struct filename *getname_flags(const char __user *, int); extern struct filename *getname_uflags(const char __user *, int); extern struct filename *getname(const char __user *); extern struct filename *getname_kernel(const char *); +extern struct filename *__getname_maybe_null(const char __user *); +static inline struct filename *getname_maybe_null(const char __user *name, int flags) +{ + if (!(flags & AT_EMPTY_PATH)) + return getname(name); + + if (!name) + return NULL; + return __getname_maybe_null(name); +} extern void putname(struct filename *name); extern int finish_open(struct file *file, struct dentry *dentry, -- cgit v1.2.3 From af264e5989055ac33f413c4c80874345cda0cc97 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 18 Oct 2024 19:05:57 +0200 Subject: mtd: spinand: Constify struct nand_ecc_engine_ops 'struct nand_ecc_engine_ops' are not modified in these drivers. Constifying this structure moves some data to a read-only section, so increases overall security, especially when the structure holds some function pointers. Update the prototype of mxic_ecc_get_pipelined_ops() accordingly. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 16709 1374 16 18099 46b3 drivers/mtd/nand/ecc-mxic.o After: ===== text data bss dec hex filename 16789 1294 16 18099 46b3 drivers/mtd/nand/ecc-mxic.o Signed-off-by: Christophe JAILLET Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/72597e9de2320a4109be2112e696399592edacd4.1729271136.git.christophe.jaillet@wanadoo.fr --- drivers/mtd/nand/ecc-mxic.c | 6 +++--- drivers/mtd/nand/ecc-sw-bch.c | 2 +- drivers/mtd/nand/ecc-sw-hamming.c | 2 +- drivers/mtd/nand/spi/core.c | 2 +- drivers/spi/spi-mtk-snfi.c | 2 +- drivers/spi/spi-mxic.c | 10 +++++----- include/linux/mtd/nand-ecc-mxic.h | 4 ++-- include/linux/mtd/nand.h | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/ecc-mxic.c b/drivers/mtd/nand/ecc-mxic.c index 47e10945b8d2..86895e09328f 100644 --- a/drivers/mtd/nand/ecc-mxic.c +++ b/drivers/mtd/nand/ecc-mxic.c @@ -723,21 +723,21 @@ static int mxic_ecc_finish_io_req_pipelined(struct nand_device *nand, return ret; } -static struct nand_ecc_engine_ops mxic_ecc_engine_external_ops = { +static const struct nand_ecc_engine_ops mxic_ecc_engine_external_ops = { .init_ctx = mxic_ecc_init_ctx_external, .cleanup_ctx = mxic_ecc_cleanup_ctx, .prepare_io_req = mxic_ecc_prepare_io_req_external, .finish_io_req = mxic_ecc_finish_io_req_external, }; -static struct nand_ecc_engine_ops mxic_ecc_engine_pipelined_ops = { +static const struct nand_ecc_engine_ops mxic_ecc_engine_pipelined_ops = { .init_ctx = mxic_ecc_init_ctx_pipelined, .cleanup_ctx = mxic_ecc_cleanup_ctx, .prepare_io_req = mxic_ecc_prepare_io_req_pipelined, .finish_io_req = mxic_ecc_finish_io_req_pipelined, }; -struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void) +const struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void) { return 
&mxic_ecc_engine_pipelined_ops; } diff --git a/drivers/mtd/nand/ecc-sw-bch.c b/drivers/mtd/nand/ecc-sw-bch.c index 405552d014a8..0d9310dd6f52 100644 --- a/drivers/mtd/nand/ecc-sw-bch.c +++ b/drivers/mtd/nand/ecc-sw-bch.c @@ -384,7 +384,7 @@ static int nand_ecc_sw_bch_finish_io_req(struct nand_device *nand, return max_bitflips; } -static struct nand_ecc_engine_ops nand_ecc_sw_bch_engine_ops = { +static const struct nand_ecc_engine_ops nand_ecc_sw_bch_engine_ops = { .init_ctx = nand_ecc_sw_bch_init_ctx, .cleanup_ctx = nand_ecc_sw_bch_cleanup_ctx, .prepare_io_req = nand_ecc_sw_bch_prepare_io_req, diff --git a/drivers/mtd/nand/ecc-sw-hamming.c b/drivers/mtd/nand/ecc-sw-hamming.c index 254db2e7f8bb..f2d0effad9d2 100644 --- a/drivers/mtd/nand/ecc-sw-hamming.c +++ b/drivers/mtd/nand/ecc-sw-hamming.c @@ -638,7 +638,7 @@ static int nand_ecc_sw_hamming_finish_io_req(struct nand_device *nand, return max_bitflips; } -static struct nand_ecc_engine_ops nand_ecc_sw_hamming_engine_ops = { +static const struct nand_ecc_engine_ops nand_ecc_sw_hamming_engine_ops = { .init_ctx = nand_ecc_sw_hamming_init_ctx, .cleanup_ctx = nand_ecc_sw_hamming_cleanup_ctx, .prepare_io_req = nand_ecc_sw_hamming_prepare_io_req, diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 4d76f9f71a0e..b1df7f627161 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -337,7 +337,7 @@ static int spinand_ondie_ecc_finish_io_req(struct nand_device *nand, return ret; } -static struct nand_ecc_engine_ops spinand_ondie_ecc_engine_ops = { +static const struct nand_ecc_engine_ops spinand_ondie_ecc_engine_ops = { .init_ctx = spinand_ondie_ecc_init_ctx, .cleanup_ctx = spinand_ondie_ecc_cleanup_ctx, .prepare_io_req = spinand_ondie_ecc_prepare_io_req, diff --git a/drivers/spi/spi-mtk-snfi.c b/drivers/spi/spi-mtk-snfi.c index ddd98ddb7913..b2a135e99e4d 100644 --- a/drivers/spi/spi-mtk-snfi.c +++ b/drivers/spi/spi-mtk-snfi.c @@ -776,7 +776,7 @@ static int mtk_snand_ecc_finish_io_req(struct nand_device *nand, return snf->ecc_stats.failed ? 
-EBADMSG : snf->ecc_stats.bitflips; } -static struct nand_ecc_engine_ops mtk_snfi_ecc_engine_ops = { +static const struct nand_ecc_engine_ops mtk_snfi_ecc_engine_ops = { .init_ctx = mtk_snand_ecc_init_ctx, .cleanup_ctx = mtk_snand_ecc_cleanup_ctx, .prepare_io_req = mtk_snand_ecc_prepare_io_req, diff --git a/drivers/spi/spi-mxic.c b/drivers/spi/spi-mxic.c index 6156d691630a..7ef3ee55cba3 100644 --- a/drivers/spi/spi-mxic.c +++ b/drivers/spi/spi-mxic.c @@ -640,7 +640,7 @@ static int mxic_spi_transfer_one(struct spi_controller *host, /* ECC wrapper */ static int mxic_spi_mem_ecc_init_ctx(struct nand_device *nand) { - struct nand_ecc_engine_ops *ops = mxic_ecc_get_pipelined_ops(); + const struct nand_ecc_engine_ops *ops = mxic_ecc_get_pipelined_ops(); struct mxic_spi *mxic = nand->ecc.engine->priv; mxic->ecc.use_pipelined_conf = true; @@ -650,7 +650,7 @@ static int mxic_spi_mem_ecc_init_ctx(struct nand_device *nand) static void mxic_spi_mem_ecc_cleanup_ctx(struct nand_device *nand) { - struct nand_ecc_engine_ops *ops = mxic_ecc_get_pipelined_ops(); + const struct nand_ecc_engine_ops *ops = mxic_ecc_get_pipelined_ops(); struct mxic_spi *mxic = nand->ecc.engine->priv; mxic->ecc.use_pipelined_conf = false; @@ -661,7 +661,7 @@ static void mxic_spi_mem_ecc_cleanup_ctx(struct nand_device *nand) static int mxic_spi_mem_ecc_prepare_io_req(struct nand_device *nand, struct nand_page_io_req *req) { - struct nand_ecc_engine_ops *ops = mxic_ecc_get_pipelined_ops(); + const struct nand_ecc_engine_ops *ops = mxic_ecc_get_pipelined_ops(); return ops->prepare_io_req(nand, req); } @@ -669,12 +669,12 @@ static int mxic_spi_mem_ecc_prepare_io_req(struct nand_device *nand, static int mxic_spi_mem_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req) { - struct nand_ecc_engine_ops *ops = mxic_ecc_get_pipelined_ops(); + const struct nand_ecc_engine_ops *ops = mxic_ecc_get_pipelined_ops(); return ops->finish_io_req(nand, req); } -static struct nand_ecc_engine_ops mxic_spi_mem_ecc_engine_pipelined_ops = { +static const struct nand_ecc_engine_ops mxic_spi_mem_ecc_engine_pipelined_ops = { .init_ctx = mxic_spi_mem_ecc_init_ctx, .cleanup_ctx = mxic_spi_mem_ecc_cleanup_ctx, .prepare_io_req = mxic_spi_mem_ecc_prepare_io_req, diff --git a/include/linux/mtd/nand-ecc-mxic.h b/include/linux/mtd/nand-ecc-mxic.h index b125926e458c..0da4b2999576 100644 --- a/include/linux/mtd/nand-ecc-mxic.h +++ b/include/linux/mtd/nand-ecc-mxic.h @@ -16,7 +16,7 @@ struct mxic_ecc_engine; #if IS_ENABLED(CONFIG_MTD_NAND_ECC_MXIC) && IS_REACHABLE(CONFIG_MTD_NAND_CORE) -struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void); +const struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void); struct nand_ecc_engine *mxic_ecc_get_pipelined_engine(struct platform_device *spi_pdev); void mxic_ecc_put_pipelined_engine(struct nand_ecc_engine *eng); int mxic_ecc_process_data_pipelined(struct nand_ecc_engine *eng, @@ -24,7 +24,7 @@ int mxic_ecc_process_data_pipelined(struct nand_ecc_engine *eng, #else /* !CONFIG_MTD_NAND_ECC_MXIC */ -static inline struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void) +static inline const struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void) { return NULL; } diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 1e4208040956..0e2f228e8b4a 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -293,7 +293,7 @@ enum nand_ecc_engine_integration { struct nand_ecc_engine { struct device *dev; struct list_head node; - struct nand_ecc_engine_ops *ops; + const 
struct nand_ecc_engine_ops *ops; enum nand_ecc_engine_integration integration; void *priv; }; -- cgit v1.2.3 From 192514ae05687a60ac230569b2a215fb2d3b8d90 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 10 Jun 2024 10:57:32 +0200 Subject: soc: mediatek: Add MediaTek DVFS Resource Collector (DVFSRC) driver The Dynamic Voltage and Frequency Scaling Resource Collector (DVFSRC) is a Hardware module used to collect all the requests from both software and the various remote processors embedded into the SoC and decide about a minimum operating voltage and a minimum DRAM frequency to fulfill those requests in an effort to provide the best achievable performance per watt. This hardware IP is capable of transparently performing direct register R/W on all of the DVFSRC-controlled regulators and SoC bandwidth knobs. This driver includes support for MT8183, MT8192 and MT8195. Co-Developed-by: Dawei Chien [Angelo: Partial refactoring and cleanups] Reviewed-by: Georgi Djakov Link: https://lore.kernel.org/r/20240610085735.147134-5-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- drivers/soc/mediatek/Kconfig | 11 + drivers/soc/mediatek/Makefile | 1 + drivers/soc/mediatek/mtk-dvfsrc.c | 545 +++++++++++++++++++++++++++++++ include/linux/soc/mediatek/dvfsrc.h | 36 ++ include/linux/soc/mediatek/mtk_sip_svc.h | 3 + 5 files changed, 596 insertions(+) create mode 100644 drivers/soc/mediatek/mtk-dvfsrc.c create mode 100644 include/linux/soc/mediatek/dvfsrc.h (limited to 'include') diff --git a/drivers/soc/mediatek/Kconfig b/drivers/soc/mediatek/Kconfig index 1b7afb19ccd6..d7293977f06e 100644 --- a/drivers/soc/mediatek/Kconfig +++ b/drivers/soc/mediatek/Kconfig @@ -26,6 +26,17 @@ config MTK_DEVAPC The violation information is logged for further analysis or countermeasures. +config MTK_DVFSRC + tristate "MediaTek DVFSRC Support" + depends on ARCH_MEDIATEK + help + Say yes here to add support for the MediaTek Dynamic Voltage + and Frequency Scaling Resource Collector (DVFSRC): a HW + IP found on many MediaTek SoCs, which is responsible for + collecting DVFS requests from various SoC IPs, other than + software, and performing bandwidth scaling to provide the + best achievable performance-per-watt. + config MTK_INFRACFG bool "MediaTek INFRACFG Support" select REGMAP diff --git a/drivers/soc/mediatek/Makefile b/drivers/soc/mediatek/Makefile index 6830512848fd..0665573e3c4b 100644 --- a/drivers/soc/mediatek/Makefile +++ b/drivers/soc/mediatek/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_MTK_CMDQ) += mtk-cmdq-helper.o obj-$(CONFIG_MTK_DEVAPC) += mtk-devapc.o +obj-$(CONFIG_MTK_DVFSRC) += mtk-dvfsrc.o obj-$(CONFIG_MTK_INFRACFG) += mtk-infracfg.o obj-$(CONFIG_MTK_PMIC_WRAP) += mtk-pmic-wrap.o obj-$(CONFIG_MTK_REGULATOR_COUPLER) += mtk-regulator-coupler.o diff --git a/drivers/soc/mediatek/mtk-dvfsrc.c b/drivers/soc/mediatek/mtk-dvfsrc.c new file mode 100644 index 000000000000..83bf46fdcf2d --- /dev/null +++ b/drivers/soc/mediatek/mtk-dvfsrc.c @@ -0,0 +1,545 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 MediaTek Inc. + * Copyright (c) 2024 Collabora Ltd. 
+ * AngeloGioacchino Del Regno + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* DVFSRC_LEVEL */ +#define DVFSRC_V1_LEVEL_TARGET_LEVEL GENMASK(15, 0) +#define DVFSRC_TGT_LEVEL_IDLE 0x00 +#define DVFSRC_V1_LEVEL_CURRENT_LEVEL GENMASK(31, 16) + +/* DVFSRC_SW_REQ, DVFSRC_SW_REQ2 */ +#define DVFSRC_V1_SW_REQ2_DRAM_LEVEL GENMASK(1, 0) +#define DVFSRC_V1_SW_REQ2_VCORE_LEVEL GENMASK(3, 2) + +#define DVFSRC_V2_SW_REQ_DRAM_LEVEL GENMASK(3, 0) +#define DVFSRC_V2_SW_REQ_VCORE_LEVEL GENMASK(6, 4) + +/* DVFSRC_VCORE */ +#define DVFSRC_V2_VCORE_REQ_VSCP_LEVEL GENMASK(14, 12) + +#define DVFSRC_POLL_TIMEOUT_US 1000 +#define STARTUP_TIME_US 1 + +#define MTK_SIP_DVFSRC_INIT 0x0 +#define MTK_SIP_DVFSRC_START 0x1 + +struct dvfsrc_bw_constraints { + u16 max_dram_nom_bw; + u16 max_dram_peak_bw; + u16 max_dram_hrt_bw; +}; + +struct dvfsrc_opp { + u32 vcore_opp; + u32 dram_opp; +}; + +struct dvfsrc_opp_desc { + const struct dvfsrc_opp *opps; + u32 num_opp; +}; + +struct dvfsrc_soc_data; +struct mtk_dvfsrc { + struct device *dev; + struct platform_device *icc; + struct platform_device *regulator; + const struct dvfsrc_soc_data *dvd; + const struct dvfsrc_opp_desc *curr_opps; + void __iomem *regs; + int dram_type; +}; + +struct dvfsrc_soc_data { + const int *regs; + const struct dvfsrc_opp_desc *opps_desc; + u32 (*get_target_level)(struct mtk_dvfsrc *dvfsrc); + u32 (*get_current_level)(struct mtk_dvfsrc *dvfsrc); + u32 (*get_vcore_level)(struct mtk_dvfsrc *dvfsrc); + u32 (*get_vscp_level)(struct mtk_dvfsrc *dvfsrc); + void (*set_dram_bw)(struct mtk_dvfsrc *dvfsrc, u64 bw); + void (*set_dram_peak_bw)(struct mtk_dvfsrc *dvfsrc, u64 bw); + void (*set_dram_hrt_bw)(struct mtk_dvfsrc *dvfsrc, u64 bw); + void (*set_opp_level)(struct mtk_dvfsrc *dvfsrc, u32 level); + void (*set_vcore_level)(struct mtk_dvfsrc *dvfsrc, u32 level); + void (*set_vscp_level)(struct mtk_dvfsrc *dvfsrc, u32 level); + int (*wait_for_opp_level)(struct mtk_dvfsrc *dvfsrc, u32 level); + int (*wait_for_vcore_level)(struct mtk_dvfsrc *dvfsrc, u32 level); + const struct dvfsrc_bw_constraints *bw_constraints; +}; + +static u32 dvfsrc_readl(struct mtk_dvfsrc *dvfs, u32 offset) +{ + return readl(dvfs->regs + dvfs->dvd->regs[offset]); +} + +static void dvfsrc_writel(struct mtk_dvfsrc *dvfs, u32 offset, u32 val) +{ + writel(val, dvfs->regs + dvfs->dvd->regs[offset]); +} + +enum dvfsrc_regs { + DVFSRC_SW_REQ, + DVFSRC_SW_REQ2, + DVFSRC_LEVEL, + DVFSRC_TARGET_LEVEL, + DVFSRC_SW_BW, + DVFSRC_SW_PEAK_BW, + DVFSRC_SW_HRT_BW, + DVFSRC_VCORE, + DVFSRC_REGS_MAX, +}; + +static const int dvfsrc_mt8183_regs[] = { + [DVFSRC_SW_REQ] = 0x4, + [DVFSRC_SW_REQ2] = 0x8, + [DVFSRC_LEVEL] = 0xDC, + [DVFSRC_SW_BW] = 0x160, +}; + +static const int dvfsrc_mt8195_regs[] = { + [DVFSRC_SW_REQ] = 0xc, + [DVFSRC_VCORE] = 0x6c, + [DVFSRC_SW_PEAK_BW] = 0x278, + [DVFSRC_SW_BW] = 0x26c, + [DVFSRC_SW_HRT_BW] = 0x290, + [DVFSRC_LEVEL] = 0xd44, + [DVFSRC_TARGET_LEVEL] = 0xd48, +}; + +static const struct dvfsrc_opp *dvfsrc_get_current_opp(struct mtk_dvfsrc *dvfsrc) +{ + u32 level = dvfsrc->dvd->get_current_level(dvfsrc); + + return &dvfsrc->curr_opps->opps[level]; +} + +static bool dvfsrc_is_idle(struct mtk_dvfsrc *dvfsrc) +{ + if (!dvfsrc->dvd->get_target_level) + return true; + + return dvfsrc->dvd->get_target_level(dvfsrc) == DVFSRC_TGT_LEVEL_IDLE; +} + +static int dvfsrc_wait_for_vcore_level_v1(struct mtk_dvfsrc *dvfsrc, u32 level) +{ + const struct dvfsrc_opp *curr; + + return readx_poll_timeout_atomic(dvfsrc_get_current_opp, dvfsrc, 
curr, + curr->vcore_opp >= level, STARTUP_TIME_US, + DVFSRC_POLL_TIMEOUT_US); +} + +static int dvfsrc_wait_for_opp_level_v1(struct mtk_dvfsrc *dvfsrc, u32 level) +{ + const struct dvfsrc_opp *target, *curr; + int ret; + + target = &dvfsrc->curr_opps->opps[level]; + ret = readx_poll_timeout_atomic(dvfsrc_get_current_opp, dvfsrc, curr, + curr->dram_opp >= target->dram_opp && + curr->vcore_opp >= target->vcore_opp, + STARTUP_TIME_US, DVFSRC_POLL_TIMEOUT_US); + if (ret < 0) { + dev_warn(dvfsrc->dev, + "timeout! target OPP: %u, dram: %d, vcore: %d\n", level, + curr->dram_opp, curr->vcore_opp); + return ret; + } + + return 0; +} + +static int dvfsrc_wait_for_opp_level_v2(struct mtk_dvfsrc *dvfsrc, u32 level) +{ + const struct dvfsrc_opp *target, *curr; + int ret; + + target = &dvfsrc->curr_opps->opps[level]; + ret = readx_poll_timeout_atomic(dvfsrc_get_current_opp, dvfsrc, curr, + curr->dram_opp >= target->dram_opp && + curr->vcore_opp >= target->vcore_opp, + STARTUP_TIME_US, DVFSRC_POLL_TIMEOUT_US); + if (ret < 0) { + dev_warn(dvfsrc->dev, + "timeout! target OPP: %u, dram: %d\n", level, curr->dram_opp); + return ret; + } + + return 0; +} + +static u32 dvfsrc_get_target_level_v1(struct mtk_dvfsrc *dvfsrc) +{ + u32 val = dvfsrc_readl(dvfsrc, DVFSRC_LEVEL); + + return FIELD_GET(DVFSRC_V1_LEVEL_TARGET_LEVEL, val); +} + +static u32 dvfsrc_get_current_level_v1(struct mtk_dvfsrc *dvfsrc) +{ + u32 val = dvfsrc_readl(dvfsrc, DVFSRC_LEVEL); + u32 current_level = FIELD_GET(DVFSRC_V1_LEVEL_CURRENT_LEVEL, val); + + return ffs(current_level) - 1; +} + +static u32 dvfsrc_get_target_level_v2(struct mtk_dvfsrc *dvfsrc) +{ + return dvfsrc_readl(dvfsrc, DVFSRC_TARGET_LEVEL); +} + +static u32 dvfsrc_get_current_level_v2(struct mtk_dvfsrc *dvfsrc) +{ + u32 val = dvfsrc_readl(dvfsrc, DVFSRC_LEVEL); + u32 level = ffs(val); + + /* Valid levels */ + if (level < dvfsrc->curr_opps->num_opp) + return dvfsrc->curr_opps->num_opp - level; + + /* Zero for level 0 or invalid level */ + return 0; +} + +static u32 dvfsrc_get_vcore_level_v1(struct mtk_dvfsrc *dvfsrc) +{ + u32 val = dvfsrc_readl(dvfsrc, DVFSRC_SW_REQ2); + + return FIELD_GET(DVFSRC_V1_SW_REQ2_VCORE_LEVEL, val); +} + +static void dvfsrc_set_vcore_level_v1(struct mtk_dvfsrc *dvfsrc, u32 level) +{ + u32 val = dvfsrc_readl(dvfsrc, DVFSRC_SW_REQ2); + + val &= ~DVFSRC_V1_SW_REQ2_VCORE_LEVEL; + val |= FIELD_PREP(DVFSRC_V1_SW_REQ2_VCORE_LEVEL, level); + + dvfsrc_writel(dvfsrc, DVFSRC_SW_REQ2, val); +} + +static u32 dvfsrc_get_vcore_level_v2(struct mtk_dvfsrc *dvfsrc) +{ + u32 val = dvfsrc_readl(dvfsrc, DVFSRC_SW_REQ); + + return FIELD_GET(DVFSRC_V2_SW_REQ_VCORE_LEVEL, val); +} + +static void dvfsrc_set_vcore_level_v2(struct mtk_dvfsrc *dvfsrc, u32 level) +{ + u32 val = dvfsrc_readl(dvfsrc, DVFSRC_SW_REQ); + + val &= ~DVFSRC_V2_SW_REQ_VCORE_LEVEL; + val |= FIELD_PREP(DVFSRC_V2_SW_REQ_VCORE_LEVEL, level); + + dvfsrc_writel(dvfsrc, DVFSRC_SW_REQ, val); +} + +static u32 dvfsrc_get_vscp_level_v2(struct mtk_dvfsrc *dvfsrc) +{ + u32 val = dvfsrc_readl(dvfsrc, DVFSRC_VCORE); + + return FIELD_GET(DVFSRC_V2_VCORE_REQ_VSCP_LEVEL, val); +} + +static void dvfsrc_set_vscp_level_v2(struct mtk_dvfsrc *dvfsrc, u32 level) +{ + u32 val = dvfsrc_readl(dvfsrc, DVFSRC_VCORE); + + val &= ~DVFSRC_V2_VCORE_REQ_VSCP_LEVEL; + val |= FIELD_PREP(DVFSRC_V2_VCORE_REQ_VSCP_LEVEL, level); + + dvfsrc_writel(dvfsrc, DVFSRC_VCORE, val); +} + +static void __dvfsrc_set_dram_bw_v1(struct mtk_dvfsrc *dvfsrc, u32 reg, + u16 max_bw, u16 min_bw, u64 bw) +{ + u32 new_bw = (u32)div_u64(bw, 100 * 1000); + + /* If bw 
constraints (in mbps) are defined make sure to respect them */ + if (max_bw) + new_bw = min(new_bw, max_bw); + if (min_bw && new_bw > 0) + new_bw = max(new_bw, min_bw); + + dvfsrc_writel(dvfsrc, reg, new_bw); +} + +static void dvfsrc_set_dram_bw_v1(struct mtk_dvfsrc *dvfsrc, u64 bw) +{ + u64 max_bw = dvfsrc->dvd->bw_constraints->max_dram_nom_bw; + + __dvfsrc_set_dram_bw_v1(dvfsrc, DVFSRC_SW_BW, max_bw, 0, bw); +}; + +static void dvfsrc_set_dram_peak_bw_v1(struct mtk_dvfsrc *dvfsrc, u64 bw) +{ + u64 max_bw = dvfsrc->dvd->bw_constraints->max_dram_peak_bw; + + __dvfsrc_set_dram_bw_v1(dvfsrc, DVFSRC_SW_PEAK_BW, max_bw, 0, bw); +} + +static void dvfsrc_set_dram_hrt_bw_v1(struct mtk_dvfsrc *dvfsrc, u64 bw) +{ + u64 max_bw = dvfsrc->dvd->bw_constraints->max_dram_hrt_bw; + + __dvfsrc_set_dram_bw_v1(dvfsrc, DVFSRC_SW_HRT_BW, max_bw, 0, bw); +} + +static void dvfsrc_set_opp_level_v1(struct mtk_dvfsrc *dvfsrc, u32 level) +{ + const struct dvfsrc_opp *opp = &dvfsrc->curr_opps->opps[level]; + u32 val; + + /* Translate Pstate to DVFSRC level and set it to DVFSRC HW */ + val = FIELD_PREP(DVFSRC_V1_SW_REQ2_DRAM_LEVEL, opp->dram_opp); + val |= FIELD_PREP(DVFSRC_V1_SW_REQ2_VCORE_LEVEL, opp->vcore_opp); + + dev_dbg(dvfsrc->dev, "vcore_opp: %d, dram_opp: %d\n", opp->vcore_opp, opp->dram_opp); + dvfsrc_writel(dvfsrc, DVFSRC_SW_REQ, val); +} + +int mtk_dvfsrc_send_request(const struct device *dev, u32 cmd, u64 data) +{ + struct mtk_dvfsrc *dvfsrc = dev_get_drvdata(dev); + bool state; + int ret; + + dev_dbg(dvfsrc->dev, "cmd: %d, data: %llu\n", cmd, data); + + switch (cmd) { + case MTK_DVFSRC_CMD_BW: + dvfsrc->dvd->set_dram_bw(dvfsrc, data); + return 0; + case MTK_DVFSRC_CMD_HRT_BW: + if (dvfsrc->dvd->set_dram_hrt_bw) + dvfsrc->dvd->set_dram_hrt_bw(dvfsrc, data); + return 0; + case MTK_DVFSRC_CMD_PEAK_BW: + if (dvfsrc->dvd->set_dram_peak_bw) + dvfsrc->dvd->set_dram_peak_bw(dvfsrc, data); + return 0; + case MTK_DVFSRC_CMD_OPP: + if (!dvfsrc->dvd->set_opp_level) + return 0; + + dvfsrc->dvd->set_opp_level(dvfsrc, data); + break; + case MTK_DVFSRC_CMD_VCORE_LEVEL: + dvfsrc->dvd->set_vcore_level(dvfsrc, data); + break; + case MTK_DVFSRC_CMD_VSCP_LEVEL: + if (!dvfsrc->dvd->set_vscp_level) + return 0; + + dvfsrc->dvd->set_vscp_level(dvfsrc, data); + break; + default: + dev_err(dvfsrc->dev, "unknown command: %d\n", cmd); + return -EOPNOTSUPP; + } + + /* DVFSRC needs at least 2T(~196ns) to handle a request */ + udelay(STARTUP_TIME_US); + + ret = readx_poll_timeout_atomic(dvfsrc_is_idle, dvfsrc, state, state, + STARTUP_TIME_US, DVFSRC_POLL_TIMEOUT_US); + if (ret < 0) { + dev_warn(dvfsrc->dev, + "%d: idle timeout, data: %llu, last: %d -> %d\n", cmd, data, + dvfsrc->dvd->get_current_level(dvfsrc), + dvfsrc->dvd->get_target_level(dvfsrc)); + return ret; + } + + if (cmd == MTK_DVFSRC_CMD_OPP) + ret = dvfsrc->dvd->wait_for_opp_level(dvfsrc, data); + else + ret = dvfsrc->dvd->wait_for_vcore_level(dvfsrc, data); + + if (ret < 0) { + dev_warn(dvfsrc->dev, + "%d: wait timeout, data: %llu, last: %d -> %d\n", + cmd, data, + dvfsrc->dvd->get_current_level(dvfsrc), + dvfsrc->dvd->get_target_level(dvfsrc)); + return ret; + } + + return 0; +} +EXPORT_SYMBOL(mtk_dvfsrc_send_request); + +int mtk_dvfsrc_query_info(const struct device *dev, u32 cmd, int *data) +{ + struct mtk_dvfsrc *dvfsrc = dev_get_drvdata(dev); + + switch (cmd) { + case MTK_DVFSRC_CMD_VCORE_LEVEL: + *data = dvfsrc->dvd->get_vcore_level(dvfsrc); + break; + case MTK_DVFSRC_CMD_VSCP_LEVEL: + *data = dvfsrc->dvd->get_vscp_level(dvfsrc); + break; + default: + return 
-EOPNOTSUPP; + } + + return 0; +} +EXPORT_SYMBOL(mtk_dvfsrc_query_info); + +static int mtk_dvfsrc_probe(struct platform_device *pdev) +{ + struct arm_smccc_res ares; + struct mtk_dvfsrc *dvfsrc; + int ret; + + dvfsrc = devm_kzalloc(&pdev->dev, sizeof(*dvfsrc), GFP_KERNEL); + if (!dvfsrc) + return -ENOMEM; + + dvfsrc->dvd = of_device_get_match_data(&pdev->dev); + dvfsrc->dev = &pdev->dev; + + dvfsrc->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); + if (IS_ERR(dvfsrc->regs)) + return PTR_ERR(dvfsrc->regs); + + arm_smccc_smc(MTK_SIP_DVFSRC_VCOREFS_CONTROL, MTK_SIP_DVFSRC_INIT, + 0, 0, 0, 0, 0, 0, &ares); + if (ares.a0) + return dev_err_probe(&pdev->dev, -EINVAL, "DVFSRC init failed: %lu\n", ares.a0); + + dvfsrc->dram_type = ares.a1; + dev_dbg(&pdev->dev, "DRAM Type: %d\n", dvfsrc->dram_type); + + dvfsrc->curr_opps = &dvfsrc->dvd->opps_desc[dvfsrc->dram_type]; + platform_set_drvdata(pdev, dvfsrc); + + ret = devm_of_platform_populate(&pdev->dev); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to populate child devices\n"); + + /* Everything is set up - make it run! */ + arm_smccc_smc(MTK_SIP_DVFSRC_VCOREFS_CONTROL, MTK_SIP_DVFSRC_START, + 0, 0, 0, 0, 0, 0, &ares); + if (ares.a0) + return dev_err_probe(&pdev->dev, -EINVAL, "Cannot start DVFSRC: %lu\n", ares.a0); + + return 0; +} + +static const struct dvfsrc_opp dvfsrc_opp_mt8183_lp4[] = { + { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 2 }, +}; + +static const struct dvfsrc_opp dvfsrc_opp_mt8183_lp3[] = { + { 0, 0 }, { 0, 1 }, { 1, 1 }, { 1, 2 }, +}; + +static const struct dvfsrc_opp_desc dvfsrc_opp_mt8183_desc[] = { + [0] = { + .opps = dvfsrc_opp_mt8183_lp4, + .num_opp = ARRAY_SIZE(dvfsrc_opp_mt8183_lp4), + }, + [1] = { + .opps = dvfsrc_opp_mt8183_lp3, + .num_opp = ARRAY_SIZE(dvfsrc_opp_mt8183_lp3), + }, + [2] = { + .opps = dvfsrc_opp_mt8183_lp3, + .num_opp = ARRAY_SIZE(dvfsrc_opp_mt8183_lp3), + } +}; + +static const struct dvfsrc_bw_constraints dvfsrc_bw_constr_mt8183 = { 0, 0, 0 }; + +static const struct dvfsrc_soc_data mt8183_data = { + .opps_desc = dvfsrc_opp_mt8183_desc, + .regs = dvfsrc_mt8183_regs, + .get_target_level = dvfsrc_get_target_level_v1, + .get_current_level = dvfsrc_get_current_level_v1, + .get_vcore_level = dvfsrc_get_vcore_level_v1, + .set_dram_bw = dvfsrc_set_dram_bw_v1, + .set_opp_level = dvfsrc_set_opp_level_v1, + .set_vcore_level = dvfsrc_set_vcore_level_v1, + .wait_for_opp_level = dvfsrc_wait_for_opp_level_v1, + .wait_for_vcore_level = dvfsrc_wait_for_vcore_level_v1, + .bw_constraints = &dvfsrc_bw_constr_mt8183, +}; + +static const struct dvfsrc_opp dvfsrc_opp_mt8195_lp4[] = { + { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 }, + { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + { 1, 3 }, { 2, 3 }, { 3, 3 }, { 1, 4 }, + { 2, 4 }, { 3, 4 }, { 2, 5 }, { 3, 5 }, + { 3, 6 }, +}; + +static const struct dvfsrc_opp_desc dvfsrc_opp_mt8195_desc[] = { + [0] = { + .opps = dvfsrc_opp_mt8195_lp4, + .num_opp = ARRAY_SIZE(dvfsrc_opp_mt8195_lp4), + } +}; + +static const struct dvfsrc_bw_constraints dvfsrc_bw_constr_mt8195 = { + .max_dram_nom_bw = 255, + .max_dram_peak_bw = 255, + .max_dram_hrt_bw = 1023, +}; + +static const struct dvfsrc_soc_data mt8195_data = { + .opps_desc = dvfsrc_opp_mt8195_desc, + .regs = dvfsrc_mt8195_regs, + .get_target_level = dvfsrc_get_target_level_v2, + .get_current_level = dvfsrc_get_current_level_v2, + .get_vcore_level = dvfsrc_get_vcore_level_v2, + .get_vscp_level = dvfsrc_get_vscp_level_v2, + .set_dram_bw = dvfsrc_set_dram_bw_v1, + .set_dram_peak_bw = 
dvfsrc_set_dram_peak_bw_v1, + .set_dram_hrt_bw = dvfsrc_set_dram_hrt_bw_v1, + .set_vcore_level = dvfsrc_set_vcore_level_v2, + .set_vscp_level = dvfsrc_set_vscp_level_v2, + .wait_for_opp_level = dvfsrc_wait_for_opp_level_v2, + .wait_for_vcore_level = dvfsrc_wait_for_vcore_level_v1, + .bw_constraints = &dvfsrc_bw_constr_mt8195, +}; + +static const struct of_device_id mtk_dvfsrc_of_match[] = { + { .compatible = "mediatek,mt8183-dvfsrc", .data = &mt8183_data }, + { .compatible = "mediatek,mt8195-dvfsrc", .data = &mt8195_data }, + { /* sentinel */ } +}; + +static struct platform_driver mtk_dvfsrc_driver = { + .probe = mtk_dvfsrc_probe, + .driver = { + .name = "mtk-dvfsrc", + .of_match_table = mtk_dvfsrc_of_match, + }, +}; +module_platform_driver(mtk_dvfsrc_driver); + +MODULE_AUTHOR("AngeloGioacchino Del Regno "); +MODULE_AUTHOR("Dawei Chien "); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MediaTek DVFSRC driver"); diff --git a/include/linux/soc/mediatek/dvfsrc.h b/include/linux/soc/mediatek/dvfsrc.h new file mode 100644 index 000000000000..1498b3ed396b --- /dev/null +++ b/include/linux/soc/mediatek/dvfsrc.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (c) 2021 MediaTek Inc. + * Copyright (c) 2024 Collabora Ltd. + * AngeloGioacchino Del Regno + */ + +#ifndef __MEDIATEK_DVFSRC_H +#define __MEDIATEK_DVFSRC_H + +enum mtk_dvfsrc_cmd { + MTK_DVFSRC_CMD_BW, + MTK_DVFSRC_CMD_HRT_BW, + MTK_DVFSRC_CMD_PEAK_BW, + MTK_DVFSRC_CMD_OPP, + MTK_DVFSRC_CMD_VCORE_LEVEL, + MTK_DVFSRC_CMD_VSCP_LEVEL, + MTK_DVFSRC_CMD_MAX, +}; + +#if IS_ENABLED(CONFIG_MTK_DVFSRC) + +int mtk_dvfsrc_send_request(const struct device *dev, u32 cmd, u64 data); +int mtk_dvfsrc_query_info(const struct device *dev, u32 cmd, int *data); + +#else + +static inline int mtk_dvfsrc_send_request(const struct device *dev, u32 cmd, u64 data) +{ return -ENODEV; } + +static inline int mtk_dvfsrc_query_info(const struct device *dev, u32 cmd, int *data) +{ return -ENODEV; } + +#endif /* CONFIG_MTK_DVFSRC */ + +#endif diff --git a/include/linux/soc/mediatek/mtk_sip_svc.h b/include/linux/soc/mediatek/mtk_sip_svc.h index 0761128b4354..abe24a73ee19 100644 --- a/include/linux/soc/mediatek/mtk_sip_svc.h +++ b/include/linux/soc/mediatek/mtk_sip_svc.h @@ -22,6 +22,9 @@ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, MTK_SIP_SMC_CONVENTION, \ ARM_SMCCC_OWNER_SIP, fn_id) +/* DVFSRC SMC calls */ +#define MTK_SIP_DVFSRC_VCOREFS_CONTROL MTK_SIP_SMC_CMD(0x506) + /* IOMMU related SMC call */ #define MTK_SIP_KERNEL_IOMMU_CONTROL MTK_SIP_SMC_CMD(0x514) -- cgit v1.2.3 From fb6f20ecb121cef4d7946f834a6ee867c4e21b4a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 17 Oct 2024 12:28:23 +0200 Subject: reiserfs: The last commit Deprecation period of reiserfs ends with the end of this year so it is time to remove it from the kernel. Acked-by: Darrick J. 
Wong Acked-by: Christian Brauner Signed-off-by: Jan Kara --- Documentation/filesystems/porting.rst | 2 +- Documentation/userspace-api/ioctl/ioctl-number.rst | 2 +- MAINTAINERS | 5 - arch/alpha/configs/defconfig | 1 - arch/arm/configs/pxa_defconfig | 4 - arch/m68k/configs/amiga_defconfig | 1 - arch/m68k/configs/apollo_defconfig | 1 - arch/m68k/configs/atari_defconfig | 1 - arch/m68k/configs/bvme6000_defconfig | 1 - arch/m68k/configs/hp300_defconfig | 1 - arch/m68k/configs/mac_defconfig | 1 - arch/m68k/configs/multi_defconfig | 1 - arch/m68k/configs/mvme147_defconfig | 1 - arch/m68k/configs/mvme16x_defconfig | 1 - arch/m68k/configs/q40_defconfig | 1 - arch/m68k/configs/sun3_defconfig | 1 - arch/m68k/configs/sun3x_defconfig | 1 - arch/sh/configs/landisk_defconfig | 1 - arch/sh/configs/titan_defconfig | 1 - arch/um/configs/i386_defconfig | 1 - arch/um/configs/x86_64_defconfig | 1 - drivers/block/Kconfig | 2 +- fs/Kconfig | 1 - fs/Makefile | 1 - fs/buffer.c | 3 +- fs/quota/Kconfig | 15 +- fs/reiserfs/Kconfig | 91 - fs/reiserfs/Makefile | 30 - fs/reiserfs/README | 151 - fs/reiserfs/acl.h | 78 - fs/reiserfs/bitmap.c | 1476 ------- fs/reiserfs/dir.c | 346 -- fs/reiserfs/do_balan.c | 1900 --------- fs/reiserfs/file.c | 270 -- fs/reiserfs/fix_node.c | 2822 ------------- fs/reiserfs/hashes.c | 177 - fs/reiserfs/ibalance.c | 1161 ------ fs/reiserfs/inode.c | 3416 --------------- fs/reiserfs/ioctl.c | 221 - fs/reiserfs/item_ops.c | 737 ---- fs/reiserfs/journal.c | 4404 -------------------- fs/reiserfs/lbalance.c | 1426 ------- fs/reiserfs/lock.c | 101 - fs/reiserfs/namei.c | 1725 -------- fs/reiserfs/objectid.c | 216 - fs/reiserfs/prints.c | 792 ---- fs/reiserfs/procfs.c | 490 --- fs/reiserfs/reiserfs.h | 3419 --------------- fs/reiserfs/resize.c | 230 - fs/reiserfs/stree.c | 2280 ---------- fs/reiserfs/super.c | 2646 ------------ fs/reiserfs/tail_conversion.c | 318 -- fs/reiserfs/xattr.c | 1039 ----- fs/reiserfs/xattr.h | 117 - fs/reiserfs/xattr_acl.c | 411 -- fs/reiserfs/xattr_security.c | 127 - fs/reiserfs/xattr_trusted.c | 46 - fs/reiserfs/xattr_user.c | 43 - include/uapi/linux/reiserfs_fs.h | 27 - include/uapi/linux/reiserfs_xattr.h | 25 - scripts/selinux/mdp/mdp.c | 3 - tools/objtool/noreturns.h | 1 - .../filesystems/statmount/statmount_test.c | 2 +- 63 files changed, 12 insertions(+), 32804 deletions(-) delete mode 100644 fs/reiserfs/Kconfig delete mode 100644 fs/reiserfs/Makefile delete mode 100644 fs/reiserfs/README delete mode 100644 fs/reiserfs/acl.h delete mode 100644 fs/reiserfs/bitmap.c delete mode 100644 fs/reiserfs/dir.c delete mode 100644 fs/reiserfs/do_balan.c delete mode 100644 fs/reiserfs/file.c delete mode 100644 fs/reiserfs/fix_node.c delete mode 100644 fs/reiserfs/hashes.c delete mode 100644 fs/reiserfs/ibalance.c delete mode 100644 fs/reiserfs/inode.c delete mode 100644 fs/reiserfs/ioctl.c delete mode 100644 fs/reiserfs/item_ops.c delete mode 100644 fs/reiserfs/journal.c delete mode 100644 fs/reiserfs/lbalance.c delete mode 100644 fs/reiserfs/lock.c delete mode 100644 fs/reiserfs/namei.c delete mode 100644 fs/reiserfs/objectid.c delete mode 100644 fs/reiserfs/prints.c delete mode 100644 fs/reiserfs/procfs.c delete mode 100644 fs/reiserfs/reiserfs.h delete mode 100644 fs/reiserfs/resize.c delete mode 100644 fs/reiserfs/stree.c delete mode 100644 fs/reiserfs/super.c delete mode 100644 fs/reiserfs/tail_conversion.c delete mode 100644 fs/reiserfs/xattr.c delete mode 100644 fs/reiserfs/xattr.h delete mode 100644 fs/reiserfs/xattr_acl.c delete mode 100644 fs/reiserfs/xattr_security.c 
delete mode 100644 fs/reiserfs/xattr_trusted.c delete mode 100644 fs/reiserfs/xattr_user.c delete mode 100644 include/uapi/linux/reiserfs_fs.h delete mode 100644 include/uapi/linux/reiserfs_xattr.h (limited to 'include') diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 92bffcc6747a..9ab2a3d6f2b4 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -177,7 +177,7 @@ settles down a bit. **mandatory** s_export_op is now required for exporting a filesystem. -isofs, ext2, ext3, reiserfs, fat +isofs, ext2, ext3, fat can be used as examples of very different filesystems. --- diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index e4be1378ba26..243f1f1b554a 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -375,7 +375,7 @@ Code Seq# Include File Comments 0xCB 00-1F CBM serial IEC bus in development: 0xCC 00-0F drivers/misc/ibmvmc.h pseries VMC driver -0xCD 01 linux/reiserfs_fs.h +0xCD 01 linux/reiserfs_fs.h Dead since 6.13 0xCE 01-02 uapi/linux/cxl_mem.h Compute Express Link Memory Devices 0xCF 02 fs/smb/client/cifs_ioctl.h 0xDB 00-0F drivers/char/mwave/mwavepub.h diff --git a/MAINTAINERS b/MAINTAINERS index 7ad507f49324..02de04d4ae1e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19578,11 +19578,6 @@ F: Documentation/devicetree/bindings/regmap/ F: drivers/base/regmap/ F: include/linux/regmap.h -REISERFS FILE SYSTEM -L: reiserfs-devel@vger.kernel.org -S: Obsolete -F: fs/reiserfs/ - REMOTE PROCESSOR (REMOTEPROC) SUBSYSTEM M: Bjorn Andersson M: Mathieu Poirier diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig index 1816c1dc22b1..3280bd9e6578 100644 --- a/arch/alpha/configs/defconfig +++ b/arch/alpha/configs/defconfig @@ -51,7 +51,6 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_REISERFS_FS=m CONFIG_ISO9660_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index e1cb170c2bf0..38916ac4bce4 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -583,10 +583,6 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT3_FS=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index d01dc47d52ea..fba7b68c235b 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -449,7 +449,6 @@ CONFIG_RTC_DRV_RP5C01=m # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 46808e581d7b..308655a98bb1 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -406,7 +406,6 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 4469a7839c9d..956a3aed97c6 100644 --- a/arch/m68k/configs/atari_defconfig +++ 
b/arch/m68k/configs/atari_defconfig @@ -426,7 +426,6 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index c0719322c028..8790b6756a76 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -398,7 +398,6 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 8d429e63f8f2..dfb2fface338 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -408,7 +408,6 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index bafd33da27c1..6577b4390c38 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -425,7 +425,6 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 6f5ca3f85ea1..ad2bbc92d8d1 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -511,7 +511,6 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index d16b328c7136..3b4a2d2d966f 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -397,7 +397,6 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 80f6c15a5ed5..9711f37d2ef7 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -398,7 +398,6 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 0e81589f0ee2..5ae3b707c849 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -415,7 +415,6 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 8cd785290339..55efa85492d8 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -396,7 +396,6 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set diff --git 
a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 78035369f60f..cf1c78e02fda 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -396,7 +396,6 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index 0311380160f4..d871623955c5 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -95,7 +95,6 @@ CONFIG_USB_SISUSBVGA=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_REISERFS_FS=y CONFIG_ISO9660_FS=m CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index c1032559ecd4..99bc0e889287 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -220,7 +220,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set -CONFIG_REISERFS_FS=m CONFIG_XFS_FS=m CONFIG_FUSE_FS=m CONFIG_ISO9660_FS=m diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig index e543cbac8792..9c9c77f1255a 100644 --- a/arch/um/configs/i386_defconfig +++ b/arch/um/configs/i386_defconfig @@ -61,7 +61,6 @@ CONFIG_UML_NET_DAEMON=y CONFIG_UML_NET_MCAST=y CONFIG_UML_NET_SLIRP=y CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=y CONFIG_QUOTA=y CONFIG_AUTOFS_FS=m CONFIG_ISO9660_FS=m diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig index 939cb12318ca..03b10d3f6816 100644 --- a/arch/um/configs/x86_64_defconfig +++ b/arch/um/configs/x86_64_defconfig @@ -59,7 +59,6 @@ CONFIG_UML_NET_DAEMON=y CONFIG_UML_NET_MCAST=y CONFIG_UML_NET_SLIRP=y CONFIG_EXT4_FS=y -CONFIG_REISERFS_FS=y CONFIG_QUOTA=y CONFIG_AUTOFS_FS=m CONFIG_ISO9660_FS=m diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index ed209f4f2798..a97f2c40c640 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -130,7 +130,7 @@ config BLK_DEV_UBD_SYNC kernel command line option. Alternatively, you can say Y here to turn on synchronous operation by default for all block devices. - If you're running a journalling file system (like reiserfs, for + If you're running a journalling file system (like xfs, for example) in your virtual machine, you will want to say Y here. If you care for the safety of the data in your virtual machine, Y is a wise choice too. In all other cases (for example, if you're just diff --git a/fs/Kconfig b/fs/Kconfig index aae170fc2795..64d420e3c475 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -43,7 +43,6 @@ config FS_MBCACHE default y if EXT4_FS=y default m if EXT2_FS_XATTR || EXT4_FS -source "fs/reiserfs/Kconfig" source "fs/jfs/Kconfig" source "fs/xfs/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index 61679fd587b7..15df0a923d3a 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -61,7 +61,6 @@ obj-$(CONFIG_DLM) += dlm/ # Do not add any filesystems before this line obj-$(CONFIG_NETFS_SUPPORT) += netfs/ -obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT4_FS) += ext4/ # We place ext4 before ext2 so that clean ext3 root fs's do NOT mount using the # ext2 driver, which doesn't know about journalling! 
Explicitly request ext2 diff --git a/fs/buffer.c b/fs/buffer.c index 1fc9a50def0b..c17011bc7120 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -855,8 +855,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) * done a sync(). Just drop the buffers from the inode list. * * NOTE: we take the inode's blockdev's mapping's i_private_lock. Which - * assumes that all the buffers are against the blockdev. Not true - * for reiserfs. + * assumes that all the buffers are against the blockdev. */ void invalidate_inode_buffers(struct inode *inode) { diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index 4c925e55dbcd..818083a36bef 100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig @@ -9,14 +9,13 @@ config QUOTA help If you say Y here, you will be able to set per user limits for disk usage (also called disk quotas). Currently, it works for the - ext2, ext3, ext4, f2fs, jfs, ocfs2 and reiserfs file systems. - Note that gfs2 and xfs use their own quota system. - Ext3, ext4 and reiserfs also support journaled quotas for which - you don't need to run quotacheck(8) after an unclean shutdown. - For further details, read the Quota mini-HOWTO, available from - , or the documentation provided - with the quota tools. Probably the quota support is only useful for - multi user systems. If unsure, say N. + ext2, ext3, ext4, f2fs, jfs and ocfs2 file systems. Note that gfs2 + and xfs use their own quota system. Ext3 and ext4 also support + journaled quotas for which you don't need to run quotacheck(8) after + an unclean shutdown. For further details, read the Quota mini-HOWTO, + available from , or the + documentation provided with the quota tools. Probably the quota + support is only useful for multi user systems. If unsure, say N. config QUOTA_NETLINK_INTERFACE bool "Report quota messages through netlink interface" diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig deleted file mode 100644 index 0e6fe26458fe..000000000000 --- a/fs/reiserfs/Kconfig +++ /dev/null @@ -1,91 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config REISERFS_FS - tristate "Reiserfs support (deprecated)" - select BUFFER_HEAD - select CRC32 - select LEGACY_DIRECT_IO - help - Reiserfs is deprecated and scheduled to be removed from the kernel - in 2025. If you are still using it, please migrate to another - filesystem or tell us your usecase for reiserfs. - - Reiserfs stores not just filenames but the files themselves in a - balanced tree. Uses journalling. - - Balanced trees are more efficient than traditional file system - architectural foundations. - - In general, ReiserFS is as fast as ext2, but is very efficient with - large directories and small files. Additional patches are needed - for NFS and quotas, please see - for links. - - It is more easily extended to have features currently found in - database and keyword search systems than block allocation based file - systems are. The next version will be so extended, and will support - plugins consistent with our motto ``It takes more than a license to - make source code open.'' - - Read - to learn more about reiserfs. - - Sponsored by Threshold Networks, Emusic.com, and Bigstorage.com. - - If you like it, you can pay us to add new features to it that you - need, buy a support contract, or pay us to port it to another OS. - -config REISERFS_CHECK - bool "Enable reiserfs debug mode" - depends on REISERFS_FS - help - If you set this to Y, then ReiserFS will perform every check it can - possibly imagine of its internal consistency throughout its - operation. 
It will also go substantially slower. More than once we - have forgotten that this was on, and then gone despondent over the - latest benchmarks.:-) Use of this option allows our team to go all - out in checking for consistency when debugging without fear of its - effect on end users. If you are on the verge of sending in a bug - report, say Y and you might get a useful error message. Almost - everyone should say N. - -config REISERFS_PROC_INFO - bool "Stats in /proc/fs/reiserfs" - depends on REISERFS_FS && PROC_FS - help - Create under /proc/fs/reiserfs a hierarchy of files, displaying - various ReiserFS statistics and internal data at the expense of - making your kernel or module slightly larger (+8 KB). This also - increases the amount of kernel memory required for each mount. - Almost everyone but ReiserFS developers and people fine-tuning - reiserfs or tracing problems should say N. - -config REISERFS_FS_XATTR - bool "ReiserFS extended attributes" - depends on REISERFS_FS - help - Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page for details). - - If unsure, say N. - -config REISERFS_FS_POSIX_ACL - bool "ReiserFS POSIX Access Control Lists" - depends on REISERFS_FS_XATTR - select FS_POSIX_ACL - help - Posix Access Control Lists (ACLs) support permissions for users and - groups beyond the owner/group/world scheme. - - If you don't know what Access Control Lists are, say N - -config REISERFS_FS_SECURITY - bool "ReiserFS Security Labels" - depends on REISERFS_FS_XATTR - help - Security labels support alternative access control models - implemented by security modules like SELinux. This option - enables an extended attribute handler for file security - labels in the ReiserFS filesystem. - - If you are not using a security module that requires using - extended attributes for file security labels, say N. diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile deleted file mode 100644 index bd29c58ccbd8..000000000000 --- a/fs/reiserfs/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the linux reiser-filesystem routines. -# - -obj-$(CONFIG_REISERFS_FS) += reiserfs.o - -reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \ - super.o prints.o objectid.o lbalance.o ibalance.o stree.o \ - hashes.o tail_conversion.o journal.o resize.o \ - item_ops.o ioctl.o xattr.o lock.o - -ifeq ($(CONFIG_REISERFS_PROC_INFO),y) -reiserfs-objs += procfs.o -endif - -ifeq ($(CONFIG_REISERFS_FS_XATTR),y) -reiserfs-objs += xattr_user.o xattr_trusted.o -endif - -ifeq ($(CONFIG_REISERFS_FS_SECURITY),y) -reiserfs-objs += xattr_security.o -endif - -ifeq ($(CONFIG_REISERFS_FS_POSIX_ACL),y) -reiserfs-objs += xattr_acl.o -endif - -TAGS: - etags *.c diff --git a/fs/reiserfs/README b/fs/reiserfs/README deleted file mode 100644 index 11e9ecf24b63..000000000000 --- a/fs/reiserfs/README +++ /dev/null @@ -1,151 +0,0 @@ -[LICENSING] - -ReiserFS is hereby licensed under the GNU General -Public License version 2. - -Source code files that contain the phrase "licensing governed by -reiserfs/README" are "governed files" throughout this file. Governed -files are licensed under the GPL. The portions of them owned by Hans -Reiser, or authorized to be licensed by him, have been in the past, -and likely will be in the future, licensed to other parties under -other licenses. 
If you add your code to governed files, and don't -want it to be owned by Hans Reiser, put your copyright label on that -code so the poor blight and his customers can keep things straight. -All portions of governed files not labeled otherwise are owned by Hans -Reiser, and by adding your code to it, widely distributing it to -others or sending us a patch, and leaving the sentence in stating that -licensing is governed by the statement in this file, you accept this. -It will be a kindness if you identify whether Hans Reiser is allowed -to license code labeled as owned by you on your behalf other than -under the GPL, because he wants to know if it is okay to do so and put -a check in the mail to you (for non-trivial improvements) when he -makes his next sale. He makes no guarantees as to the amount if any, -though he feels motivated to motivate contributors, and you can surely -discuss this with him before or after contributing. You have the -right to decline to allow him to license your code contribution other -than under the GPL. - -Further licensing options are available for commercial and/or other -interests directly from Hans Reiser: hans@reiser.to. If you interpret -the GPL as not allowing those additional licensing options, you read -it wrongly, and Richard Stallman agrees with me, when carefully read -you can see that those restrictions on additional terms do not apply -to the owner of the copyright, and my interpretation of this shall -govern for this license. - -Finally, nothing in this license shall be interpreted to allow you to -fail to fairly credit me, or to remove my credits, without my -permission, unless you are an end user not redistributing to others. -If you have doubts about how to properly do that, or about what is -fair, ask. (Last I spoke with him Richard was contemplating how best -to address the fair crediting issue in the next GPL version.) - -[END LICENSING] - -Reiserfs is a file system based on balanced tree algorithms, which is -described at https://reiser4.wiki.kernel.org/index.php/Main_Page - -Stop reading here. Go there, then return. - -Send bug reports to yura@namesys.botik.ru. - -mkreiserfs and other utilities are in reiserfs/utils, or wherever your -Linux provider put them. There is some disagreement about how useful -it is for users to get their fsck and mkreiserfs out of sync with the -version of reiserfs that is in their kernel, with many important -distributors wanting them out of sync.:-) Please try to remember to -recompile and reinstall fsck and mkreiserfs with every update of -reiserfs, this is a common source of confusion. Note that some of the -utilities cannot be compiled without accessing the balancing code -which is in the kernel code, and relocating the utilities may require -you to specify where that code can be found. - -Yes, if you update your reiserfs kernel module you do have to -recompile your kernel, most of the time. The errors you get will be -quite cryptic if your forget to do so. - -Real users, as opposed to folks who want to hack and then understand -what went wrong, will want REISERFS_CHECK off. - -Hideous Commercial Pitch: Spread your development costs across other OS -vendors. Select from the best in the world, not the best in your -building, by buying from third party OS component suppliers. Leverage -the software component development power of the internet. 
Be the most -aggressive in taking advantage of the commercial possibilities of -decentralized internet development, and add value through your branded -integration that you sell as an operating system. Let your competitors -be the ones to compete against the entire internet by themselves. Be -hip, get with the new economic trend, before your competitors do. Send -email to hans@reiser.to. - -To understand the code, after reading the website, start reading the -code by reading reiserfs_fs.h first. - -Hans Reiser was the project initiator, primary architect, source of all -funding for the first 5.5 years, and one of the programmers. He owns -the copyright. - -Vladimir Saveljev was one of the programmers, and he worked long hours -writing the cleanest code. He always made the effort to be the best he -could be, and to make his code the best that it could be. What resulted -was quite remarkable. I don't think that money can ever motivate someone -to work the way he did, he is one of the most selfless men I know. - -Yura helps with benchmarking, coding hashes, and block pre-allocation -code. - -Anatoly Pinchuk is a former member of our team who worked closely with -Vladimir throughout the project's development. He wrote a quite -substantial portion of the total code. He realized that there was a -space problem with packing tails of files for files larger than a node -that start on a node aligned boundary (there are reasons to want to node -align files), and he invented and implemented indirect items and -unformatted nodes as the solution. - -Konstantin Shvachko was taking part in the early days. - -Mikhail Gilula was a brilliant innovator that has shown much generosity. - -Grigory Zaigralin was an extremely effective system administrator for -our group. - -Igor Krasheninnikov was wonderful at hardware procurement, repair, and -network installation. - -Jeremy Fitzhardinge wrote the teahash.c code, and he gives credit to a -textbook he got the algorithm from in the code. Note that his analysis -of how we could use the hashing code in making 32 bit NFS cookies work -was probably more important than the actual algorithm. Colin Plumb also -contributed to it. - -Chris Mason dived right into our code, and in just a few months produced -the journaling code that dramatically increased the value of ReiserFS. -He is just an amazing programmer. - -Igor Zagorovsky is writing much of the new item handler and extent code -for our next major release. - -Alexander Zarochentcev (sometimes known as zam, or sasha), wrote the -resizer, and is hard at work on implementing allocate on flush. SGI -implemented allocate on flush before us for XFS, and generously took -the time to convince me we should do it also. They are great people, -and a great company. - -Yuri Shevchuk and Nikita Danilov are doing squid cache optimization. - -Vitaly Fertman is doing fsck. - -Jeff Mahoney, of SuSE, contributed a few cleanup fixes, most notably -the endian safe patches which allow ReiserFS to run on any platform -supported by the Linux kernel. - -SuSE, IntegratedLinux.com, Ecila, MP3.com, bigstorage.com, and the -Alpha PC Company made it possible for me to not have a day job -anymore, and to dramatically increase our staffing. Ecila funded -hypertext feature development, MP3.com funded journaling, SuSE funded -core development, IntegratedLinux.com funded squid web cache -appliances, bigstorage.com funded HSM, and the alpha PC company funded -the alpha port. Many of these tasks were helped by sponsors other -than the ones just named. 
SuSE has helped in much more than just -funding.... - diff --git a/fs/reiserfs/acl.h b/fs/reiserfs/acl.h deleted file mode 100644 index 2571b1a8be84..000000000000 --- a/fs/reiserfs/acl.h +++ /dev/null @@ -1,78 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -#define REISERFS_ACL_VERSION 0x0001 - -typedef struct { - __le16 e_tag; - __le16 e_perm; - __le32 e_id; -} reiserfs_acl_entry; - -typedef struct { - __le16 e_tag; - __le16 e_perm; -} reiserfs_acl_entry_short; - -typedef struct { - __le32 a_version; -} reiserfs_acl_header; - -static inline size_t reiserfs_acl_size(int count) -{ - if (count <= 4) { - return sizeof(reiserfs_acl_header) + - count * sizeof(reiserfs_acl_entry_short); - } else { - return sizeof(reiserfs_acl_header) + - 4 * sizeof(reiserfs_acl_entry_short) + - (count - 4) * sizeof(reiserfs_acl_entry); - } -} - -static inline int reiserfs_acl_count(size_t size) -{ - ssize_t s; - size -= sizeof(reiserfs_acl_header); - s = size - 4 * sizeof(reiserfs_acl_entry_short); - if (s < 0) { - if (size % sizeof(reiserfs_acl_entry_short)) - return -1; - return size / sizeof(reiserfs_acl_entry_short); - } else { - if (s % sizeof(reiserfs_acl_entry)) - return -1; - return s / sizeof(reiserfs_acl_entry) + 4; - } -} - -#ifdef CONFIG_REISERFS_FS_POSIX_ACL -struct posix_acl *reiserfs_get_acl(struct inode *inode, int type, bool rcu); -int reiserfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, - struct posix_acl *acl, int type); -int reiserfs_acl_chmod(struct dentry *dentry); -int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, - struct inode *dir, struct dentry *dentry, - struct inode *inode); -int reiserfs_cache_default_acl(struct inode *dir); - -#else - -#define reiserfs_cache_default_acl(inode) 0 -#define reiserfs_get_acl NULL -#define reiserfs_set_acl NULL - -static inline int reiserfs_acl_chmod(struct dentry *dentry) -{ - return 0; -} - -static inline int -reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, - const struct inode *dir, struct dentry *dentry, - struct inode *inode) -{ - return 0; -} -#endif diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c deleted file mode 100644 index bf708ac287b4..000000000000 --- a/fs/reiserfs/bitmap.c +++ /dev/null @@ -1,1476 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ -/* Reiserfs block (de)allocator, bitmap-based. 
*/ - -#include -#include "reiserfs.h" -#include -#include -#include -#include -#include -#include -#include - -#define PREALLOCATION_SIZE 9 - -/* different reiserfs block allocator options */ - -#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits) - -#define _ALLOC_concentrating_formatted_nodes 0 -#define _ALLOC_displacing_large_files 1 -#define _ALLOC_displacing_new_packing_localities 2 -#define _ALLOC_old_hashed_relocation 3 -#define _ALLOC_new_hashed_relocation 4 -#define _ALLOC_skip_busy 5 -#define _ALLOC_displace_based_on_dirid 6 -#define _ALLOC_hashed_formatted_nodes 7 -#define _ALLOC_old_way 8 -#define _ALLOC_hundredth_slices 9 -#define _ALLOC_dirid_groups 10 -#define _ALLOC_oid_groups 11 -#define _ALLOC_packing_groups 12 - -#define concentrating_formatted_nodes(s) test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s)) -#define displacing_large_files(s) test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s)) -#define displacing_new_packing_localities(s) test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s)) - -#define SET_OPTION(optname) \ - do { \ - reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \ - set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \ - } while(0) -#define TEST_OPTION(optname, s) \ - test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)) - -static inline void get_bit_address(struct super_block *s, - b_blocknr_t block, - unsigned int *bmap_nr, - unsigned int *offset) -{ - /* - * It is in the bitmap block number equal to the block - * number divided by the number of bits in a block. - */ - *bmap_nr = block >> (s->s_blocksize_bits + 3); - /* Within that bitmap block it is located at bit offset *offset. */ - *offset = block & ((s->s_blocksize << 3) - 1); -} - -int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) -{ - unsigned int bmap, offset; - unsigned int bmap_count = reiserfs_bmap_count(s); - - if (block == 0 || block >= SB_BLOCK_COUNT(s)) { - reiserfs_error(s, "vs-4010", - "block number is out of range %lu (%u)", - block, SB_BLOCK_COUNT(s)); - return 0; - } - - get_bit_address(s, block, &bmap, &offset); - - /* - * Old format filesystem? Unlikely, but the bitmaps are all - * up front so we need to account for it. - */ - if (unlikely(test_bit(REISERFS_OLD_FORMAT, - &REISERFS_SB(s)->s_properties))) { - b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1; - if (block >= bmap1 && - block <= bmap1 + bmap_count) { - reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) " - "can't be freed or reused", - block, bmap_count); - return 0; - } - } else { - if (offset == 0) { - reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) " - "can't be freed or reused", - block, bmap_count); - return 0; - } - } - - if (bmap >= bmap_count) { - reiserfs_error(s, "vs-4030", "bitmap for requested block " - "is out of range: block=%lu, bitmap_nr=%u", - block, bmap); - return 0; - } - - if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) { - reiserfs_error(s, "vs-4050", "this is root block (%u), " - "it must be busy", SB_ROOT_BLOCK(s)); - return 0; - } - - return 1; -} - -/* - * Searches in journal structures for a given block number (bmap, off). - * If block is found in reiserfs journal it suggests next free block - * candidate to test. 
- */ -static inline int is_block_in_journal(struct super_block *s, unsigned int bmap, - int off, int *next) -{ - b_blocknr_t tmp; - - if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) { - if (tmp) { /* hint supplied */ - *next = tmp; - PROC_INFO_INC(s, scan_bitmap.in_journal_hint); - } else { - (*next) = off + 1; /* inc offset to avoid looping. */ - PROC_INFO_INC(s, scan_bitmap.in_journal_nohint); - } - PROC_INFO_INC(s, scan_bitmap.retry); - return 1; - } - return 0; -} - -/* - * Searches for a window of zero bits with given minimum and maximum - * lengths in one bitmap block - */ -static int scan_bitmap_block(struct reiserfs_transaction_handle *th, - unsigned int bmap_n, int *beg, int boundary, - int min, int max, int unfm) -{ - struct super_block *s = th->t_super; - struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n]; - struct buffer_head *bh; - int end, next; - int org = *beg; - - BUG_ON(!th->t_trans_id); - RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of " - "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1); - PROC_INFO_INC(s, scan_bitmap.bmap); - - if (!bi) { - reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer " - "for bitmap %d", bmap_n); - return 0; - } - - bh = reiserfs_read_bitmap_block(s, bmap_n); - if (bh == NULL) - return 0; - - while (1) { -cont: - if (bi->free_count < min) { - brelse(bh); - return 0; /* No free blocks in this bitmap */ - } - - /* search for a first zero bit -- beginning of a window */ - *beg = reiserfs_find_next_zero_le_bit - ((unsigned long *)(bh->b_data), boundary, *beg); - - /* - * search for a zero bit fails or the rest of bitmap block - * cannot contain a zero window of minimum size - */ - if (*beg + min > boundary) { - brelse(bh); - return 0; - } - - if (unfm && is_block_in_journal(s, bmap_n, *beg, beg)) - continue; - /* first zero bit found; we check next bits */ - for (end = *beg + 1;; end++) { - if (end >= *beg + max || end >= boundary - || reiserfs_test_le_bit(end, bh->b_data)) { - next = end; - break; - } - - /* - * finding the other end of zero bit window requires - * looking into journal structures (in case of - * searching for free blocks for unformatted nodes) - */ - if (unfm && is_block_in_journal(s, bmap_n, end, &next)) - break; - } - - /* - * now (*beg) points to beginning of zero bits window, - * (end) points to one bit after the window end - */ - - /* found window of proper size */ - if (end - *beg >= min) { - int i; - reiserfs_prepare_for_journal(s, bh, 1); - /* - * try to set all blocks used checking are - * they still free - */ - for (i = *beg; i < end; i++) { - /* Don't check in journal again. */ - if (reiserfs_test_and_set_le_bit - (i, bh->b_data)) { - /* - * bit was set by another process while - * we slept in prepare_for_journal() - */ - PROC_INFO_INC(s, scan_bitmap.stolen); - - /* - * we can continue with smaller set - * of allocated blocks, if length of - * this set is more or equal to `min' - */ - if (i >= *beg + min) { - end = i; - break; - } - - /* - * otherwise we clear all bit - * were set ... 
- */ - while (--i >= *beg) - reiserfs_clear_le_bit - (i, bh->b_data); - reiserfs_restore_prepared_buffer(s, bh); - *beg = org; - - /* - * Search again in current block - * from beginning - */ - goto cont; - } - } - bi->free_count -= (end - *beg); - journal_mark_dirty(th, bh); - brelse(bh); - - /* free block count calculation */ - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), - 1); - PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg)); - journal_mark_dirty(th, SB_BUFFER_WITH_SB(s)); - - return end - (*beg); - } else { - *beg = next; - } - } -} - -static int bmap_hash_id(struct super_block *s, u32 id) -{ - char *hash_in = NULL; - unsigned long hash; - unsigned bm; - - if (id <= 2) { - bm = 1; - } else { - hash_in = (char *)(&id); - hash = keyed_hash(hash_in, 4); - bm = hash % reiserfs_bmap_count(s); - if (!bm) - bm = 1; - } - /* this can only be true when SB_BMAP_NR = 1 */ - if (bm >= reiserfs_bmap_count(s)) - bm = 0; - return bm; -} - -/* - * hashes the id and then returns > 0 if the block group for the - * corresponding hash is full - */ -static inline int block_group_used(struct super_block *s, u32 id) -{ - int bm = bmap_hash_id(s, id); - struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm]; - - /* - * If we don't have cached information on this bitmap block, we're - * going to have to load it later anyway. Loading it here allows us - * to make a better decision. This favors long-term performance gain - * with a better on-disk layout vs. a short term gain of skipping the - * read and potentially having a bad placement. - */ - if (info->free_count == UINT_MAX) { - struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm); - brelse(bh); - } - - if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) { - return 0; - } - return 1; -} - -/* - * the packing is returned in disk byte order - */ -__le32 reiserfs_choose_packing(struct inode * dir) -{ - __le32 packing; - if (TEST_OPTION(packing_groups, dir->i_sb)) { - u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id); - /* - * some versions of reiserfsck expect packing locality 1 to be - * special - */ - if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir)) - packing = INODE_PKEY(dir)->k_objectid; - else - packing = INODE_PKEY(dir)->k_dir_id; - } else - packing = INODE_PKEY(dir)->k_objectid; - return packing; -} - -/* - * Tries to find contiguous zero bit window (given size) in given region of - * bitmap and place new blocks there. Returns number of allocated blocks. - */ -static int scan_bitmap(struct reiserfs_transaction_handle *th, - b_blocknr_t * start, b_blocknr_t finish, - int min, int max, int unfm, sector_t file_block) -{ - int nr_allocated = 0; - struct super_block *s = th->t_super; - unsigned int bm, off; - unsigned int end_bm, end_off; - unsigned int off_max = s->s_blocksize << 3; - - BUG_ON(!th->t_trans_id); - PROC_INFO_INC(s, scan_bitmap.call); - - /* No point in looking for more free blocks */ - if (SB_FREE_BLOCKS(s) <= 0) - return 0; - - get_bit_address(s, *start, &bm, &off); - get_bit_address(s, finish, &end_bm, &end_off); - if (bm > reiserfs_bmap_count(s)) - return 0; - if (end_bm > reiserfs_bmap_count(s)) - end_bm = reiserfs_bmap_count(s); - - /* - * When the bitmap is more than 10% free, anyone can allocate. - * When it's less than 10% free, only files that already use the - * bitmap are allowed. Once we pass 80% full, this restriction - * is lifted. - * - * We do this so that files that grow later still have space close to - * their original allocation. 
This improves locality, and presumably - * performance as a result. - * - * This is only an allocation policy and does not make up for getting a - * bad hint. Decent hinting must be implemented for this to work well. - */ - if (TEST_OPTION(skip_busy, s) - && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) { - for (; bm < end_bm; bm++, off = 0) { - if ((off && (!unfm || (file_block != 0))) - || SB_AP_BITMAP(s)[bm].free_count > - (s->s_blocksize << 3) / 10) - nr_allocated = - scan_bitmap_block(th, bm, &off, off_max, - min, max, unfm); - if (nr_allocated) - goto ret; - } - /* we know from above that start is a reasonable number */ - get_bit_address(s, *start, &bm, &off); - } - - for (; bm < end_bm; bm++, off = 0) { - nr_allocated = - scan_bitmap_block(th, bm, &off, off_max, min, max, unfm); - if (nr_allocated) - goto ret; - } - - nr_allocated = - scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm); - -ret: - *start = bm * off_max + off; - return nr_allocated; - -} - -static void _reiserfs_free_block(struct reiserfs_transaction_handle *th, - struct inode *inode, b_blocknr_t block, - int for_unformatted) -{ - struct super_block *s = th->t_super; - struct reiserfs_super_block *rs; - struct buffer_head *sbh, *bmbh; - struct reiserfs_bitmap_info *apbi; - unsigned int nr, offset; - - BUG_ON(!th->t_trans_id); - PROC_INFO_INC(s, free_block); - rs = SB_DISK_SUPER_BLOCK(s); - sbh = SB_BUFFER_WITH_SB(s); - apbi = SB_AP_BITMAP(s); - - get_bit_address(s, block, &nr, &offset); - - if (nr >= reiserfs_bmap_count(s)) { - reiserfs_error(s, "vs-4075", "block %lu is out of range", - block); - return; - } - - bmbh = reiserfs_read_bitmap_block(s, nr); - if (!bmbh) - return; - - reiserfs_prepare_for_journal(s, bmbh, 1); - - /* clear bit for the given block in bit map */ - if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) { - reiserfs_error(s, "vs-4080", - "block %lu: bit already cleared", block); - } - apbi[nr].free_count++; - journal_mark_dirty(th, bmbh); - brelse(bmbh); - - reiserfs_prepare_for_journal(s, sbh, 1); - /* update super block */ - set_sb_free_blocks(rs, sb_free_blocks(rs) + 1); - - journal_mark_dirty(th, sbh); - if (for_unformatted) { - int depth = reiserfs_write_unlock_nested(s); - dquot_free_block_nodirty(inode, 1); - reiserfs_write_lock_nested(s, depth); - } -} - -void reiserfs_free_block(struct reiserfs_transaction_handle *th, - struct inode *inode, b_blocknr_t block, - int for_unformatted) -{ - struct super_block *s = th->t_super; - - BUG_ON(!th->t_trans_id); - RFALSE(!s, "vs-4061: trying to free block on nonexistent device"); - if (!is_reusable(s, block, 1)) - return; - - if (block > sb_block_count(REISERFS_SB(s)->s_rs)) { - reiserfs_error(th->t_super, "bitmap-4072", - "Trying to free block outside file system " - "boundaries (%lu > %lu)", - block, sb_block_count(REISERFS_SB(s)->s_rs)); - return; - } - /* mark it before we clear it, just in case */ - journal_mark_freed(th, s, block); - _reiserfs_free_block(th, inode, block, for_unformatted); -} - -/* preallocated blocks don't need to be run through journal_mark_freed */ -static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th, - struct inode *inode, b_blocknr_t block) -{ - BUG_ON(!th->t_trans_id); - RFALSE(!th->t_super, - "vs-4060: trying to free block on nonexistent device"); - if (!is_reusable(th->t_super, block, 1)) - return; - _reiserfs_free_block(th, inode, block, 1); -} - -static void __discard_prealloc(struct reiserfs_transaction_handle *th, - struct reiserfs_inode_info *ei) -{ - unsigned long save = 
ei->i_prealloc_block; - int dirty = 0; - struct inode *inode = &ei->vfs_inode; - - BUG_ON(!th->t_trans_id); -#ifdef CONFIG_REISERFS_CHECK - if (ei->i_prealloc_count < 0) - reiserfs_error(th->t_super, "zam-4001", - "inode has negative prealloc blocks count."); -#endif - while (ei->i_prealloc_count > 0) { - b_blocknr_t block_to_free; - - /* - * reiserfs_free_prealloc_block can drop the write lock, - * which could allow another caller to free the same block. - * We can protect against it by modifying the prealloc - * state before calling it. - */ - block_to_free = ei->i_prealloc_block++; - ei->i_prealloc_count--; - reiserfs_free_prealloc_block(th, inode, block_to_free); - dirty = 1; - } - if (dirty) - reiserfs_update_sd(th, inode); - ei->i_prealloc_block = save; - list_del_init(&ei->i_prealloc_list); -} - -/* FIXME: It should be inline function */ -void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th, - struct inode *inode) -{ - struct reiserfs_inode_info *ei = REISERFS_I(inode); - - BUG_ON(!th->t_trans_id); - if (ei->i_prealloc_count) - __discard_prealloc(th, ei); -} - -void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th) -{ - struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list; - - BUG_ON(!th->t_trans_id); - while (!list_empty(plist)) { - struct reiserfs_inode_info *ei; - ei = list_entry(plist->next, struct reiserfs_inode_info, - i_prealloc_list); -#ifdef CONFIG_REISERFS_CHECK - if (!ei->i_prealloc_count) { - reiserfs_error(th->t_super, "zam-4001", - "inode is in prealloc list but has " - "no preallocated blocks."); - } -#endif - __discard_prealloc(th, ei); - } -} - -void reiserfs_init_alloc_options(struct super_block *s) -{ - set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s)); - set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s)); - set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s)); -} - -/* block allocator related options are parsed here */ -int reiserfs_parse_alloc_options(struct super_block *s, char *options) -{ - char *this_char, *value; - - /* clear default settings */ - REISERFS_SB(s)->s_alloc_options.bits = 0; - - while ((this_char = strsep(&options, ":")) != NULL) { - if ((value = strchr(this_char, '=')) != NULL) - *value++ = 0; - - if (!strcmp(this_char, "concentrating_formatted_nodes")) { - int temp; - SET_OPTION(concentrating_formatted_nodes); - temp = (value - && *value) ? simple_strtoul(value, &value, - 0) : 10; - if (temp <= 0 || temp > 100) { - REISERFS_SB(s)->s_alloc_options.border = 10; - } else { - REISERFS_SB(s)->s_alloc_options.border = - 100 / temp; - } - continue; - } - if (!strcmp(this_char, "displacing_large_files")) { - SET_OPTION(displacing_large_files); - REISERFS_SB(s)->s_alloc_options.large_file_size = - (value - && *value) ? 
simple_strtoul(value, &value, 0) : 16; - continue; - } - if (!strcmp(this_char, "displacing_new_packing_localities")) { - SET_OPTION(displacing_new_packing_localities); - continue; - } - - if (!strcmp(this_char, "old_hashed_relocation")) { - SET_OPTION(old_hashed_relocation); - continue; - } - - if (!strcmp(this_char, "new_hashed_relocation")) { - SET_OPTION(new_hashed_relocation); - continue; - } - - if (!strcmp(this_char, "dirid_groups")) { - SET_OPTION(dirid_groups); - continue; - } - if (!strcmp(this_char, "oid_groups")) { - SET_OPTION(oid_groups); - continue; - } - if (!strcmp(this_char, "packing_groups")) { - SET_OPTION(packing_groups); - continue; - } - if (!strcmp(this_char, "hashed_formatted_nodes")) { - SET_OPTION(hashed_formatted_nodes); - continue; - } - - if (!strcmp(this_char, "skip_busy")) { - SET_OPTION(skip_busy); - continue; - } - - if (!strcmp(this_char, "hundredth_slices")) { - SET_OPTION(hundredth_slices); - continue; - } - - if (!strcmp(this_char, "old_way")) { - SET_OPTION(old_way); - continue; - } - - if (!strcmp(this_char, "displace_based_on_dirid")) { - SET_OPTION(displace_based_on_dirid); - continue; - } - - if (!strcmp(this_char, "preallocmin")) { - REISERFS_SB(s)->s_alloc_options.preallocmin = - (value - && *value) ? simple_strtoul(value, &value, 0) : 4; - continue; - } - - if (!strcmp(this_char, "preallocsize")) { - REISERFS_SB(s)->s_alloc_options.preallocsize = - (value - && *value) ? simple_strtoul(value, &value, - 0) : - PREALLOCATION_SIZE; - continue; - } - - reiserfs_warning(s, "zam-4001", "unknown option - %s", - this_char); - return 1; - } - - reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s)); - return 0; -} - -static void print_sep(struct seq_file *seq, int *first) -{ - if (!*first) - seq_puts(seq, ":"); - else - *first = 0; -} - -void show_alloc_options(struct seq_file *seq, struct super_block *s) -{ - int first = 1; - - if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) | - (1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups))) - return; - - seq_puts(seq, ",alloc="); - - if (TEST_OPTION(concentrating_formatted_nodes, s)) { - print_sep(seq, &first); - if (REISERFS_SB(s)->s_alloc_options.border != 10) { - seq_printf(seq, "concentrating_formatted_nodes=%d", - 100 / REISERFS_SB(s)->s_alloc_options.border); - } else - seq_puts(seq, "concentrating_formatted_nodes"); - } - if (TEST_OPTION(displacing_large_files, s)) { - print_sep(seq, &first); - if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) { - seq_printf(seq, "displacing_large_files=%lu", - REISERFS_SB(s)->s_alloc_options.large_file_size); - } else - seq_puts(seq, "displacing_large_files"); - } - if (TEST_OPTION(displacing_new_packing_localities, s)) { - print_sep(seq, &first); - seq_puts(seq, "displacing_new_packing_localities"); - } - if (TEST_OPTION(old_hashed_relocation, s)) { - print_sep(seq, &first); - seq_puts(seq, "old_hashed_relocation"); - } - if (TEST_OPTION(new_hashed_relocation, s)) { - print_sep(seq, &first); - seq_puts(seq, "new_hashed_relocation"); - } - if (TEST_OPTION(dirid_groups, s)) { - print_sep(seq, &first); - seq_puts(seq, "dirid_groups"); - } - if (TEST_OPTION(oid_groups, s)) { - print_sep(seq, &first); - seq_puts(seq, "oid_groups"); - } - if (TEST_OPTION(packing_groups, s)) { - print_sep(seq, &first); - seq_puts(seq, "packing_groups"); - } - if (TEST_OPTION(hashed_formatted_nodes, s)) { - print_sep(seq, &first); - seq_puts(seq, "hashed_formatted_nodes"); - } - if (TEST_OPTION(skip_busy, s)) { - print_sep(seq, &first); - seq_puts(seq, "skip_busy"); 
- } - if (TEST_OPTION(hundredth_slices, s)) { - print_sep(seq, &first); - seq_puts(seq, "hundredth_slices"); - } - if (TEST_OPTION(old_way, s)) { - print_sep(seq, &first); - seq_puts(seq, "old_way"); - } - if (TEST_OPTION(displace_based_on_dirid, s)) { - print_sep(seq, &first); - seq_puts(seq, "displace_based_on_dirid"); - } - if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) { - print_sep(seq, &first); - seq_printf(seq, "preallocmin=%d", - REISERFS_SB(s)->s_alloc_options.preallocmin); - } - if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) { - print_sep(seq, &first); - seq_printf(seq, "preallocsize=%d", - REISERFS_SB(s)->s_alloc_options.preallocsize); - } -} - -static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint) -{ - char *hash_in; - - if (hint->formatted_node) { - hash_in = (char *)&hint->key.k_dir_id; - } else { - if (!hint->inode) { - /*hint->search_start = hint->beg;*/ - hash_in = (char *)&hint->key.k_dir_id; - } else - if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) - hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); - else - hash_in = - (char *)(&INODE_PKEY(hint->inode)->k_objectid); - } - - hint->search_start = - hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); -} - -/* - * Relocation based on dirid, hashing them into a given bitmap block - * files. Formatted nodes are unaffected, a separate policy covers them - */ -static void dirid_groups(reiserfs_blocknr_hint_t * hint) -{ - unsigned long hash; - __u32 dirid = 0; - int bm = 0; - struct super_block *sb = hint->th->t_super; - - if (hint->inode) - dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); - else if (hint->formatted_node) - dirid = hint->key.k_dir_id; - - if (dirid) { - bm = bmap_hash_id(sb, dirid); - hash = bm * (sb->s_blocksize << 3); - /* give a portion of the block group to metadata */ - if (hint->inode) - hash += sb->s_blocksize / 2; - hint->search_start = hash; - } -} - -/* - * Relocation based on oid, hashing them into a given bitmap block - * files. 
Formatted nodes are unaffected, a separate policy covers them - */ -static void oid_groups(reiserfs_blocknr_hint_t * hint) -{ - if (hint->inode) { - unsigned long hash; - __u32 oid; - __u32 dirid; - int bm; - - dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); - - /* - * keep the root dir and it's first set of subdirs close to - * the start of the disk - */ - if (dirid <= 2) - hash = (hint->inode->i_sb->s_blocksize << 3); - else { - oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid); - bm = bmap_hash_id(hint->inode->i_sb, oid); - hash = bm * (hint->inode->i_sb->s_blocksize << 3); - } - hint->search_start = hash; - } -} - -/* - * returns 1 if it finds an indirect item and gets valid hint info - * from it, otherwise 0 - */ -static int get_left_neighbor(reiserfs_blocknr_hint_t * hint) -{ - struct treepath *path; - struct buffer_head *bh; - struct item_head *ih; - int pos_in_item; - __le32 *item; - int ret = 0; - - /* - * reiserfs code can call this function w/o pointer to path - * structure supplied; then we rely on supplied search_start - */ - if (!hint->path) - return 0; - - path = hint->path; - bh = get_last_bh(path); - RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor"); - ih = tp_item_head(path); - pos_in_item = path->pos_in_item; - item = tp_item_body(path); - - hint->search_start = bh->b_blocknr; - - /* - * for indirect item: go to left and look for the first non-hole entry - * in the indirect item - */ - if (!hint->formatted_node && is_indirect_le_ih(ih)) { - if (pos_in_item == I_UNFM_NUM(ih)) - pos_in_item--; - while (pos_in_item >= 0) { - int t = get_block_num(item, pos_in_item); - if (t) { - hint->search_start = t; - ret = 1; - break; - } - pos_in_item--; - } - } - - /* does result value fit into specified region? */ - return ret; -} - -/* - * should be, if formatted node, then try to put on first part of the device - * specified as number of percent with mount option device, else try to put - * on last of device. This is not to say it is good code to do so, - * but the effect should be measured. 
- */ -static inline void set_border_in_hint(struct super_block *s, - reiserfs_blocknr_hint_t * hint) -{ - b_blocknr_t border = - SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border; - - if (hint->formatted_node) - hint->end = border - 1; - else - hint->beg = border; -} - -static inline void displace_large_file(reiserfs_blocknr_hint_t * hint) -{ - if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) - hint->search_start = - hint->beg + - keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id), - 4) % (hint->end - hint->beg); - else - hint->search_start = - hint->beg + - keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid), - 4) % (hint->end - hint->beg); -} - -static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint) -{ - char *hash_in; - - if (!hint->inode) - hash_in = (char *)&hint->key.k_dir_id; - else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) - hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); - else - hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid); - - hint->search_start = - hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); -} - -static inline int -this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t * - hint) -{ - return hint->block == - REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size; -} - -#ifdef DISPLACE_NEW_PACKING_LOCALITIES -static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint) -{ - struct in_core_key *key = &hint->key; - - hint->th->displace_new_blocks = 0; - hint->search_start = - hint->beg + keyed_hash((char *)(&key->k_objectid), - 4) % (hint->end - hint->beg); -} -#endif - -static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint) -{ - b_blocknr_t border; - u32 hash_in; - - if (hint->formatted_node || hint->inode == NULL) { - return 0; - } - - hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id); - border = - hint->beg + (u32) keyed_hash(((char *)(&hash_in)), - 4) % (hint->end - hint->beg - 1); - if (border > hint->search_start) - hint->search_start = border; - - return 1; -} - -static inline int old_way(reiserfs_blocknr_hint_t * hint) -{ - b_blocknr_t border; - - if (hint->formatted_node || hint->inode == NULL) { - return 0; - } - - border = - hint->beg + - le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end - - hint->beg); - if (border > hint->search_start) - hint->search_start = border; - - return 1; -} - -static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint) -{ - struct in_core_key *key = &hint->key; - b_blocknr_t slice_start; - - slice_start = - (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100); - if (slice_start > hint->search_start - || slice_start + (hint->end / 100) <= hint->search_start) { - hint->search_start = slice_start; - } -} - -static void determine_search_start(reiserfs_blocknr_hint_t * hint, - int amount_needed) -{ - struct super_block *s = hint->th->t_super; - int unfm_hint; - - hint->beg = 0; - hint->end = SB_BLOCK_COUNT(s) - 1; - - /* This is former border algorithm. Now with tunable border offset */ - if (concentrating_formatted_nodes(s)) - set_border_in_hint(s, hint); - -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - /* - * whenever we create a new directory, we displace it. 
At first - * we will hash for location, later we might look for a moderately - * empty place for it - */ - if (displacing_new_packing_localities(s) - && hint->th->displace_new_blocks) { - displace_new_packing_locality(hint); - - /* - * we do not continue determine_search_start, - * if new packing locality is being displaced - */ - return; - } -#endif - - /* - * all persons should feel encouraged to add more special cases - * here and test them - */ - - if (displacing_large_files(s) && !hint->formatted_node - && this_blocknr_allocation_would_make_it_a_large_file(hint)) { - displace_large_file(hint); - return; - } - - /* - * if none of our special cases is relevant, use the left - * neighbor in the tree order of the new node we are allocating for - */ - if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) { - hash_formatted_node(hint); - return; - } - - unfm_hint = get_left_neighbor(hint); - - /* - * Mimic old block allocator behaviour, that is if VFS allowed for - * preallocation, new blocks are displaced based on directory ID. - * Also, if suggested search_start is less than last preallocated - * block, we start searching from it, assuming that HDD dataflow - * is faster in forward direction - */ - if (TEST_OPTION(old_way, s)) { - if (!hint->formatted_node) { - if (!reiserfs_hashed_relocation(s)) - old_way(hint); - else if (!reiserfs_no_unhashed_relocation(s)) - old_hashed_relocation(hint); - - if (hint->inode - && hint->search_start < - REISERFS_I(hint->inode)->i_prealloc_block) - hint->search_start = - REISERFS_I(hint->inode)->i_prealloc_block; - } - return; - } - - /* This is an approach proposed by Hans */ - if (TEST_OPTION(hundredth_slices, s) - && !(displacing_large_files(s) && !hint->formatted_node)) { - hundredth_slices(hint); - return; - } - - /* old_hashed_relocation only works on unformatted */ - if (!unfm_hint && !hint->formatted_node && - TEST_OPTION(old_hashed_relocation, s)) { - old_hashed_relocation(hint); - } - - /* new_hashed_relocation works with both formatted/unformatted nodes */ - if ((!unfm_hint || hint->formatted_node) && - TEST_OPTION(new_hashed_relocation, s)) { - new_hashed_relocation(hint); - } - - /* dirid grouping works only on unformatted nodes */ - if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) { - dirid_groups(hint); - } -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) { - dirid_groups(hint); - } -#endif - - /* oid grouping works only on unformatted nodes */ - if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) { - oid_groups(hint); - } - return; -} - -static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint) -{ - /* make minimum size a mount option and benchmark both ways */ - /* we preallocate blocks only for regular files, specific size */ - /* benchmark preallocating always and see what happens */ - - hint->prealloc_size = 0; - - if (!hint->formatted_node && hint->preallocate) { - if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode) - && hint->inode->i_size >= - REISERFS_SB(hint->th->t_super)->s_alloc_options. - preallocmin * hint->inode->i_sb->s_blocksize) - hint->prealloc_size = - REISERFS_SB(hint->th->t_super)->s_alloc_options. 
- preallocsize - 1; - } - return CARRY_ON; -} - -static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint, - b_blocknr_t * new_blocknrs, - b_blocknr_t start, - b_blocknr_t finish, int min, - int amount_needed, - int prealloc_size) -{ - int rest = amount_needed; - int nr_allocated; - - while (rest > 0 && start <= finish) { - nr_allocated = scan_bitmap(hint->th, &start, finish, min, - rest + prealloc_size, - !hint->formatted_node, hint->block); - - if (nr_allocated == 0) /* no new blocks allocated, return */ - break; - - /* fill free_blocknrs array first */ - while (rest > 0 && nr_allocated > 0) { - *new_blocknrs++ = start++; - rest--; - nr_allocated--; - } - - /* do we have something to fill prealloc. array also ? */ - if (nr_allocated > 0) { - /* - * it means prealloc_size was greater that 0 and - * we do preallocation - */ - list_add(&REISERFS_I(hint->inode)->i_prealloc_list, - &SB_JOURNAL(hint->th->t_super)-> - j_prealloc_list); - REISERFS_I(hint->inode)->i_prealloc_block = start; - REISERFS_I(hint->inode)->i_prealloc_count = - nr_allocated; - break; - } - } - - return (amount_needed - rest); -} - -static inline int blocknrs_and_prealloc_arrays_from_search_start - (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, - int amount_needed) { - struct super_block *s = hint->th->t_super; - b_blocknr_t start = hint->search_start; - b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1; - int passno = 0; - int nr_allocated = 0; - int depth; - - determine_prealloc_size(hint); - if (!hint->formatted_node) { - int quota_ret; -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(s, REISERFS_DEBUG_CODE, - "reiserquota: allocating %d blocks id=%u", - amount_needed, hint->inode->i_uid); -#endif - depth = reiserfs_write_unlock_nested(s); - quota_ret = - dquot_alloc_block_nodirty(hint->inode, amount_needed); - if (quota_ret) { /* Quota exceeded? 
*/ - reiserfs_write_lock_nested(s, depth); - return QUOTA_EXCEEDED; - } - if (hint->preallocate && hint->prealloc_size) { -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(s, REISERFS_DEBUG_CODE, - "reiserquota: allocating (prealloc) %d blocks id=%u", - hint->prealloc_size, hint->inode->i_uid); -#endif - quota_ret = dquot_prealloc_block_nodirty(hint->inode, - hint->prealloc_size); - if (quota_ret) - hint->preallocate = hint->prealloc_size = 0; - } - /* for unformatted nodes, force large allocations */ - reiserfs_write_lock_nested(s, depth); - } - - do { - switch (passno++) { - case 0: /* Search from hint->search_start to end of disk */ - start = hint->search_start; - finish = SB_BLOCK_COUNT(s) - 1; - break; - case 1: /* Search from hint->beg to hint->search_start */ - start = hint->beg; - finish = hint->search_start; - break; - case 2: /* Last chance: Search from 0 to hint->beg */ - start = 0; - finish = hint->beg; - break; - default: - /* We've tried searching everywhere, not enough space */ - /* Free the blocks */ - if (!hint->formatted_node) { -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(s, REISERFS_DEBUG_CODE, - "reiserquota: freeing (nospace) %d blocks id=%u", - amount_needed + - hint->prealloc_size - - nr_allocated, - hint->inode->i_uid); -#endif - /* Free not allocated blocks */ - depth = reiserfs_write_unlock_nested(s); - dquot_free_block_nodirty(hint->inode, - amount_needed + hint->prealloc_size - - nr_allocated); - reiserfs_write_lock_nested(s, depth); - } - while (nr_allocated--) - reiserfs_free_block(hint->th, hint->inode, - new_blocknrs[nr_allocated], - !hint->formatted_node); - - return NO_DISK_SPACE; - } - } while ((nr_allocated += allocate_without_wrapping_disk(hint, - new_blocknrs + - nr_allocated, - start, finish, - 1, - amount_needed - - nr_allocated, - hint-> - prealloc_size)) - < amount_needed); - if (!hint->formatted_node && - amount_needed + hint->prealloc_size > - nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) { - /* Some of preallocation blocks were not allocated */ -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(s, REISERFS_DEBUG_CODE, - "reiserquota: freeing (failed prealloc) %d blocks id=%u", - amount_needed + hint->prealloc_size - - nr_allocated - - REISERFS_I(hint->inode)->i_prealloc_count, - hint->inode->i_uid); -#endif - - depth = reiserfs_write_unlock_nested(s); - dquot_free_block_nodirty(hint->inode, amount_needed + - hint->prealloc_size - nr_allocated - - REISERFS_I(hint->inode)-> - i_prealloc_count); - reiserfs_write_lock_nested(s, depth); - } - - return CARRY_ON; -} - -/* grab new blocknrs from preallocated list */ -/* return amount still needed after using them */ -static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint, - b_blocknr_t * new_blocknrs, - int amount_needed) -{ - struct inode *inode = hint->inode; - - if (REISERFS_I(inode)->i_prealloc_count > 0) { - while (amount_needed) { - - *new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++; - REISERFS_I(inode)->i_prealloc_count--; - - amount_needed--; - - if (REISERFS_I(inode)->i_prealloc_count <= 0) { - list_del(&REISERFS_I(inode)->i_prealloc_list); - break; - } - } - } - /* return amount still needed after using preallocated blocks */ - return amount_needed; -} - -int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint, - b_blocknr_t *new_blocknrs, - int amount_needed, - /* Amount of blocks we have already reserved */ - int reserved_by_us) -{ - int initial_amount_needed = amount_needed; - int ret; - struct super_block *s = hint->th->t_super; - - /* Check if there is enough 
space, taking into account reserved space */ - if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks < - amount_needed - reserved_by_us) - return NO_DISK_SPACE; - /* should this be if !hint->inode && hint->preallocate? */ - /* do you mean hint->formatted_node can be removed ? - Zam */ - /* - * hint->formatted_node cannot be removed because we try to access - * inode information here, and there is often no inode associated with - * metadata allocations - green - */ - - if (!hint->formatted_node && hint->preallocate) { - amount_needed = use_preallocated_list_if_available - (hint, new_blocknrs, amount_needed); - - /* - * We have all the block numbers we need from the - * prealloc list - */ - if (amount_needed == 0) - return CARRY_ON; - new_blocknrs += (initial_amount_needed - amount_needed); - } - - /* find search start and save it in hint structure */ - determine_search_start(hint, amount_needed); - if (hint->search_start >= SB_BLOCK_COUNT(s)) - hint->search_start = SB_BLOCK_COUNT(s) - 1; - - /* allocation itself; fill new_blocknrs and preallocation arrays */ - ret = blocknrs_and_prealloc_arrays_from_search_start - (hint, new_blocknrs, amount_needed); - - /* - * We used prealloc. list to fill (partially) new_blocknrs array. - * If final allocation fails we need to return blocks back to - * prealloc. list or just free them. -- Zam (I chose second - * variant) - */ - if (ret != CARRY_ON) { - while (amount_needed++ < initial_amount_needed) { - reiserfs_free_block(hint->th, hint->inode, - *(--new_blocknrs), 1); - } - } - return ret; -} - -void reiserfs_cache_bitmap_metadata(struct super_block *sb, - struct buffer_head *bh, - struct reiserfs_bitmap_info *info) -{ - unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size); - - /* The first bit must ALWAYS be 1 */ - if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data)) - reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is " - "corrupted: first bit must be 1", bh->b_blocknr); - - info->free_count = 0; - - while (--cur >= (unsigned long *)bh->b_data) { - /* 0 and ~0 are special, we can optimize for them */ - if (*cur == 0) - info->free_count += BITS_PER_LONG; - else if (*cur != ~0L) /* A mix, investigate */ - info->free_count += BITS_PER_LONG - hweight_long(*cur); - } -} - -struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, - unsigned int bitmap) -{ - b_blocknr_t block = (sb->s_blocksize << 3) * bitmap; - struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap; - struct buffer_head *bh; - - /* - * Way old format filesystems had the bitmaps packed up front. - * I doubt there are any of these left, but just in case... 
- */ - if (unlikely(test_bit(REISERFS_OLD_FORMAT, - &REISERFS_SB(sb)->s_properties))) - block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap; - else if (bitmap == 0) - block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; - - bh = sb_bread(sb, block); - if (bh == NULL) - reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " - "reading failed", __func__, block); - else { - if (buffer_locked(bh)) { - int depth; - PROC_INFO_INC(sb, scan_bitmap.wait); - depth = reiserfs_write_unlock_nested(sb); - __wait_on_buffer(bh); - reiserfs_write_lock_nested(sb, depth); - } - BUG_ON(!buffer_uptodate(bh)); - BUG_ON(atomic_read(&bh->b_count) == 0); - - if (info->free_count == UINT_MAX) - reiserfs_cache_bitmap_metadata(sb, bh, info); - } - - return bh; -} - -int reiserfs_init_bitmap_cache(struct super_block *sb) -{ - struct reiserfs_bitmap_info *bitmap; - unsigned int bmap_nr = reiserfs_bmap_count(sb); - - bitmap = vmalloc(array_size(bmap_nr, sizeof(*bitmap))); - if (bitmap == NULL) - return -ENOMEM; - - memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr); - - SB_AP_BITMAP(sb) = bitmap; - - return 0; -} - -void reiserfs_free_bitmap_cache(struct super_block *sb) -{ - if (SB_AP_BITMAP(sb)) { - vfree(SB_AP_BITMAP(sb)); - SB_AP_BITMAP(sb) = NULL; - } -} diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c deleted file mode 100644 index 79ee2b436685..000000000000 --- a/fs/reiserfs/dir.c +++ /dev/null @@ -1,346 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include -#include -#include -#include "reiserfs.h" -#include -#include -#include -#include - -extern const struct reiserfs_key MIN_KEY; - -static int reiserfs_readdir(struct file *, struct dir_context *); -static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end, - int datasync); - -const struct file_operations reiserfs_dir_operations = { - .llseek = generic_file_llseek, - .read = generic_read_dir, - .iterate_shared = reiserfs_readdir, - .fsync = reiserfs_dir_fsync, - .unlocked_ioctl = reiserfs_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = reiserfs_compat_ioctl, -#endif -}; - -static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end, - int datasync) -{ - struct inode *inode = filp->f_mapping->host; - int err; - - err = file_write_and_wait_range(filp, start, end); - if (err) - return err; - - inode_lock(inode); - reiserfs_write_lock(inode->i_sb); - err = reiserfs_commit_for_inode(inode); - reiserfs_write_unlock(inode->i_sb); - inode_unlock(inode); - if (err < 0) - return err; - return 0; -} - -#define store_ih(where,what) copy_item_head (where, what) - -static inline bool is_privroot_deh(struct inode *dir, struct reiserfs_de_head *deh) -{ - struct dentry *privroot = REISERFS_SB(dir->i_sb)->priv_root; - return (d_really_is_positive(privroot) && - deh->deh_objectid == INODE_PKEY(d_inode(privroot))->k_objectid); -} - -int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) -{ - - /* key of current position in the directory (key of directory entry) */ - struct cpu_key pos_key; - - INITIALIZE_PATH(path_to_entry); - struct buffer_head *bh; - int item_num, entry_num; - const struct reiserfs_key *rkey; - struct item_head *ih, tmp_ih; - int search_res; - char *local_buf; - loff_t next_pos; - char small_buf[32]; /* avoid kmalloc if we can */ - struct reiserfs_dir_entry de; - int ret = 0; - int depth; - - reiserfs_write_lock(inode->i_sb); - - reiserfs_check_lock_depth(inode->i_sb, "readdir"); - - /* - * form key for search the next directory entry using - * f_pos 
field of file structure - */ - make_cpu_key(&pos_key, inode, ctx->pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3); - next_pos = cpu_key_k_offset(&pos_key); - - path_to_entry.reada = PATH_READA; - while (1) { -research: - /* - * search the directory item, containing entry with - * specified key - */ - search_res = - search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry, - &de); - if (search_res == IO_ERROR) { - /* - * FIXME: we could just skip part of directory - * which could not be read - */ - ret = -EIO; - goto out; - } - entry_num = de.de_entry_num; - bh = de.de_bh; - item_num = de.de_item_num; - ih = de.de_ih; - store_ih(&tmp_ih, ih); - - /* we must have found item, that is item of this directory, */ - RFALSE(COMP_SHORT_KEYS(&ih->ih_key, &pos_key), - "vs-9000: found item %h does not match to dir we readdir %K", - ih, &pos_key); - RFALSE(item_num > B_NR_ITEMS(bh) - 1, - "vs-9005 item_num == %d, item amount == %d", - item_num, B_NR_ITEMS(bh)); - - /* - * and entry must be not more than number of entries - * in the item - */ - RFALSE(ih_entry_count(ih) < entry_num, - "vs-9010: entry number is too big %d (%d)", - entry_num, ih_entry_count(ih)); - - /* - * go through all entries in the directory item beginning - * from the entry, that has been found - */ - if (search_res == POSITION_FOUND - || entry_num < ih_entry_count(ih)) { - struct reiserfs_de_head *deh = - B_I_DEH(bh, ih) + entry_num; - - for (; entry_num < ih_entry_count(ih); - entry_num++, deh++) { - int d_reclen; - char *d_name; - ino_t d_ino; - loff_t cur_pos = deh_offset(deh); - - /* it is hidden entry */ - if (!de_visible(deh)) - continue; - d_reclen = entry_length(bh, ih, entry_num); - d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh); - - if (d_reclen <= 0 || - d_name + d_reclen > bh->b_data + bh->b_size) { - /* - * There is corrupted data in entry, - * We'd better stop here - */ - pathrelse(&path_to_entry); - ret = -EIO; - goto out; - } - - if (!d_name[d_reclen - 1]) - d_reclen = strlen(d_name); - - /* too big to send back to VFS */ - if (d_reclen > - REISERFS_MAX_NAME(inode->i_sb-> - s_blocksize)) { - continue; - } - - /* Ignore the .reiserfs_priv entry */ - if (is_privroot_deh(inode, deh)) - continue; - - ctx->pos = deh_offset(deh); - d_ino = deh_objectid(deh); - if (d_reclen <= 32) { - local_buf = small_buf; - } else { - local_buf = kmalloc(d_reclen, - GFP_NOFS); - if (!local_buf) { - pathrelse(&path_to_entry); - ret = -ENOMEM; - goto out; - } - if (item_moved(&tmp_ih, &path_to_entry)) { - kfree(local_buf); - goto research; - } - } - - /* - * Note, that we copy name to user space via - * temporary buffer (local_buf) because - * filldir will block if user space buffer is - * swapped out. At that time entry can move to - * somewhere else - */ - memcpy(local_buf, d_name, d_reclen); - - /* - * Since filldir might sleep, we can release - * the write lock here for other waiters - */ - depth = reiserfs_write_unlock_nested(inode->i_sb); - if (!dir_emit - (ctx, local_buf, d_reclen, d_ino, - DT_UNKNOWN)) { - reiserfs_write_lock_nested(inode->i_sb, depth); - if (local_buf != small_buf) { - kfree(local_buf); - } - goto end; - } - reiserfs_write_lock_nested(inode->i_sb, depth); - if (local_buf != small_buf) { - kfree(local_buf); - } - - /* deh_offset(deh) may be invalid now. 
*/ - next_pos = cur_pos + 1; - - if (item_moved(&tmp_ih, &path_to_entry)) { - set_cpu_key_k_offset(&pos_key, - next_pos); - goto research; - } - } /* for */ - } - - /* end of directory has been reached */ - if (item_num != B_NR_ITEMS(bh) - 1) - goto end; - - /* - * item we went through is last item of node. Using right - * delimiting key check is it directory end - */ - rkey = get_rkey(&path_to_entry, inode->i_sb); - if (!comp_le_keys(rkey, &MIN_KEY)) { - /* - * set pos_key to key, that is the smallest and greater - * that key of the last entry in the item - */ - set_cpu_key_k_offset(&pos_key, next_pos); - continue; - } - - /* end of directory has been reached */ - if (COMP_SHORT_KEYS(rkey, &pos_key)) { - goto end; - } - - /* directory continues in the right neighboring block */ - set_cpu_key_k_offset(&pos_key, - le_key_k_offset(KEY_FORMAT_3_5, rkey)); - - } /* while */ - -end: - ctx->pos = next_pos; - pathrelse(&path_to_entry); - reiserfs_check_path(&path_to_entry); -out: - reiserfs_write_unlock(inode->i_sb); - return ret; -} - -static int reiserfs_readdir(struct file *file, struct dir_context *ctx) -{ - return reiserfs_readdir_inode(file_inode(file), ctx); -} - -/* - * compose directory item containing "." and ".." entries (entries are - * not aligned to 4 byte boundary) - */ -void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, - __le32 par_dirid, __le32 par_objid) -{ - struct reiserfs_de_head *dot, *dotdot; - - memset(body, 0, EMPTY_DIR_SIZE_V1); - dot = (struct reiserfs_de_head *)body; - dotdot = dot + 1; - - /* direntry header of "." */ - put_deh_offset(dot, DOT_OFFSET); - /* these two are from make_le_item_head, and are LE */ - dot->deh_dir_id = dirid; - dot->deh_objectid = objid; - dot->deh_state = 0; /* Endian safe if 0 */ - put_deh_location(dot, EMPTY_DIR_SIZE_V1 - strlen(".")); - mark_de_visible(dot); - - /* direntry header of ".." */ - put_deh_offset(dotdot, DOT_DOT_OFFSET); - /* key of ".." for the root directory */ - /* these two are from the inode, and are LE */ - dotdot->deh_dir_id = par_dirid; - dotdot->deh_objectid = par_objid; - dotdot->deh_state = 0; /* Endian safe if 0 */ - put_deh_location(dotdot, deh_location(dot) - strlen("..")); - mark_de_visible(dotdot); - - /* copy ".." and "." */ - memcpy(body + deh_location(dot), ".", 1); - memcpy(body + deh_location(dotdot), "..", 2); -} - -/* compose directory item containing "." and ".." entries */ -void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, - __le32 par_dirid, __le32 par_objid) -{ - struct reiserfs_de_head *dot, *dotdot; - - memset(body, 0, EMPTY_DIR_SIZE); - dot = (struct reiserfs_de_head *)body; - dotdot = dot + 1; - - /* direntry header of "." */ - put_deh_offset(dot, DOT_OFFSET); - /* these two are from make_le_item_head, and are LE */ - dot->deh_dir_id = dirid; - dot->deh_objectid = objid; - dot->deh_state = 0; /* Endian safe if 0 */ - put_deh_location(dot, EMPTY_DIR_SIZE - ROUND_UP(strlen("."))); - mark_de_visible(dot); - - /* direntry header of ".." */ - put_deh_offset(dotdot, DOT_DOT_OFFSET); - /* key of ".." for the root directory */ - /* these two are from the inode, and are LE */ - dotdot->deh_dir_id = par_dirid; - dotdot->deh_objectid = par_objid; - dotdot->deh_state = 0; /* Endian safe if 0 */ - put_deh_location(dotdot, deh_location(dot) - ROUND_UP(strlen(".."))); - mark_de_visible(dotdot); - - /* copy ".." and "." 
*/ - memcpy(body + deh_location(dot), ".", 1); - memcpy(body + deh_location(dotdot), "..", 2); -} diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c deleted file mode 100644 index 5129efc6f2e6..000000000000 --- a/fs/reiserfs/do_balan.c +++ /dev/null @@ -1,1900 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -/* - * Now we have all buffers that must be used in balancing of the tree - * Further calculations can not cause schedule(), and thus the buffer - * tree will be stable until the balancing will be finished - * balance the tree according to the analysis made before, - * and using buffers obtained after all above. - */ - -#include -#include -#include "reiserfs.h" -#include -#include - -static inline void buffer_info_init_left(struct tree_balance *tb, - struct buffer_info *bi) -{ - bi->tb = tb; - bi->bi_bh = tb->L[0]; - bi->bi_parent = tb->FL[0]; - bi->bi_position = get_left_neighbor_position(tb, 0); -} - -static inline void buffer_info_init_right(struct tree_balance *tb, - struct buffer_info *bi) -{ - bi->tb = tb; - bi->bi_bh = tb->R[0]; - bi->bi_parent = tb->FR[0]; - bi->bi_position = get_right_neighbor_position(tb, 0); -} - -static inline void buffer_info_init_tbS0(struct tree_balance *tb, - struct buffer_info *bi) -{ - bi->tb = tb; - bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); - bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); - bi->bi_position = PATH_H_POSITION(tb->tb_path, 1); -} - -static inline void buffer_info_init_bh(struct tree_balance *tb, - struct buffer_info *bi, - struct buffer_head *bh) -{ - bi->tb = tb; - bi->bi_bh = bh; - bi->bi_parent = NULL; - bi->bi_position = 0; -} - -inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, - struct buffer_head *bh, int flag) -{ - journal_mark_dirty(tb->transaction_handle, bh); -} - -#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty -#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty - -/* - * summary: - * if deleting something ( tb->insert_size[0] < 0 ) - * return(balance_leaf_when_delete()); (flag d handled here) - * else - * if lnum is larger than 0 we put items into the left node - * if rnum is larger than 0 we put items into the right node - * if snum1 is larger than 0 we put items into the new node s1 - * if snum2 is larger than 0 we put items into the new node s2 - * Note that all *num* count new items being created. 
- */ - -static void balance_leaf_when_delete_del(struct tree_balance *tb) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int item_pos = PATH_LAST_POSITION(tb->tb_path); - struct buffer_info bi; -#ifdef CONFIG_REISERFS_CHECK - struct item_head *ih = item_head(tbS0, item_pos); -#endif - - RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0], - "vs-12013: mode Delete, insert size %d, ih to be deleted %h", - -tb->insert_size[0], ih); - - buffer_info_init_tbS0(tb, &bi); - leaf_delete_items(&bi, 0, item_pos, 1, -1); - - if (!item_pos && tb->CFL[0]) { - if (B_NR_ITEMS(tbS0)) { - replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); - } else { - if (!PATH_H_POSITION(tb->tb_path, 1)) - replace_key(tb, tb->CFL[0], tb->lkey[0], - PATH_H_PPARENT(tb->tb_path, 0), 0); - } - } - - RFALSE(!item_pos && !tb->CFL[0], - "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], - tb->L[0]); -} - -/* cut item in S[0] */ -static void balance_leaf_when_delete_cut(struct tree_balance *tb) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int item_pos = PATH_LAST_POSITION(tb->tb_path); - struct item_head *ih = item_head(tbS0, item_pos); - int pos_in_item = tb->tb_path->pos_in_item; - struct buffer_info bi; - buffer_info_init_tbS0(tb, &bi); - - if (is_direntry_le_ih(ih)) { - /* - * UFS unlink semantics are such that you can only - * delete one directory entry at a time. - * - * when we cut a directory tb->insert_size[0] means - * number of entries to be cut (always 1) - */ - tb->insert_size[0] = -1; - leaf_cut_from_buffer(&bi, item_pos, pos_in_item, - -tb->insert_size[0]); - - RFALSE(!item_pos && !pos_in_item && !tb->CFL[0], - "PAP-12030: can not change delimiting key. CFL[0]=%p", - tb->CFL[0]); - - if (!item_pos && !pos_in_item && tb->CFL[0]) - replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); - } else { - leaf_cut_from_buffer(&bi, item_pos, pos_in_item, - -tb->insert_size[0]); - - RFALSE(!ih_item_len(ih), - "PAP-12035: cut must leave non-zero dynamic " - "length of item"); - } -} - -static int balance_leaf_when_delete_left(struct tree_balance *tb) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int n = B_NR_ITEMS(tbS0); - - /* L[0] must be joined with S[0] */ - if (tb->lnum[0] == -1) { - /* R[0] must be also joined with S[0] */ - if (tb->rnum[0] == -1) { - if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) { - /* - * all contents of all the - * 3 buffers will be in L[0] - */ - if (PATH_H_POSITION(tb->tb_path, 1) == 0 && - 1 < B_NR_ITEMS(tb->FR[0])) - replace_key(tb, tb->CFL[0], - tb->lkey[0], tb->FR[0], 1); - - leaf_move_items(LEAF_FROM_S_TO_L, tb, n, -1, - NULL); - leaf_move_items(LEAF_FROM_R_TO_L, tb, - B_NR_ITEMS(tb->R[0]), -1, - NULL); - - reiserfs_invalidate_buffer(tb, tbS0); - reiserfs_invalidate_buffer(tb, tb->R[0]); - - return 0; - } - - /* all contents of all the 3 buffers will be in R[0] */ - leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, NULL); - leaf_move_items(LEAF_FROM_L_TO_R, tb, - B_NR_ITEMS(tb->L[0]), -1, NULL); - - /* right_delimiting_key is correct in R[0] */ - replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); - - reiserfs_invalidate_buffer(tb, tbS0); - reiserfs_invalidate_buffer(tb, tb->L[0]); - - return -1; - } - - RFALSE(tb->rnum[0] != 0, - "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); - /* all contents of L[0] and S[0] will be in L[0] */ - leaf_shift_left(tb, n, -1); - - reiserfs_invalidate_buffer(tb, tbS0); - - return 0; - } - - /* - * a part of contents of S[0] will be in L[0] and - * the rest part of S[0] will be in R[0] - */ - - 
RFALSE((tb->lnum[0] + tb->rnum[0] < n) || - (tb->lnum[0] + tb->rnum[0] > n + 1), - "PAP-12050: rnum(%d) and lnum(%d) and item " - "number(%d) in S[0] are not consistent", - tb->rnum[0], tb->lnum[0], n); - RFALSE((tb->lnum[0] + tb->rnum[0] == n) && - (tb->lbytes != -1 || tb->rbytes != -1), - "PAP-12055: bad rbytes (%d)/lbytes (%d) " - "parameters when items are not split", - tb->rbytes, tb->lbytes); - RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) && - (tb->lbytes < 1 || tb->rbytes != -1), - "PAP-12060: bad rbytes (%d)/lbytes (%d) " - "parameters when items are split", - tb->rbytes, tb->lbytes); - - leaf_shift_left(tb, tb->lnum[0], tb->lbytes); - leaf_shift_right(tb, tb->rnum[0], tb->rbytes); - - reiserfs_invalidate_buffer(tb, tbS0); - - return 0; -} - -/* - * Balance leaf node in case of delete or cut: insert_size[0] < 0 - * - * lnum, rnum can have values >= -1 - * -1 means that the neighbor must be joined with S - * 0 means that nothing should be done with the neighbor - * >0 means to shift entirely or partly the specified number of items - * to the neighbor - */ -static int balance_leaf_when_delete(struct tree_balance *tb, int flag) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - struct buffer_info bi; - int n; - - RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1, - "vs- 12000: level: wrong FR %z", tb->FR[0]); - RFALSE(tb->blknum[0] > 1, - "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]); - RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0), - "PAP-12010: tree can not be empty"); - - buffer_info_init_tbS0(tb, &bi); - - /* Delete or truncate the item */ - - BUG_ON(flag != M_DELETE && flag != M_CUT); - if (flag == M_DELETE) - balance_leaf_when_delete_del(tb); - else /* M_CUT */ - balance_leaf_when_delete_cut(tb); - - - /* - * the rule is that no shifting occurs unless by shifting - * a node can be freed - */ - n = B_NR_ITEMS(tbS0); - - - /* L[0] takes part in balancing */ - if (tb->lnum[0]) - return balance_leaf_when_delete_left(tb); - - if (tb->rnum[0] == -1) { - /* all contents of R[0] and S[0] will be in R[0] */ - leaf_shift_right(tb, n, -1); - reiserfs_invalidate_buffer(tb, tbS0); - return 0; - } - - RFALSE(tb->rnum[0], - "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]); - return 0; -} - -static unsigned int balance_leaf_insert_left(struct tree_balance *tb, - struct item_head *const ih, - const char * const body) -{ - int ret; - struct buffer_info bi; - int n = B_NR_ITEMS(tb->L[0]); - unsigned body_shift_bytes = 0; - - if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) { - /* part of new item falls into L[0] */ - int new_item_len, shift; - - ret = leaf_shift_left(tb, tb->lnum[0] - 1, -1); - - /* Calculate item length to insert to S[0] */ - new_item_len = ih_item_len(ih) - tb->lbytes; - - /* Calculate and check item length to insert to L[0] */ - put_ih_item_len(ih, ih_item_len(ih) - new_item_len); - - RFALSE(ih_item_len(ih) <= 0, - "PAP-12080: there is nothing to insert into L[0]: " - "ih_item_len=%d", ih_item_len(ih)); - - /* Insert new item into L[0] */ - buffer_info_init_left(tb, &bi); - leaf_insert_into_buf(&bi, n + tb->item_pos - ret, ih, body, - min_t(int, tb->zeroes_num, ih_item_len(ih))); - - /* - * Calculate key component, item length and body to - * insert into S[0] - */ - shift = 0; - if (is_indirect_le_ih(ih)) - shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; - - add_le_ih_k_offset(ih, tb->lbytes << shift); - - put_ih_item_len(ih, new_item_len); - if (tb->lbytes > tb->zeroes_num) { - body_shift_bytes = 
tb->lbytes - tb->zeroes_num; - tb->zeroes_num = 0; - } else - tb->zeroes_num -= tb->lbytes; - - RFALSE(ih_item_len(ih) <= 0, - "PAP-12085: there is nothing to insert into S[0]: " - "ih_item_len=%d", ih_item_len(ih)); - } else { - /* new item in whole falls into L[0] */ - /* Shift lnum[0]-1 items to L[0] */ - ret = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes); - - /* Insert new item into L[0] */ - buffer_info_init_left(tb, &bi); - leaf_insert_into_buf(&bi, n + tb->item_pos - ret, ih, body, - tb->zeroes_num); - tb->insert_size[0] = 0; - tb->zeroes_num = 0; - } - return body_shift_bytes; -} - -static void balance_leaf_paste_left_shift_dirent(struct tree_balance *tb, - struct item_head * const ih, - const char * const body) -{ - int n = B_NR_ITEMS(tb->L[0]); - struct buffer_info bi; - - RFALSE(tb->zeroes_num, - "PAP-12090: invalid parameter in case of a directory"); - - /* directory item */ - if (tb->lbytes > tb->pos_in_item) { - /* new directory entry falls into L[0] */ - struct item_head *pasted; - int ret, l_pos_in_item = tb->pos_in_item; - - /* - * Shift lnum[0] - 1 items in whole. - * Shift lbytes - 1 entries from given directory item - */ - ret = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1); - if (ret && !tb->item_pos) { - pasted = item_head(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1); - l_pos_in_item += ih_entry_count(pasted) - - (tb->lbytes - 1); - } - - /* Append given directory entry to directory item */ - buffer_info_init_left(tb, &bi); - leaf_paste_in_buffer(&bi, n + tb->item_pos - ret, - l_pos_in_item, tb->insert_size[0], - body, tb->zeroes_num); - - /* - * previous string prepared space for pasting new entry, - * following string pastes this entry - */ - - /* - * when we have merge directory item, pos_in_item - * has been changed too - */ - - /* paste new directory entry. 1 is entry number */ - leaf_paste_entries(&bi, n + tb->item_pos - ret, - l_pos_in_item, 1, - (struct reiserfs_de_head *) body, - body + DEH_SIZE, tb->insert_size[0]); - tb->insert_size[0] = 0; - } else { - /* new directory item doesn't fall into L[0] */ - /* - * Shift lnum[0]-1 items in whole. Shift lbytes - * directory entries from directory item number lnum[0] - */ - leaf_shift_left(tb, tb->lnum[0], tb->lbytes); - } - - /* Calculate new position to append in item body */ - tb->pos_in_item -= tb->lbytes; -} - -static unsigned int balance_leaf_paste_left_shift(struct tree_balance *tb, - struct item_head * const ih, - const char * const body) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int n = B_NR_ITEMS(tb->L[0]); - struct buffer_info bi; - int body_shift_bytes = 0; - - if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) { - balance_leaf_paste_left_shift_dirent(tb, ih, body); - return 0; - } - - RFALSE(tb->lbytes <= 0, - "PAP-12095: there is nothing to shift to L[0]. 
" - "lbytes=%d", tb->lbytes); - RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)), - "PAP-12100: incorrect position to paste: " - "item_len=%d, pos_in_item=%d", - ih_item_len(item_head(tbS0, tb->item_pos)), tb->pos_in_item); - - /* appended item will be in L[0] in whole */ - if (tb->lbytes >= tb->pos_in_item) { - struct item_head *tbS0_pos_ih, *tbL0_ih; - struct item_head *tbS0_0_ih; - struct reiserfs_key *left_delim_key; - int ret, l_n, version, temp_l; - - tbS0_pos_ih = item_head(tbS0, tb->item_pos); - tbS0_0_ih = item_head(tbS0, 0); - - /* - * this bytes number must be appended - * to the last item of L[h] - */ - l_n = tb->lbytes - tb->pos_in_item; - - /* Calculate new insert_size[0] */ - tb->insert_size[0] -= l_n; - - RFALSE(tb->insert_size[0] <= 0, - "PAP-12105: there is nothing to paste into " - "L[0]. insert_size=%d", tb->insert_size[0]); - - ret = leaf_shift_left(tb, tb->lnum[0], - ih_item_len(tbS0_pos_ih)); - - tbL0_ih = item_head(tb->L[0], n + tb->item_pos - ret); - - /* Append to body of item in L[0] */ - buffer_info_init_left(tb, &bi); - leaf_paste_in_buffer(&bi, n + tb->item_pos - ret, - ih_item_len(tbL0_ih), l_n, body, - min_t(int, l_n, tb->zeroes_num)); - - /* - * 0-th item in S0 can be only of DIRECT type - * when l_n != 0 - */ - temp_l = l_n; - - RFALSE(ih_item_len(tbS0_0_ih), - "PAP-12106: item length must be 0"); - RFALSE(comp_short_le_keys(&tbS0_0_ih->ih_key, - leaf_key(tb->L[0], n + tb->item_pos - ret)), - "PAP-12107: items must be of the same file"); - - if (is_indirect_le_ih(tbL0_ih)) { - int shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; - temp_l = l_n << shift; - } - /* update key of first item in S0 */ - version = ih_version(tbS0_0_ih); - add_le_key_k_offset(version, &tbS0_0_ih->ih_key, temp_l); - - /* update left delimiting key */ - left_delim_key = internal_key(tb->CFL[0], tb->lkey[0]); - add_le_key_k_offset(version, left_delim_key, temp_l); - - /* - * Calculate new body, position in item and - * insert_size[0] - */ - if (l_n > tb->zeroes_num) { - body_shift_bytes = l_n - tb->zeroes_num; - tb->zeroes_num = 0; - } else - tb->zeroes_num -= l_n; - tb->pos_in_item = 0; - - RFALSE(comp_short_le_keys(&tbS0_0_ih->ih_key, - leaf_key(tb->L[0], - B_NR_ITEMS(tb->L[0]) - 1)) || - !op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size) || - !op_is_left_mergeable(left_delim_key, tbS0->b_size), - "PAP-12120: item must be merge-able with left " - "neighboring item"); - } else { - /* only part of the appended item will be in L[0] */ - - /* Calculate position in item for append in S[0] */ - tb->pos_in_item -= tb->lbytes; - - RFALSE(tb->pos_in_item <= 0, - "PAP-12125: no place for paste. pos_in_item=%d", - tb->pos_in_item); - - /* - * Shift lnum[0] - 1 items in whole. 
- * Shift lbytes - 1 byte from item number lnum[0] - */ - leaf_shift_left(tb, tb->lnum[0], tb->lbytes); - } - return body_shift_bytes; -} - - -/* appended item will be in L[0] in whole */ -static void balance_leaf_paste_left_whole(struct tree_balance *tb, - struct item_head * const ih, - const char * const body) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int n = B_NR_ITEMS(tb->L[0]); - struct buffer_info bi; - struct item_head *pasted; - int ret; - - /* if we paste into first item of S[0] and it is left mergable */ - if (!tb->item_pos && - op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)) { - /* - * then increment pos_in_item by the size of the - * last item in L[0] - */ - pasted = item_head(tb->L[0], n - 1); - if (is_direntry_le_ih(pasted)) - tb->pos_in_item += ih_entry_count(pasted); - else - tb->pos_in_item += ih_item_len(pasted); - } - - /* - * Shift lnum[0] - 1 items in whole. - * Shift lbytes - 1 byte from item number lnum[0] - */ - ret = leaf_shift_left(tb, tb->lnum[0], tb->lbytes); - - /* Append to body of item in L[0] */ - buffer_info_init_left(tb, &bi); - leaf_paste_in_buffer(&bi, n + tb->item_pos - ret, tb->pos_in_item, - tb->insert_size[0], body, tb->zeroes_num); - - /* if appended item is directory, paste entry */ - pasted = item_head(tb->L[0], n + tb->item_pos - ret); - if (is_direntry_le_ih(pasted)) - leaf_paste_entries(&bi, n + tb->item_pos - ret, - tb->pos_in_item, 1, - (struct reiserfs_de_head *)body, - body + DEH_SIZE, tb->insert_size[0]); - - /* - * if appended item is indirect item, put unformatted node - * into un list - */ - if (is_indirect_le_ih(pasted)) - set_ih_free_space(pasted, 0); - - tb->insert_size[0] = 0; - tb->zeroes_num = 0; -} - -static unsigned int balance_leaf_paste_left(struct tree_balance *tb, - struct item_head * const ih, - const char * const body) -{ - /* we must shift the part of the appended item */ - if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) - return balance_leaf_paste_left_shift(tb, ih, body); - else - balance_leaf_paste_left_whole(tb, ih, body); - return 0; -} - -/* Shift lnum[0] items from S[0] to the left neighbor L[0] */ -static unsigned int balance_leaf_left(struct tree_balance *tb, - struct item_head * const ih, - const char * const body, int flag) -{ - if (tb->lnum[0] <= 0) - return 0; - - /* new item or it part falls to L[0], shift it too */ - if (tb->item_pos < tb->lnum[0]) { - BUG_ON(flag != M_INSERT && flag != M_PASTE); - - if (flag == M_INSERT) - return balance_leaf_insert_left(tb, ih, body); - else /* M_PASTE */ - return balance_leaf_paste_left(tb, ih, body); - } else - /* new item doesn't fall into L[0] */ - leaf_shift_left(tb, tb->lnum[0], tb->lbytes); - return 0; -} - - -static void balance_leaf_insert_right(struct tree_balance *tb, - struct item_head * const ih, - const char * const body) -{ - - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int n = B_NR_ITEMS(tbS0); - struct buffer_info bi; - - /* new item or part of it doesn't fall into R[0] */ - if (n - tb->rnum[0] >= tb->item_pos) { - leaf_shift_right(tb, tb->rnum[0], tb->rbytes); - return; - } - - /* new item or its part falls to R[0] */ - - /* part of new item falls into R[0] */ - if (tb->item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { - loff_t old_key_comp, old_len, r_zeroes_number; - const char *r_body; - int shift; - loff_t offset; - - leaf_shift_right(tb, tb->rnum[0] - 1, -1); - - /* Remember key component and item length */ - old_key_comp = le_ih_k_offset(ih); - old_len = ih_item_len(ih); - - /* - * 
Calculate key component and item length to insert - * into R[0] - */ - shift = 0; - if (is_indirect_le_ih(ih)) - shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; - offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << shift); - set_le_ih_k_offset(ih, offset); - put_ih_item_len(ih, tb->rbytes); - - /* Insert part of the item into R[0] */ - buffer_info_init_right(tb, &bi); - if ((old_len - tb->rbytes) > tb->zeroes_num) { - r_zeroes_number = 0; - r_body = body + (old_len - tb->rbytes) - tb->zeroes_num; - } else { - r_body = body; - r_zeroes_number = tb->zeroes_num - - (old_len - tb->rbytes); - tb->zeroes_num -= r_zeroes_number; - } - - leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeroes_number); - - /* Replace right delimiting key by first key in R[0] */ - replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); - - /* - * Calculate key component and item length to - * insert into S[0] - */ - set_le_ih_k_offset(ih, old_key_comp); - put_ih_item_len(ih, old_len - tb->rbytes); - - tb->insert_size[0] -= tb->rbytes; - - } else { - /* whole new item falls into R[0] */ - - /* Shift rnum[0]-1 items to R[0] */ - leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes); - - /* Insert new item into R[0] */ - buffer_info_init_right(tb, &bi); - leaf_insert_into_buf(&bi, tb->item_pos - n + tb->rnum[0] - 1, - ih, body, tb->zeroes_num); - - if (tb->item_pos - n + tb->rnum[0] - 1 == 0) - replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); - - tb->zeroes_num = tb->insert_size[0] = 0; - } -} - - -static void balance_leaf_paste_right_shift_dirent(struct tree_balance *tb, - struct item_head * const ih, - const char * const body) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - struct buffer_info bi; - int entry_count; - - RFALSE(tb->zeroes_num, - "PAP-12145: invalid parameter in case of a directory"); - entry_count = ih_entry_count(item_head(tbS0, tb->item_pos)); - - /* new directory entry falls into R[0] */ - if (entry_count - tb->rbytes < tb->pos_in_item) { - int paste_entry_position; - - RFALSE(tb->rbytes - 1 >= entry_count || !tb->insert_size[0], - "PAP-12150: no enough of entries to shift to R[0]: " - "rbytes=%d, entry_count=%d", tb->rbytes, entry_count); - - /* - * Shift rnum[0]-1 items in whole. 
- * Shift rbytes-1 directory entries from directory - * item number rnum[0] - */ - leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1); - - /* Paste given directory entry to directory item */ - paste_entry_position = tb->pos_in_item - entry_count + - tb->rbytes - 1; - buffer_info_init_right(tb, &bi); - leaf_paste_in_buffer(&bi, 0, paste_entry_position, - tb->insert_size[0], body, tb->zeroes_num); - - /* paste entry */ - leaf_paste_entries(&bi, 0, paste_entry_position, 1, - (struct reiserfs_de_head *) body, - body + DEH_SIZE, tb->insert_size[0]); - - /* change delimiting keys */ - if (paste_entry_position == 0) - replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); - - tb->insert_size[0] = 0; - tb->pos_in_item++; - } else { - /* new directory entry doesn't fall into R[0] */ - leaf_shift_right(tb, tb->rnum[0], tb->rbytes); - } -} - -static void balance_leaf_paste_right_shift(struct tree_balance *tb, - struct item_head * const ih, - const char * const body) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int n_shift, n_rem, r_zeroes_number, version; - unsigned long temp_rem; - const char *r_body; - struct buffer_info bi; - - /* we append to directory item */ - if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) { - balance_leaf_paste_right_shift_dirent(tb, ih, body); - return; - } - - /* regular object */ - - /* - * Calculate number of bytes which must be shifted - * from appended item - */ - n_shift = tb->rbytes - tb->insert_size[0]; - if (n_shift < 0) - n_shift = 0; - - RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)), - "PAP-12155: invalid position to paste. ih_item_len=%d, " - "pos_in_item=%d", tb->pos_in_item, - ih_item_len(item_head(tbS0, tb->item_pos))); - - leaf_shift_right(tb, tb->rnum[0], n_shift); - - /* - * Calculate number of bytes which must remain in body - * after appending to R[0] - */ - n_rem = tb->insert_size[0] - tb->rbytes; - if (n_rem < 0) - n_rem = 0; - - temp_rem = n_rem; - - version = ih_version(item_head(tb->R[0], 0)); - - if (is_indirect_le_key(version, leaf_key(tb->R[0], 0))) { - int shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; - temp_rem = n_rem << shift; - } - - add_le_key_k_offset(version, leaf_key(tb->R[0], 0), temp_rem); - add_le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0]), - temp_rem); - - do_balance_mark_internal_dirty(tb, tb->CFR[0], 0); - - /* Append part of body into R[0] */ - buffer_info_init_right(tb, &bi); - if (n_rem > tb->zeroes_num) { - r_zeroes_number = 0; - r_body = body + n_rem - tb->zeroes_num; - } else { - r_body = body; - r_zeroes_number = tb->zeroes_num - n_rem; - tb->zeroes_num -= r_zeroes_number; - } - - leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, - r_body, r_zeroes_number); - - if (is_indirect_le_ih(item_head(tb->R[0], 0))) - set_ih_free_space(item_head(tb->R[0], 0), 0); - - tb->insert_size[0] = n_rem; - if (!n_rem) - tb->pos_in_item++; -} - -static void balance_leaf_paste_right_whole(struct tree_balance *tb, - struct item_head * const ih, - const char * const body) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int n = B_NR_ITEMS(tbS0); - struct item_head *pasted; - struct buffer_info bi; - - buffer_info_init_right(tb, &bi); - leaf_shift_right(tb, tb->rnum[0], tb->rbytes); - - /* append item in R[0] */ - if (tb->pos_in_item >= 0) { - buffer_info_init_right(tb, &bi); - leaf_paste_in_buffer(&bi, tb->item_pos - n + tb->rnum[0], - tb->pos_in_item, tb->insert_size[0], body, - tb->zeroes_num); - } - - /* paste new entry, if item is directory 
item */ - pasted = item_head(tb->R[0], tb->item_pos - n + tb->rnum[0]); - if (is_direntry_le_ih(pasted) && tb->pos_in_item >= 0) { - leaf_paste_entries(&bi, tb->item_pos - n + tb->rnum[0], - tb->pos_in_item, 1, - (struct reiserfs_de_head *)body, - body + DEH_SIZE, tb->insert_size[0]); - - if (!tb->pos_in_item) { - - RFALSE(tb->item_pos - n + tb->rnum[0], - "PAP-12165: directory item must be first " - "item of node when pasting is in 0th position"); - - /* update delimiting keys */ - replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); - } - } - - if (is_indirect_le_ih(pasted)) - set_ih_free_space(pasted, 0); - tb->zeroes_num = tb->insert_size[0] = 0; -} - -static void balance_leaf_paste_right(struct tree_balance *tb, - struct item_head * const ih, - const char * const body) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int n = B_NR_ITEMS(tbS0); - - /* new item doesn't fall into R[0] */ - if (n - tb->rnum[0] > tb->item_pos) { - leaf_shift_right(tb, tb->rnum[0], tb->rbytes); - return; - } - - /* pasted item or part of it falls to R[0] */ - - if (tb->item_pos == n - tb->rnum[0] && tb->rbytes != -1) - /* we must shift the part of the appended item */ - balance_leaf_paste_right_shift(tb, ih, body); - else - /* pasted item in whole falls into R[0] */ - balance_leaf_paste_right_whole(tb, ih, body); -} - -/* shift rnum[0] items from S[0] to the right neighbor R[0] */ -static void balance_leaf_right(struct tree_balance *tb, - struct item_head * const ih, - const char * const body, int flag) -{ - if (tb->rnum[0] <= 0) - return; - - BUG_ON(flag != M_INSERT && flag != M_PASTE); - - if (flag == M_INSERT) - balance_leaf_insert_right(tb, ih, body); - else /* M_PASTE */ - balance_leaf_paste_right(tb, ih, body); -} - -static void balance_leaf_new_nodes_insert(struct tree_balance *tb, - struct item_head * const ih, - const char * const body, - struct item_head *insert_key, - struct buffer_head **insert_ptr, - int i) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int n = B_NR_ITEMS(tbS0); - struct buffer_info bi; - int shift; - - /* new item or it part don't falls into S_new[i] */ - if (n - tb->snum[i] >= tb->item_pos) { - leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, - tb->snum[i], tb->sbytes[i], tb->S_new[i]); - return; - } - - /* new item or it's part falls to first new node S_new[i] */ - - /* part of new item falls into S_new[i] */ - if (tb->item_pos == n - tb->snum[i] + 1 && tb->sbytes[i] != -1) { - int old_key_comp, old_len, r_zeroes_number; - const char *r_body; - - /* Move snum[i]-1 items from S[0] to S_new[i] */ - leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i] - 1, -1, - tb->S_new[i]); - - /* Remember key component and item length */ - old_key_comp = le_ih_k_offset(ih); - old_len = ih_item_len(ih); - - /* - * Calculate key component and item length to insert - * into S_new[i] - */ - shift = 0; - if (is_indirect_le_ih(ih)) - shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; - set_le_ih_k_offset(ih, - le_ih_k_offset(ih) + - ((old_len - tb->sbytes[i]) << shift)); - - put_ih_item_len(ih, tb->sbytes[i]); - - /* Insert part of the item into S_new[i] before 0-th item */ - buffer_info_init_bh(tb, &bi, tb->S_new[i]); - - if ((old_len - tb->sbytes[i]) > tb->zeroes_num) { - r_zeroes_number = 0; - r_body = body + (old_len - tb->sbytes[i]) - - tb->zeroes_num; - } else { - r_body = body; - r_zeroes_number = tb->zeroes_num - (old_len - - tb->sbytes[i]); - tb->zeroes_num -= r_zeroes_number; - } - - leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeroes_number); - - /* - * 
Calculate key component and item length to - * insert into S[i] - */ - set_le_ih_k_offset(ih, old_key_comp); - put_ih_item_len(ih, old_len - tb->sbytes[i]); - tb->insert_size[0] -= tb->sbytes[i]; - } else { - /* whole new item falls into S_new[i] */ - - /* - * Shift snum[0] - 1 items to S_new[i] - * (sbytes[i] of split item) - */ - leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, - tb->snum[i] - 1, tb->sbytes[i], tb->S_new[i]); - - /* Insert new item into S_new[i] */ - buffer_info_init_bh(tb, &bi, tb->S_new[i]); - leaf_insert_into_buf(&bi, tb->item_pos - n + tb->snum[i] - 1, - ih, body, tb->zeroes_num); - - tb->zeroes_num = tb->insert_size[0] = 0; - } -} - -/* we append to directory item */ -static void balance_leaf_new_nodes_paste_dirent(struct tree_balance *tb, - struct item_head * const ih, - const char * const body, - struct item_head *insert_key, - struct buffer_head **insert_ptr, - int i) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - struct item_head *aux_ih = item_head(tbS0, tb->item_pos); - int entry_count = ih_entry_count(aux_ih); - struct buffer_info bi; - - if (entry_count - tb->sbytes[i] < tb->pos_in_item && - tb->pos_in_item <= entry_count) { - /* new directory entry falls into S_new[i] */ - - RFALSE(!tb->insert_size[0], - "PAP-12215: insert_size is already 0"); - RFALSE(tb->sbytes[i] - 1 >= entry_count, - "PAP-12220: there are no so much entries (%d), only %d", - tb->sbytes[i] - 1, entry_count); - - /* - * Shift snum[i]-1 items in whole. - * Shift sbytes[i] directory entries - * from directory item number snum[i] - */ - leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], - tb->sbytes[i] - 1, tb->S_new[i]); - - /* - * Paste given directory entry to - * directory item - */ - buffer_info_init_bh(tb, &bi, tb->S_new[i]); - leaf_paste_in_buffer(&bi, 0, tb->pos_in_item - entry_count + - tb->sbytes[i] - 1, tb->insert_size[0], - body, tb->zeroes_num); - - /* paste new directory entry */ - leaf_paste_entries(&bi, 0, tb->pos_in_item - entry_count + - tb->sbytes[i] - 1, 1, - (struct reiserfs_de_head *) body, - body + DEH_SIZE, tb->insert_size[0]); - - tb->insert_size[0] = 0; - tb->pos_in_item++; - } else { - /* new directory entry doesn't fall into S_new[i] */ - leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], - tb->sbytes[i], tb->S_new[i]); - } - -} - -static void balance_leaf_new_nodes_paste_shift(struct tree_balance *tb, - struct item_head * const ih, - const char * const body, - struct item_head *insert_key, - struct buffer_head **insert_ptr, - int i) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - struct item_head *aux_ih = item_head(tbS0, tb->item_pos); - int n_shift, n_rem, r_zeroes_number, shift; - const char *r_body; - struct item_head *tmp; - struct buffer_info bi; - - RFALSE(ih, "PAP-12210: ih must be 0"); - - if (is_direntry_le_ih(aux_ih)) { - balance_leaf_new_nodes_paste_dirent(tb, ih, body, insert_key, - insert_ptr, i); - return; - } - - /* regular object */ - - - RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)) || - tb->insert_size[0] <= 0, - "PAP-12225: item too short or insert_size <= 0"); - - /* - * Calculate number of bytes which must be shifted from appended item - */ - n_shift = tb->sbytes[i] - tb->insert_size[0]; - if (n_shift < 0) - n_shift = 0; - leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], n_shift, - tb->S_new[i]); - - /* - * Calculate number of bytes which must remain in body after - * append to S_new[i] - */ - n_rem = tb->insert_size[0] - tb->sbytes[i]; - if (n_rem < 0) - n_rem = 0; - - /* Append 
part of body into S_new[0] */ - buffer_info_init_bh(tb, &bi, tb->S_new[i]); - if (n_rem > tb->zeroes_num) { - r_zeroes_number = 0; - r_body = body + n_rem - tb->zeroes_num; - } else { - r_body = body; - r_zeroes_number = tb->zeroes_num - n_rem; - tb->zeroes_num -= r_zeroes_number; - } - - leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, - r_body, r_zeroes_number); - - tmp = item_head(tb->S_new[i], 0); - shift = 0; - if (is_indirect_le_ih(tmp)) { - set_ih_free_space(tmp, 0); - shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; - } - add_le_ih_k_offset(tmp, n_rem << shift); - - tb->insert_size[0] = n_rem; - if (!n_rem) - tb->pos_in_item++; -} - -static void balance_leaf_new_nodes_paste_whole(struct tree_balance *tb, - struct item_head * const ih, - const char * const body, - struct item_head *insert_key, - struct buffer_head **insert_ptr, - int i) - -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int n = B_NR_ITEMS(tbS0); - int leaf_mi; - struct item_head *pasted; - struct buffer_info bi; - -#ifdef CONFIG_REISERFS_CHECK - struct item_head *ih_check = item_head(tbS0, tb->item_pos); - - if (!is_direntry_le_ih(ih_check) && - (tb->pos_in_item != ih_item_len(ih_check) || - tb->insert_size[0] <= 0)) - reiserfs_panic(tb->tb_sb, - "PAP-12235", - "pos_in_item must be equal to ih_item_len"); -#endif - - leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], - tb->sbytes[i], tb->S_new[i]); - - RFALSE(leaf_mi, - "PAP-12240: unexpected value returned by leaf_move_items (%d)", - leaf_mi); - - /* paste into item */ - buffer_info_init_bh(tb, &bi, tb->S_new[i]); - leaf_paste_in_buffer(&bi, tb->item_pos - n + tb->snum[i], - tb->pos_in_item, tb->insert_size[0], - body, tb->zeroes_num); - - pasted = item_head(tb->S_new[i], tb->item_pos - n + - tb->snum[i]); - if (is_direntry_le_ih(pasted)) - leaf_paste_entries(&bi, tb->item_pos - n + tb->snum[i], - tb->pos_in_item, 1, - (struct reiserfs_de_head *)body, - body + DEH_SIZE, tb->insert_size[0]); - - /* if we paste to indirect item update ih_free_space */ - if (is_indirect_le_ih(pasted)) - set_ih_free_space(pasted, 0); - - tb->zeroes_num = tb->insert_size[0] = 0; - -} -static void balance_leaf_new_nodes_paste(struct tree_balance *tb, - struct item_head * const ih, - const char * const body, - struct item_head *insert_key, - struct buffer_head **insert_ptr, - int i) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - int n = B_NR_ITEMS(tbS0); - - /* pasted item doesn't fall into S_new[i] */ - if (n - tb->snum[i] > tb->item_pos) { - leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, - tb->snum[i], tb->sbytes[i], tb->S_new[i]); - return; - } - - /* pasted item or part if it falls to S_new[i] */ - - if (tb->item_pos == n - tb->snum[i] && tb->sbytes[i] != -1) - /* we must shift part of the appended item */ - balance_leaf_new_nodes_paste_shift(tb, ih, body, insert_key, - insert_ptr, i); - else - /* item falls wholly into S_new[i] */ - balance_leaf_new_nodes_paste_whole(tb, ih, body, insert_key, - insert_ptr, i); -} - -/* Fill new nodes that appear in place of S[0] */ -static void balance_leaf_new_nodes(struct tree_balance *tb, - struct item_head * const ih, - const char * const body, - struct item_head *insert_key, - struct buffer_head **insert_ptr, - int flag) -{ - int i; - for (i = tb->blknum[0] - 2; i >= 0; i--) { - BUG_ON(flag != M_INSERT && flag != M_PASTE); - - RFALSE(!tb->snum[i], - "PAP-12200: snum[%d] == %d. 
Must be > 0", i, - tb->snum[i]); - - /* here we shift from S to S_new nodes */ - - tb->S_new[i] = get_FEB(tb); - - /* initialized block type and tree level */ - set_blkh_level(B_BLK_HEAD(tb->S_new[i]), DISK_LEAF_NODE_LEVEL); - - if (flag == M_INSERT) - balance_leaf_new_nodes_insert(tb, ih, body, insert_key, - insert_ptr, i); - else /* M_PASTE */ - balance_leaf_new_nodes_paste(tb, ih, body, insert_key, - insert_ptr, i); - - memcpy(insert_key + i, leaf_key(tb->S_new[i], 0), KEY_SIZE); - insert_ptr[i] = tb->S_new[i]; - - RFALSE(!buffer_journaled(tb->S_new[i]) - || buffer_journal_dirty(tb->S_new[i]) - || buffer_dirty(tb->S_new[i]), - "PAP-12247: S_new[%d] : (%b)", - i, tb->S_new[i]); - } -} - -static void balance_leaf_finish_node_insert(struct tree_balance *tb, - struct item_head * const ih, - const char * const body) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - struct buffer_info bi; - buffer_info_init_tbS0(tb, &bi); - leaf_insert_into_buf(&bi, tb->item_pos, ih, body, tb->zeroes_num); - - /* If we insert the first key change the delimiting key */ - if (tb->item_pos == 0) { - if (tb->CFL[0]) /* can be 0 in reiserfsck */ - replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); - - } -} - -static void balance_leaf_finish_node_paste_dirent(struct tree_balance *tb, - struct item_head * const ih, - const char * const body) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - struct item_head *pasted = item_head(tbS0, tb->item_pos); - struct buffer_info bi; - - if (tb->pos_in_item >= 0 && tb->pos_in_item <= ih_entry_count(pasted)) { - RFALSE(!tb->insert_size[0], - "PAP-12260: insert_size is 0 already"); - - /* prepare space */ - buffer_info_init_tbS0(tb, &bi); - leaf_paste_in_buffer(&bi, tb->item_pos, tb->pos_in_item, - tb->insert_size[0], body, tb->zeroes_num); - - /* paste entry */ - leaf_paste_entries(&bi, tb->item_pos, tb->pos_in_item, 1, - (struct reiserfs_de_head *)body, - body + DEH_SIZE, tb->insert_size[0]); - - if (!tb->item_pos && !tb->pos_in_item) { - RFALSE(!tb->CFL[0] || !tb->L[0], - "PAP-12270: CFL[0]/L[0] must be specified"); - if (tb->CFL[0]) - replace_key(tb, tb->CFL[0], tb->lkey[0], - tbS0, 0); - } - - tb->insert_size[0] = 0; - } -} - -static void balance_leaf_finish_node_paste(struct tree_balance *tb, - struct item_head * const ih, - const char * const body) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - struct buffer_info bi; - struct item_head *pasted = item_head(tbS0, tb->item_pos); - - /* when directory, may be new entry already pasted */ - if (is_direntry_le_ih(pasted)) { - balance_leaf_finish_node_paste_dirent(tb, ih, body); - return; - } - - /* regular object */ - - if (tb->pos_in_item == ih_item_len(pasted)) { - RFALSE(tb->insert_size[0] <= 0, - "PAP-12275: insert size must not be %d", - tb->insert_size[0]); - buffer_info_init_tbS0(tb, &bi); - leaf_paste_in_buffer(&bi, tb->item_pos, - tb->pos_in_item, tb->insert_size[0], body, - tb->zeroes_num); - - if (is_indirect_le_ih(pasted)) - set_ih_free_space(pasted, 0); - - tb->insert_size[0] = 0; - } -#ifdef CONFIG_REISERFS_CHECK - else if (tb->insert_size[0]) { - print_cur_tb("12285"); - reiserfs_panic(tb->tb_sb, "PAP-12285", - "insert_size must be 0 (%d)", tb->insert_size[0]); - } -#endif -} - -/* - * if the affected item was not wholly shifted then we - * perform all necessary operations on that part or whole - * of the affected item which remains in S - */ -static void balance_leaf_finish_node(struct tree_balance *tb, - struct item_head * const ih, - const char * const body, int 
flag) -{ - /* if we must insert or append into buffer S[0] */ - if (0 <= tb->item_pos && tb->item_pos < tb->s0num) { - if (flag == M_INSERT) - balance_leaf_finish_node_insert(tb, ih, body); - else /* M_PASTE */ - balance_leaf_finish_node_paste(tb, ih, body); - } -} - -/** - * balance_leaf - reiserfs tree balancing algorithm - * @tb: tree balance state - * @ih: item header of inserted item (little endian) - * @body: body of inserted item or bytes to paste - * @flag: i - insert, d - delete, c - cut, p - paste (see do_balance) - * passed back: - * @insert_key: key to insert new nodes - * @insert_ptr: array of nodes to insert at the next level - * - * In our processing of one level we sometimes determine what must be - * inserted into the next higher level. This insertion consists of a - * key or two keys and their corresponding pointers. - */ -static int balance_leaf(struct tree_balance *tb, struct item_head *ih, - const char *body, int flag, - struct item_head *insert_key, - struct buffer_head **insert_ptr) -{ - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - - PROC_INFO_INC(tb->tb_sb, balance_at[0]); - - /* Make balance in case insert_size[0] < 0 */ - if (tb->insert_size[0] < 0) - return balance_leaf_when_delete(tb, flag); - - tb->item_pos = PATH_LAST_POSITION(tb->tb_path), - tb->pos_in_item = tb->tb_path->pos_in_item, - tb->zeroes_num = 0; - if (flag == M_INSERT && !body) - tb->zeroes_num = ih_item_len(ih); - - /* - * for indirect item pos_in_item is measured in unformatted node - * pointers. Recalculate to bytes - */ - if (flag != M_INSERT - && is_indirect_le_ih(item_head(tbS0, tb->item_pos))) - tb->pos_in_item *= UNFM_P_SIZE; - - body += balance_leaf_left(tb, ih, body, flag); - - /* tb->lnum[0] > 0 */ - /* Calculate new item position */ - tb->item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0)); - - balance_leaf_right(tb, ih, body, flag); - - /* tb->rnum[0] > 0 */ - RFALSE(tb->blknum[0] > 3, - "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]); - RFALSE(tb->blknum[0] < 0, - "PAP-12185: blknum can not be %d. 
It must be >= 0", tb->blknum[0]); - - /* - * if while adding to a node we discover that it is possible to split - * it in two, and merge the left part into the left neighbor and the - * right part into the right neighbor, eliminating the node - */ - if (tb->blknum[0] == 0) { /* node S[0] is empty now */ - - RFALSE(!tb->lnum[0] || !tb->rnum[0], - "PAP-12190: lnum and rnum must not be zero"); - /* - * if insertion was done before 0-th position in R[0], right - * delimiting key of the tb->L[0]'s and left delimiting key are - * not set correctly - */ - if (tb->CFL[0]) { - if (!tb->CFR[0]) - reiserfs_panic(tb->tb_sb, "vs-12195", - "CFR not initialized"); - copy_key(internal_key(tb->CFL[0], tb->lkey[0]), - internal_key(tb->CFR[0], tb->rkey[0])); - do_balance_mark_internal_dirty(tb, tb->CFL[0], 0); - } - - reiserfs_invalidate_buffer(tb, tbS0); - return 0; - } - - balance_leaf_new_nodes(tb, ih, body, insert_key, insert_ptr, flag); - - balance_leaf_finish_node(tb, ih, body, flag); - -#ifdef CONFIG_REISERFS_CHECK - if (flag == M_PASTE && tb->insert_size[0]) { - print_cur_tb("12290"); - reiserfs_panic(tb->tb_sb, - "PAP-12290", "insert_size is still not 0 (%d)", - tb->insert_size[0]); - } -#endif - - /* Leaf level of the tree is balanced (end of balance_leaf) */ - return 0; -} - -/* Make empty node */ -void make_empty_node(struct buffer_info *bi) -{ - struct block_head *blkh; - - RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL"); - - blkh = B_BLK_HEAD(bi->bi_bh); - set_blkh_nr_item(blkh, 0); - set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh)); - - if (bi->bi_parent) - B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */ -} - -/* Get first empty buffer */ -struct buffer_head *get_FEB(struct tree_balance *tb) -{ - int i; - struct buffer_info bi; - - for (i = 0; i < MAX_FEB_SIZE; i++) - if (tb->FEB[i] != NULL) - break; - - if (i == MAX_FEB_SIZE) - reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty"); - - buffer_info_init_bh(tb, &bi, tb->FEB[i]); - make_empty_node(&bi); - set_buffer_uptodate(tb->FEB[i]); - tb->used[i] = tb->FEB[i]; - tb->FEB[i] = NULL; - - return tb->used[i]; -} - -/* This is now used because reiserfs_free_block has to be able to schedule. 
*/ -static void store_thrown(struct tree_balance *tb, struct buffer_head *bh) -{ - int i; - - if (buffer_dirty(bh)) - reiserfs_warning(tb->tb_sb, "reiserfs-12320", - "called with dirty buffer"); - for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) - if (!tb->thrown[i]) { - tb->thrown[i] = bh; - get_bh(bh); /* free_thrown puts this */ - return; - } - reiserfs_warning(tb->tb_sb, "reiserfs-12321", - "too many thrown buffers"); -} - -static void free_thrown(struct tree_balance *tb) -{ - int i; - b_blocknr_t blocknr; - for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) { - if (tb->thrown[i]) { - blocknr = tb->thrown[i]->b_blocknr; - if (buffer_dirty(tb->thrown[i])) - reiserfs_warning(tb->tb_sb, "reiserfs-12322", - "called with dirty buffer %d", - blocknr); - brelse(tb->thrown[i]); /* incremented in store_thrown */ - reiserfs_free_block(tb->transaction_handle, NULL, - blocknr, 0); - } - } -} - -void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh) -{ - struct block_head *blkh; - blkh = B_BLK_HEAD(bh); - set_blkh_level(blkh, FREE_LEVEL); - set_blkh_nr_item(blkh, 0); - - clear_buffer_dirty(bh); - store_thrown(tb, bh); -} - -/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/ -void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest, - struct buffer_head *src, int n_src) -{ - - RFALSE(dest == NULL || src == NULL, - "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)", - src, dest); - RFALSE(!B_IS_KEYS_LEVEL(dest), - "vs-12310: invalid level (%z) for destination buffer. dest must be leaf", - dest); - RFALSE(n_dest < 0 || n_src < 0, - "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest); - RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src), - "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big", - n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest)); - - if (B_IS_ITEMS_LEVEL(src)) - /* source buffer contains leaf node */ - memcpy(internal_key(dest, n_dest), item_head(src, n_src), - KEY_SIZE); - else - memcpy(internal_key(dest, n_dest), internal_key(src, n_src), - KEY_SIZE); - - do_balance_mark_internal_dirty(tb, dest, 0); -} - -int get_left_neighbor_position(struct tree_balance *tb, int h) -{ - int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); - - RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL, - "vs-12325: FL[%d](%p) or F[%d](%p) does not exist", - h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h)); - - if (Sh_position == 0) - return B_NR_ITEMS(tb->FL[h]); - else - return Sh_position - 1; -} - -int get_right_neighbor_position(struct tree_balance *tb, int h) -{ - int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); - - RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL, - "vs-12330: F[%d](%p) or FR[%d](%p) does not exist", - h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]); - - if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h))) - return 0; - else - return Sh_position + 1; -} - -#ifdef CONFIG_REISERFS_CHECK - -int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); -static void check_internal_node(struct super_block *s, struct buffer_head *bh, - char *mes) -{ - struct disk_child *dc; - int i; - - RFALSE(!bh, "PAP-12336: bh == 0"); - - if (!bh || !B_IS_IN_TREE(bh)) - return; - - RFALSE(!buffer_dirty(bh) && - !(buffer_journaled(bh) || buffer_journal_dirty(bh)), - "PAP-12337: buffer (%b) must be dirty", bh); - dc = B_N_CHILD(bh, 0); - - for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) { - if (!is_reusable(s, dc_block_number(dc), 1)) { - 
print_cur_tb(mes); - reiserfs_panic(s, "PAP-12338", - "invalid child pointer %y in %b", - dc, bh); - } - } -} - -static int locked_or_not_in_tree(struct tree_balance *tb, - struct buffer_head *bh, char *which) -{ - if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) || - !B_IS_IN_TREE(bh)) { - reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh); - return 1; - } - return 0; -} - -static int check_before_balancing(struct tree_balance *tb) -{ - int retval = 0; - - if (REISERFS_SB(tb->tb_sb)->cur_tb) { - reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule " - "occurred based on cur_tb not being null at " - "this point in code. do_balance cannot properly " - "handle concurrent tree accesses on a same " - "mount point."); - } - - /* - * double check that buffers that we will modify are unlocked. - * (fix_nodes should already have prepped all of these for us). - */ - if (tb->lnum[0]) { - retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]"); - retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]"); - retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]"); - check_leaf(tb->L[0]); - } - if (tb->rnum[0]) { - retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]"); - retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]"); - retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]"); - check_leaf(tb->R[0]); - } - retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path), - "S[0]"); - check_leaf(PATH_PLAST_BUFFER(tb->tb_path)); - - return retval; -} - -static void check_after_balance_leaf(struct tree_balance *tb) -{ - if (tb->lnum[0]) { - if (B_FREE_SPACE(tb->L[0]) != - MAX_CHILD_SIZE(tb->L[0]) - - dc_size(B_N_CHILD - (tb->FL[0], get_left_neighbor_position(tb, 0)))) { - print_cur_tb("12221"); - reiserfs_panic(tb->tb_sb, "PAP-12355", - "shift to left was incorrect"); - } - } - if (tb->rnum[0]) { - if (B_FREE_SPACE(tb->R[0]) != - MAX_CHILD_SIZE(tb->R[0]) - - dc_size(B_N_CHILD - (tb->FR[0], get_right_neighbor_position(tb, 0)))) { - print_cur_tb("12222"); - reiserfs_panic(tb->tb_sb, "PAP-12360", - "shift to right was incorrect"); - } - } - if (PATH_H_PBUFFER(tb->tb_path, 1) && - (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) != - (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) - - dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1), - PATH_H_POSITION(tb->tb_path, 1)))))) { - int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)); - int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) - - dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1), - PATH_H_POSITION(tb->tb_path, - 1)))); - print_cur_tb("12223"); - reiserfs_warning(tb->tb_sb, "reiserfs-12363", - "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; " - "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d", - left, - MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)), - PATH_H_PBUFFER(tb->tb_path, 1), - PATH_H_POSITION(tb->tb_path, 1), - dc_size(B_N_CHILD - (PATH_H_PBUFFER(tb->tb_path, 1), - PATH_H_POSITION(tb->tb_path, 1))), - right); - reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect"); - } -} - -static void check_leaf_level(struct tree_balance *tb) -{ - check_leaf(tb->L[0]); - check_leaf(tb->R[0]); - check_leaf(PATH_PLAST_BUFFER(tb->tb_path)); -} - -static void check_internal_levels(struct tree_balance *tb) -{ - int h; - - /* check all internal nodes */ - for (h = 1; tb->insert_size[h]; h++) { - check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h), - "BAD BUFFER ON PATH"); - if (tb->lnum[h]) - check_internal_node(tb->tb_sb, tb->L[h], "BAD L"); - if (tb->rnum[h]) - check_internal_node(tb->tb_sb, tb->R[h], 
"BAD R"); - } - -} - -#endif - -/* - * Now we have all of the buffers that must be used in balancing of - * the tree. We rely on the assumption that schedule() will not occur - * while do_balance works. ( Only interrupt handlers are acceptable.) - * We balance the tree according to the analysis made before this, - * using buffers already obtained. For SMP support it will someday be - * necessary to add ordered locking of tb. - */ - -/* - * Some interesting rules of balancing: - * we delete a maximum of two nodes per level per balancing: we never - * delete R, when we delete two of three nodes L, S, R then we move - * them into R. - * - * we only delete L if we are deleting two nodes, if we delete only - * one node we delete S - * - * if we shift leaves then we shift as much as we can: this is a - * deliberate policy of extremism in node packing which results in - * higher average utilization after repeated random balance operations - * at the cost of more memory copies and more balancing as a result of - * small insertions to full nodes. - * - * if we shift internal nodes we try to evenly balance the node - * utilization, with consequent less balancing at the cost of lower - * utilization. - * - * one could argue that the policy for directories in leaves should be - * that of internal nodes, but we will wait until another day to - * evaluate this.... It would be nice to someday measure and prove - * these assumptions as to what is optimal.... - */ - -static inline void do_balance_starts(struct tree_balance *tb) -{ - /* use print_cur_tb() to see initial state of struct tree_balance */ - - /* store_print_tb (tb); */ - - /* do not delete, just comment it out */ - /* - print_tb(flag, PATH_LAST_POSITION(tb->tb_path), - tb->tb_path->pos_in_item, tb, "check"); - */ - RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); -#ifdef CONFIG_REISERFS_CHECK - REISERFS_SB(tb->tb_sb)->cur_tb = tb; -#endif -} - -static inline void do_balance_completed(struct tree_balance *tb) -{ - -#ifdef CONFIG_REISERFS_CHECK - check_leaf_level(tb); - check_internal_levels(tb); - REISERFS_SB(tb->tb_sb)->cur_tb = NULL; -#endif - - /* - * reiserfs_free_block is no longer schedule safe. So, we need to - * put the buffers we want freed on the thrown list during do_balance, - * and then free them now - */ - - REISERFS_SB(tb->tb_sb)->s_do_balance++; - - /* release all nodes hold to perform the balancing */ - unfix_nodes(tb); - - free_thrown(tb); -} - -/* - * do_balance - balance the tree - * - * @tb: tree_balance structure - * @ih: item header of inserted item - * @body: body of inserted item or bytes to paste - * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' paste - * - * Cut means delete part of an item (includes removing an entry from a - * directory). - * - * Delete means delete whole item. - * - * Insert means add a new item into the tree. - * - * Paste means to append to the end of an existing file or to - * insert a directory entry. - */ -void do_balance(struct tree_balance *tb, struct item_head *ih, - const char *body, int flag) -{ - int child_pos; /* position of a child node in its parent */ - int h; /* level of the tree being processed */ - - /* - * in our processing of one level we sometimes determine what - * must be inserted into the next higher level. 
This insertion - * consists of a key or two keys and their corresponding - * pointers - */ - struct item_head insert_key[2]; - - /* inserted node-ptrs for the next level */ - struct buffer_head *insert_ptr[2]; - - tb->tb_mode = flag; - tb->need_balance_dirty = 0; - - if (FILESYSTEM_CHANGED_TB(tb)) { - reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has " - "changed"); - } - /* if we have no real work to do */ - if (!tb->insert_size[0]) { - reiserfs_warning(tb->tb_sb, "PAP-12350", - "insert_size == 0, mode == %c", flag); - unfix_nodes(tb); - return; - } - - atomic_inc(&fs_generation(tb->tb_sb)); - do_balance_starts(tb); - - /* - * balance_leaf returns 0 except if combining L R and S into - * one node. see balance_internal() for explanation of this - * line of code. - */ - child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) + - balance_leaf(tb, ih, body, flag, insert_key, insert_ptr); - -#ifdef CONFIG_REISERFS_CHECK - check_after_balance_leaf(tb); -#endif - - /* Balance internal level of the tree. */ - for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++) - child_pos = balance_internal(tb, h, child_pos, insert_key, - insert_ptr); - - do_balance_completed(tb); -} diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c deleted file mode 100644 index 8eb3ad3e8ae9..000000000000 --- a/fs/reiserfs/file.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include -#include "reiserfs.h" -#include "acl.h" -#include "xattr.h" -#include -#include -#include -#include -#include -#include -#include - -/* - * We pack the tails of files on file close, not at the time they are written. - * This implies an unnecessary copy of the tail and an unnecessary indirect item - * insertion/balancing, for files that are written in one write. - * It avoids unnecessary tail packings (balances) for files that are written in - * multiple writes and are small enough to have tails. - * - * file_release is called by the VFS layer when the file is closed. If - * this is the last open file descriptor, and the file - * small enough to have a tail, and the tail is currently in an - * unformatted node, the tail is converted back into a direct item. - * - * We use reiserfs_truncate_file to pack the tail, since it already has - * all the conditions coded. - */ -static int reiserfs_file_release(struct inode *inode, struct file *filp) -{ - - struct reiserfs_transaction_handle th; - int err; - int jbegin_failure = 0; - - BUG_ON(!S_ISREG(inode->i_mode)); - - if (!atomic_dec_and_mutex_lock(&REISERFS_I(inode)->openers, - &REISERFS_I(inode)->tailpack)) - return 0; - - /* fast out for when nothing needs to be done */ - if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || - !tail_has_to_be_packed(inode)) && - REISERFS_I(inode)->i_prealloc_count <= 0) { - mutex_unlock(&REISERFS_I(inode)->tailpack); - return 0; - } - - reiserfs_write_lock(inode->i_sb); - /* - * freeing preallocation only involves relogging blocks that - * are already in the current transaction. preallocation gets - * freed at the end of each transaction, so it is impossible for - * us to log any additional blocks (including quota blocks) - */ - err = journal_begin(&th, inode->i_sb, 1); - if (err) { - /* - * uh oh, we can't allow the inode to go away while there - * is still preallocation blocks pending. Try to join the - * aborted transaction - */ - jbegin_failure = err; - err = journal_join_abort(&th, inode->i_sb); - - if (err) { - /* - * hmpf, our choices here aren't good. 
We can pin - * the inode which will disallow unmount from ever - * happening, we can do nothing, which will corrupt - * random memory on unmount, or we can forcibly - * remove the file from the preallocation list, which - * will leak blocks on disk. Lets pin the inode - * and let the admin know what is going on. - */ - igrab(inode); - reiserfs_warning(inode->i_sb, "clm-9001", - "pinning inode %lu because the " - "preallocation can't be freed", - inode->i_ino); - goto out; - } - } - reiserfs_update_inode_transaction(inode); - -#ifdef REISERFS_PREALLOCATE - reiserfs_discard_prealloc(&th, inode); -#endif - err = journal_end(&th); - - /* copy back the error code from journal_begin */ - if (!err) - err = jbegin_failure; - - if (!err && - (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && - tail_has_to_be_packed(inode)) { - - /* - * if regular file is released by last holder and it has been - * appended (we append by unformatted node only) or its direct - * item(s) had to be converted, then it may have to be - * indirect2direct converted - */ - err = reiserfs_truncate_file(inode, 0); - } -out: - reiserfs_write_unlock(inode->i_sb); - mutex_unlock(&REISERFS_I(inode)->tailpack); - return err; -} - -static int reiserfs_file_open(struct inode *inode, struct file *file) -{ - int err = dquot_file_open(inode, file); - - /* somebody might be tailpacking on final close; wait for it */ - if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) { - mutex_lock(&REISERFS_I(inode)->tailpack); - atomic_inc(&REISERFS_I(inode)->openers); - mutex_unlock(&REISERFS_I(inode)->tailpack); - } - return err; -} - -void reiserfs_vfs_truncate_file(struct inode *inode) -{ - mutex_lock(&REISERFS_I(inode)->tailpack); - reiserfs_truncate_file(inode, 1); - mutex_unlock(&REISERFS_I(inode)->tailpack); -} - -/* Sync a reiserfs file. */ - -/* - * FIXME: sync_mapping_buffers() never has anything to sync. Can - * be removed... - */ - -static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end, - int datasync) -{ - struct inode *inode = filp->f_mapping->host; - int err; - int barrier_done; - - err = file_write_and_wait_range(filp, start, end); - if (err) - return err; - - inode_lock(inode); - BUG_ON(!S_ISREG(inode->i_mode)); - err = sync_mapping_buffers(inode->i_mapping); - reiserfs_write_lock(inode->i_sb); - barrier_done = reiserfs_commit_for_inode(inode); - reiserfs_write_unlock(inode->i_sb); - if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) - blkdev_issue_flush(inode->i_sb->s_bdev); - inode_unlock(inode); - if (barrier_done < 0) - return barrier_done; - return (err < 0) ? 
-EIO : 0; -} - -/* taken fs/buffer.c:__block_commit_write */ -int reiserfs_commit_page(struct inode *inode, struct page *page, - unsigned from, unsigned to) -{ - unsigned block_start, block_end; - int partial = 0; - unsigned blocksize; - struct buffer_head *bh, *head; - unsigned long i_size_index = inode->i_size >> PAGE_SHIFT; - int new; - int logit = reiserfs_file_data_log(inode); - struct super_block *s = inode->i_sb; - int bh_per_page = PAGE_SIZE / s->s_blocksize; - struct reiserfs_transaction_handle th; - int ret = 0; - - th.t_trans_id = 0; - blocksize = i_blocksize(inode); - - if (logit) { - reiserfs_write_lock(s); - ret = journal_begin(&th, s, bh_per_page + 1); - if (ret) - goto drop_write_lock; - reiserfs_update_inode_transaction(inode); - } - for (bh = head = page_buffers(page), block_start = 0; - bh != head || !block_start; - block_start = block_end, bh = bh->b_this_page) { - - new = buffer_new(bh); - clear_buffer_new(bh); - block_end = block_start + blocksize; - if (block_end <= from || block_start >= to) { - if (!buffer_uptodate(bh)) - partial = 1; - } else { - set_buffer_uptodate(bh); - if (logit) { - reiserfs_prepare_for_journal(s, bh, 1); - journal_mark_dirty(&th, bh); - } else if (!buffer_dirty(bh)) { - mark_buffer_dirty(bh); - /* - * do data=ordered on any page past the end - * of file and any buffer marked BH_New. - */ - if (reiserfs_data_ordered(inode->i_sb) && - (new || page->index >= i_size_index)) { - reiserfs_add_ordered_list(inode, bh); - } - } - } - } - if (logit) { - ret = journal_end(&th); -drop_write_lock: - reiserfs_write_unlock(s); - } - /* - * If this is a partial write which happened to make all buffers - * uptodate then we can optimize away a bogus read_folio() for - * the next read(). Here we 'discover' whether the page went - * uptodate as a result of this (potentially partial) write. - */ - if (!partial) - SetPageUptodate(page); - return ret; -} - -const struct file_operations reiserfs_file_operations = { - .unlocked_ioctl = reiserfs_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = reiserfs_compat_ioctl, -#endif - .mmap = generic_file_mmap, - .open = reiserfs_file_open, - .release = reiserfs_file_release, - .fsync = reiserfs_sync_file, - .read_iter = generic_file_read_iter, - .write_iter = generic_file_write_iter, - .splice_read = filemap_splice_read, - .splice_write = iter_file_splice_write, - .llseek = generic_file_llseek, -}; - -const struct inode_operations reiserfs_file_inode_operations = { - .setattr = reiserfs_setattr, - .listxattr = reiserfs_listxattr, - .permission = reiserfs_permission, - .get_inode_acl = reiserfs_get_acl, - .set_acl = reiserfs_set_acl, - .fileattr_get = reiserfs_fileattr_get, - .fileattr_set = reiserfs_fileattr_set, -}; - -const struct inode_operations reiserfs_priv_file_inode_operations = { - .setattr = reiserfs_setattr, - .permission = reiserfs_permission, - .fileattr_get = reiserfs_fileattr_get, - .fileattr_set = reiserfs_fileattr_set, -}; diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c deleted file mode 100644 index 6c13a8d9a73c..000000000000 --- a/fs/reiserfs/fix_node.c +++ /dev/null @@ -1,2822 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include -#include -#include -#include "reiserfs.h" -#include - -/* - * To make any changes in the tree we find a node that contains item - * to be changed/deleted or position in the node we insert a new item - * to. We call this node S. 
To do balancing we need to decide what we - * will shift to left/right neighbor, or to a new node, where new item - * will be etc. To make this analysis simpler we build virtual - * node. Virtual node is an array of items, that will replace items of - * node S. (For instance if we are going to delete an item, virtual - * node does not contain it). Virtual node keeps information about - * item sizes and types, mergeability of first and last items, sizes - * of all entries in directory item. We use this array of items when - * calculating what we can shift to neighbors and how many nodes we - * have to have if we do not any shiftings, if we shift to left/right - * neighbor or to both. - */ - -/* - * Takes item number in virtual node, returns number of item - * that it has in source buffer - */ -static inline int old_item_num(int new_num, int affected_item_num, int mode) -{ - if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) - return new_num; - - if (mode == M_INSERT) { - - RFALSE(new_num == 0, - "vs-8005: for INSERT mode and item number of inserted item"); - - return new_num - 1; - } - - RFALSE(mode != M_DELETE, - "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'", - mode); - /* delete mode */ - return new_num + 1; -} - -static void create_virtual_node(struct tree_balance *tb, int h) -{ - struct item_head *ih; - struct virtual_node *vn = tb->tb_vn; - int new_num; - struct buffer_head *Sh; /* this comes from tb->S[h] */ - - Sh = PATH_H_PBUFFER(tb->tb_path, h); - - /* size of changed node */ - vn->vn_size = - MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h]; - - /* for internal nodes array if virtual items is not created */ - if (h) { - vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE); - return; - } - - /* number of items in virtual node */ - vn->vn_nr_item = - B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) - - ((vn->vn_mode == M_DELETE) ? 1 : 0); - - /* first virtual item */ - vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1); - memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item)); - vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item); - - /* first item in the node */ - ih = item_head(Sh, 0); - - /* define the mergeability for 0-th item (if it is not being deleted) */ - if (op_is_left_mergeable(&ih->ih_key, Sh->b_size) - && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) - vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; - - /* - * go through all items that remain in the virtual - * node (except for the new (inserted) one) - */ - for (new_num = 0; new_num < vn->vn_nr_item; new_num++) { - int j; - struct virtual_item *vi = vn->vn_vi + new_num; - int is_affected = - ((new_num != vn->vn_affected_item_num) ? 
0 : 1); - - if (is_affected && vn->vn_mode == M_INSERT) - continue; - - /* get item number in source node */ - j = old_item_num(new_num, vn->vn_affected_item_num, - vn->vn_mode); - - vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE; - vi->vi_ih = ih + j; - vi->vi_item = ih_item_body(Sh, ih + j); - vi->vi_uarea = vn->vn_free_ptr; - - /* - * FIXME: there is no check that item operation did not - * consume too much memory - */ - vn->vn_free_ptr += - op_create_vi(vn, vi, is_affected, tb->insert_size[0]); - if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) - reiserfs_panic(tb->tb_sb, "vs-8030", - "virtual node space consumed"); - - if (!is_affected) - /* this is not being changed */ - continue; - - if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { - vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; - /* pointer to data which is going to be pasted */ - vi->vi_new_data = vn->vn_data; - } - } - - /* virtual inserted item is not defined yet */ - if (vn->vn_mode == M_INSERT) { - struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num; - - RFALSE(vn->vn_ins_ih == NULL, - "vs-8040: item header of inserted item is not specified"); - vi->vi_item_len = tb->insert_size[0]; - vi->vi_ih = vn->vn_ins_ih; - vi->vi_item = vn->vn_data; - vi->vi_uarea = vn->vn_free_ptr; - - op_create_vi(vn, vi, 0 /*not pasted or cut */ , - tb->insert_size[0]); - } - - /* - * set right merge flag we take right delimiting key and - * check whether it is a mergeable item - */ - if (tb->CFR[0]) { - struct reiserfs_key *key; - - key = internal_key(tb->CFR[0], tb->rkey[0]); - if (op_is_left_mergeable(key, Sh->b_size) - && (vn->vn_mode != M_DELETE - || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) - vn->vn_vi[vn->vn_nr_item - 1].vi_type |= - VI_TYPE_RIGHT_MERGEABLE; - -#ifdef CONFIG_REISERFS_CHECK - if (op_is_left_mergeable(key, Sh->b_size) && - !(vn->vn_mode != M_DELETE - || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) { - /* - * we delete last item and it could be merged - * with right neighbor's first item - */ - if (! - (B_NR_ITEMS(Sh) == 1 - && is_direntry_le_ih(item_head(Sh, 0)) - && ih_entry_count(item_head(Sh, 0)) == 1)) { - /* - * node contains more than 1 item, or item - * is not directory item, or this item - * contains more than 1 entry - */ - print_block(Sh, 0, -1, -1); - reiserfs_panic(tb->tb_sb, "vs-8045", - "rdkey %k, affected item==%d " - "(mode==%c) Must be %c", - key, vn->vn_affected_item_num, - vn->vn_mode, M_DELETE); - } - } -#endif - - } -} - -/* - * Using virtual node check, how many items can be - * shifted to left neighbor - */ -static void check_left(struct tree_balance *tb, int h, int cur_free) -{ - int i; - struct virtual_node *vn = tb->tb_vn; - struct virtual_item *vi; - int d_size, ih_size; - - RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free); - - /* internal level */ - if (h > 0) { - tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE); - return; - } - - /* leaf level */ - - if (!cur_free || !vn->vn_nr_item) { - /* no free space or nothing to move */ - tb->lnum[h] = 0; - tb->lbytes = -1; - return; - } - - RFALSE(!PATH_H_PPARENT(tb->tb_path, 0), - "vs-8055: parent does not exist or invalid"); - - vi = vn->vn_vi; - if ((unsigned int)cur_free >= - (vn->vn_size - - ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? 
IH_SIZE : 0))) { - /* all contents of S[0] fits into L[0] */ - - RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, - "vs-8055: invalid mode or balance condition failed"); - - tb->lnum[0] = vn->vn_nr_item; - tb->lbytes = -1; - return; - } - - d_size = 0, ih_size = IH_SIZE; - - /* first item may be merge with last item in left neighbor */ - if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE) - d_size = -((int)IH_SIZE), ih_size = 0; - - tb->lnum[0] = 0; - for (i = 0; i < vn->vn_nr_item; - i++, ih_size = IH_SIZE, d_size = 0, vi++) { - d_size += vi->vi_item_len; - if (cur_free >= d_size) { - /* the item can be shifted entirely */ - cur_free -= d_size; - tb->lnum[0]++; - continue; - } - - /* the item cannot be shifted entirely, try to split it */ - /* - * check whether L[0] can hold ih and at least one byte - * of the item body - */ - - /* cannot shift even a part of the current item */ - if (cur_free <= ih_size) { - tb->lbytes = -1; - return; - } - cur_free -= ih_size; - - tb->lbytes = op_check_left(vi, cur_free, 0, 0); - if (tb->lbytes != -1) - /* count partially shifted item */ - tb->lnum[0]++; - - break; - } - - return; -} - -/* - * Using virtual node check, how many items can be - * shifted to right neighbor - */ -static void check_right(struct tree_balance *tb, int h, int cur_free) -{ - int i; - struct virtual_node *vn = tb->tb_vn; - struct virtual_item *vi; - int d_size, ih_size; - - RFALSE(cur_free < 0, "vs-8070: cur_free < 0"); - - /* internal level */ - if (h > 0) { - tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE); - return; - } - - /* leaf level */ - - if (!cur_free || !vn->vn_nr_item) { - /* no free space */ - tb->rnum[h] = 0; - tb->rbytes = -1; - return; - } - - RFALSE(!PATH_H_PPARENT(tb->tb_path, 0), - "vs-8075: parent does not exist or invalid"); - - vi = vn->vn_vi + vn->vn_nr_item - 1; - if ((unsigned int)cur_free >= - (vn->vn_size - - ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? 
IH_SIZE : 0))) { - /* all contents of S[0] fits into R[0] */ - - RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, - "vs-8080: invalid mode or balance condition failed"); - - tb->rnum[h] = vn->vn_nr_item; - tb->rbytes = -1; - return; - } - - d_size = 0, ih_size = IH_SIZE; - - /* last item may be merge with first item in right neighbor */ - if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) - d_size = -(int)IH_SIZE, ih_size = 0; - - tb->rnum[0] = 0; - for (i = vn->vn_nr_item - 1; i >= 0; - i--, d_size = 0, ih_size = IH_SIZE, vi--) { - d_size += vi->vi_item_len; - if (cur_free >= d_size) { - /* the item can be shifted entirely */ - cur_free -= d_size; - tb->rnum[0]++; - continue; - } - - /* - * check whether R[0] can hold ih and at least one - * byte of the item body - */ - - /* cannot shift even a part of the current item */ - if (cur_free <= ih_size) { - tb->rbytes = -1; - return; - } - - /* - * R[0] can hold the header of the item and at least - * one byte of its body - */ - cur_free -= ih_size; /* cur_free is still > 0 */ - - tb->rbytes = op_check_right(vi, cur_free); - if (tb->rbytes != -1) - /* count partially shifted item */ - tb->rnum[0]++; - - break; - } - - return; -} - -/* - * from - number of items, which are shifted to left neighbor entirely - * to - number of item, which are shifted to right neighbor entirely - * from_bytes - number of bytes of boundary item (or directory entries) - * which are shifted to left neighbor - * to_bytes - number of bytes of boundary item (or directory entries) - * which are shifted to right neighbor - */ -static int get_num_ver(int mode, struct tree_balance *tb, int h, - int from, int from_bytes, - int to, int to_bytes, short *snum012, int flow) -{ - int i; - int units; - struct virtual_node *vn = tb->tb_vn; - int total_node_size, max_node_size, current_item_size; - int needed_nodes; - - /* position of item we start filling node from */ - int start_item; - - /* position of item we finish filling node by */ - int end_item; - - /* - * number of first bytes (entries for directory) of start_item-th item - * we do not include into node that is being filled - */ - int start_bytes; - - /* - * number of last bytes (entries for directory) of end_item-th item - * we do node include into node that is being filled - */ - int end_bytes; - - /* - * these are positions in virtual item of items, that are split - * between S[0] and S1new and S1new and S2new - */ - int split_item_positions[2]; - - split_item_positions[0] = -1; - split_item_positions[1] = -1; - - /* - * We only create additional nodes if we are in insert or paste mode - * or we are in replace mode at the internal level. If h is 0 and - * the mode is M_REPLACE then in fix_nodes we change the mode to - * paste or insert before we get here in the code. - */ - RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), - "vs-8100: insert_size < 0 in overflow"); - - max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h)); - - /* - * snum012 [0-2] - number of items, that lay - * to S[0], first new node and second new node - */ - snum012[3] = -1; /* s1bytes */ - snum012[4] = -1; /* s2bytes */ - - /* internal level */ - if (h > 0) { - i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE); - if (i == max_node_size) - return 1; - return (i / max_node_size + 1); - } - - /* leaf level */ - needed_nodes = 1; - total_node_size = 0; - - /* start from 'from'-th item */ - start_item = from; - /* skip its first 'start_bytes' units */ - start_bytes = ((from_bytes != -1) ? 
from_bytes : 0); - - /* last included item is the 'end_item'-th one */ - end_item = vn->vn_nr_item - to - 1; - /* do not count last 'end_bytes' units of 'end_item'-th item */ - end_bytes = (to_bytes != -1) ? to_bytes : 0; - - /* - * go through all item beginning from the start_item-th item - * and ending by the end_item-th item. Do not count first - * 'start_bytes' units of 'start_item'-th item and last - * 'end_bytes' of 'end_item'-th item - */ - for (i = start_item; i <= end_item; i++) { - struct virtual_item *vi = vn->vn_vi + i; - int skip_from_end = ((i == end_item) ? end_bytes : 0); - - RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed"); - - /* get size of current item */ - current_item_size = vi->vi_item_len; - - /* - * do not take in calculation head part (from_bytes) - * of from-th item - */ - current_item_size -= - op_part_size(vi, 0 /*from start */ , start_bytes); - - /* do not take in calculation tail part of last item */ - current_item_size -= - op_part_size(vi, 1 /*from end */ , skip_from_end); - - /* if item fits into current node entierly */ - if (total_node_size + current_item_size <= max_node_size) { - snum012[needed_nodes - 1]++; - total_node_size += current_item_size; - start_bytes = 0; - continue; - } - - /* - * virtual item length is longer, than max size of item in - * a node. It is impossible for direct item - */ - if (current_item_size > max_node_size) { - RFALSE(is_direct_le_ih(vi->vi_ih), - "vs-8110: " - "direct item length is %d. It can not be longer than %d", - current_item_size, max_node_size); - /* we will try to split it */ - flow = 1; - } - - /* as we do not split items, take new node and continue */ - if (!flow) { - needed_nodes++; - i--; - total_node_size = 0; - continue; - } - - /* - * calculate number of item units which fit into node being - * filled - */ - { - int free_space; - - free_space = max_node_size - total_node_size - IH_SIZE; - units = - op_check_left(vi, free_space, start_bytes, - skip_from_end); - /* - * nothing fits into current node, take new - * node and continue - */ - if (units == -1) { - needed_nodes++, i--, total_node_size = 0; - continue; - } - } - - /* something fits into the current node */ - start_bytes += units; - snum012[needed_nodes - 1 + 3] = units; - - if (needed_nodes > 2) - reiserfs_warning(tb->tb_sb, "vs-8111", - "split_item_position is out of range"); - snum012[needed_nodes - 1]++; - split_item_positions[needed_nodes - 1] = i; - needed_nodes++; - /* continue from the same item with start_bytes != -1 */ - start_item = i; - i--; - total_node_size = 0; - } - - /* - * sum012[4] (if it is not -1) contains number of units of which - * are to be in S1new, snum012[3] - to be in S0. They are supposed - * to be S1bytes and S2bytes correspondingly, so recalculate - */ - if (snum012[4] > 0) { - int split_item_num; - int bytes_to_r, bytes_to_l; - int bytes_to_S1new; - - split_item_num = split_item_positions[1]; - bytes_to_l = - ((from == split_item_num - && from_bytes != -1) ? from_bytes : 0); - bytes_to_r = - ((end_item == split_item_num - && end_bytes != -1) ? end_bytes : 0); - bytes_to_S1new = - ((split_item_positions[0] == - split_item_positions[1]) ? 
snum012[3] : 0); - - /* s2bytes */ - snum012[4] = - op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] - - bytes_to_r - bytes_to_l - bytes_to_S1new; - - if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY && - vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT) - reiserfs_warning(tb->tb_sb, "vs-8115", - "not directory or indirect item"); - } - - /* now we know S2bytes, calculate S1bytes */ - if (snum012[3] > 0) { - int split_item_num; - int bytes_to_r, bytes_to_l; - int bytes_to_S2new; - - split_item_num = split_item_positions[0]; - bytes_to_l = - ((from == split_item_num - && from_bytes != -1) ? from_bytes : 0); - bytes_to_r = - ((end_item == split_item_num - && end_bytes != -1) ? end_bytes : 0); - bytes_to_S2new = - ((split_item_positions[0] == split_item_positions[1] - && snum012[4] != -1) ? snum012[4] : 0); - - /* s1bytes */ - snum012[3] = - op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] - - bytes_to_r - bytes_to_l - bytes_to_S2new; - } - - return needed_nodes; -} - - -/* - * Set parameters for balancing. - * Performs write of results of analysis of balancing into structure tb, - * where it will later be used by the functions that actually do the balancing. - * Parameters: - * tb tree_balance structure; - * h current level of the node; - * lnum number of items from S[h] that must be shifted to L[h]; - * rnum number of items from S[h] that must be shifted to R[h]; - * blk_num number of blocks that S[h] will be splitted into; - * s012 number of items that fall into splitted nodes. - * lbytes number of bytes which flow to the left neighbor from the - * item that is not shifted entirely - * rbytes number of bytes which flow to the right neighbor from the - * item that is not shifted entirely - * s1bytes number of bytes which flow to the first new node when - * S[0] splits (this number is contained in s012 array) - */ - -static void set_parameters(struct tree_balance *tb, int h, int lnum, - int rnum, int blk_num, short *s012, int lb, int rb) -{ - - tb->lnum[h] = lnum; - tb->rnum[h] = rnum; - tb->blknum[h] = blk_num; - - /* only for leaf level */ - if (h == 0) { - if (s012 != NULL) { - tb->s0num = *s012++; - tb->snum[0] = *s012++; - tb->snum[1] = *s012++; - tb->sbytes[0] = *s012++; - tb->sbytes[1] = *s012; - } - tb->lbytes = lb; - tb->rbytes = rb; - } - PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum); - PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum); - - PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb); - PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb); -} - -/* - * check if node disappears if we shift tb->lnum[0] items to left - * neighbor and tb->rnum[0] to the right one. - */ -static int is_leaf_removable(struct tree_balance *tb) -{ - struct virtual_node *vn = tb->tb_vn; - int to_left, to_right; - int size; - int remain_items; - - /* - * number of items that will be shifted to left (right) neighbor - * entirely - */ - to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); - to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 
1 : 0); - remain_items = vn->vn_nr_item; - - /* how many items remain in S[0] after shiftings to neighbors */ - remain_items -= (to_left + to_right); - - /* all content of node can be shifted to neighbors */ - if (remain_items < 1) { - set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0, - NULL, -1, -1); - return 1; - } - - /* S[0] is not removable */ - if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) - return 0; - - /* check whether we can divide 1 remaining item between neighbors */ - - /* get size of remaining item (in item units) */ - size = op_unit_num(&vn->vn_vi[to_left]); - - if (tb->lbytes + tb->rbytes >= size) { - set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL, - tb->lbytes, -1); - return 1; - } - - return 0; -} - -/* check whether L, S, R can be joined in one node */ -static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree) -{ - struct virtual_node *vn = tb->tb_vn; - int ih_size; - struct buffer_head *S0; - - S0 = PATH_H_PBUFFER(tb->tb_path, 0); - - ih_size = 0; - if (vn->vn_nr_item) { - if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE) - ih_size += IH_SIZE; - - if (vn->vn_vi[vn->vn_nr_item - 1]. - vi_type & VI_TYPE_RIGHT_MERGEABLE) - ih_size += IH_SIZE; - } else { - /* there was only one item and it will be deleted */ - struct item_head *ih; - - RFALSE(B_NR_ITEMS(S0) != 1, - "vs-8125: item number must be 1: it is %d", - B_NR_ITEMS(S0)); - - ih = item_head(S0, 0); - if (tb->CFR[0] - && !comp_short_le_keys(&ih->ih_key, - internal_key(tb->CFR[0], - tb->rkey[0]))) - /* - * Directory must be in correct state here: that is - * somewhere at the left side should exist first - * directory item. But the item being deleted can - * not be that first one because its right neighbor - * is item of the same directory. (But first item - * always gets deleted in last turn). 
So, neighbors - * of deleted item can be merged, so we can save - * ih_size - */ - if (is_direntry_le_ih(ih)) { - ih_size = IH_SIZE; - - /* - * we might check that left neighbor exists - * and is of the same directory - */ - RFALSE(le_ih_k_offset(ih) == DOT_OFFSET, - "vs-8130: first directory item can not be removed until directory is not empty"); - } - - } - - if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) { - set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1); - PROC_INFO_INC(tb->tb_sb, leaves_removable); - return 1; - } - return 0; - -} - -/* when we do not split item, lnum and rnum are numbers of entire items */ -#define SET_PAR_SHIFT_LEFT \ -if (h)\ -{\ - int to_l;\ - \ - to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\ - (MAX_NR_KEY(Sh) + 1 - lpar);\ - \ - set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\ -}\ -else \ -{\ - if (lset==LEFT_SHIFT_FLOW)\ - set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\ - tb->lbytes, -1);\ - else\ - set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\ - -1, -1);\ -} - -#define SET_PAR_SHIFT_RIGHT \ -if (h)\ -{\ - int to_r;\ - \ - to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\ - \ - set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\ -}\ -else \ -{\ - if (rset==RIGHT_SHIFT_FLOW)\ - set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\ - -1, tb->rbytes);\ - else\ - set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\ - -1, -1);\ -} - -static void free_buffers_in_tb(struct tree_balance *tb) -{ - int i; - - pathrelse(tb->tb_path); - - for (i = 0; i < MAX_HEIGHT; i++) { - brelse(tb->L[i]); - brelse(tb->R[i]); - brelse(tb->FL[i]); - brelse(tb->FR[i]); - brelse(tb->CFL[i]); - brelse(tb->CFR[i]); - - tb->L[i] = NULL; - tb->R[i] = NULL; - tb->FL[i] = NULL; - tb->FR[i] = NULL; - tb->CFL[i] = NULL; - tb->CFR[i] = NULL; - } -} - -/* - * Get new buffers for storing new nodes that are created while balancing. - * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; - * CARRY_ON - schedule didn't occur while the function worked; - * NO_DISK_SPACE - no disk space. - */ -/* The function is NOT SCHEDULE-SAFE! */ -static int get_empty_nodes(struct tree_balance *tb, int h) -{ - struct buffer_head *new_bh, *Sh = PATH_H_PBUFFER(tb->tb_path, h); - b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; - int counter, number_of_freeblk; - int amount_needed; /* number of needed empty blocks */ - int retval = CARRY_ON; - struct super_block *sb = tb->tb_sb; - - /* - * number_of_freeblk is the number of empty blocks which have been - * acquired for use by the balancing algorithm minus the number of - * empty blocks used in the previous levels of the analysis, - * number_of_freeblk = tb->cur_blknum can be non-zero if a schedule - * occurs after empty blocks are acquired, and the balancing analysis - * is then restarted, amount_needed is the number needed by this - * level (h) of the balancing analysis. - * - * Note that for systems with many processes writing, it would be - * more layout optimal to calculate the total number needed by all - * levels and then to run reiserfs_new_blocks to get all of them at - * once. - */ - - /* - * Initiate number_of_freeblk to the amount acquired prior to the - * restart of the analysis or 0 if not restarted, then subtract the - * amount needed by all of the levels of the tree below h. 
- */ - /* blknum includes S[h], so we subtract 1 in this calculation */ - for (counter = 0, number_of_freeblk = tb->cur_blknum; - counter < h; counter++) - number_of_freeblk -= - (tb->blknum[counter]) ? (tb->blknum[counter] - - 1) : 0; - - /* Allocate missing empty blocks. */ - /* if Sh == 0 then we are getting a new root */ - amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1; - /* - * Amount_needed = the amount that we need more than the - * amount that we have. - */ - if (amount_needed > number_of_freeblk) - amount_needed -= number_of_freeblk; - else /* If we have enough already then there is nothing to do. */ - return CARRY_ON; - - /* - * No need to check quota - is not allocated for blocks used - * for formatted nodes - */ - if (reiserfs_new_form_blocknrs(tb, blocknrs, - amount_needed) == NO_DISK_SPACE) - return NO_DISK_SPACE; - - /* for each blocknumber we just got, get a buffer and stick it on FEB */ - for (blocknr = blocknrs, counter = 0; - counter < amount_needed; blocknr++, counter++) { - - RFALSE(!*blocknr, - "PAP-8135: reiserfs_new_blocknrs failed when got new blocks"); - - new_bh = sb_getblk(sb, *blocknr); - RFALSE(buffer_dirty(new_bh) || - buffer_journaled(new_bh) || - buffer_journal_dirty(new_bh), - "PAP-8140: journaled or dirty buffer %b for the new block", - new_bh); - - /* Put empty buffers into the array. */ - RFALSE(tb->FEB[tb->cur_blknum], - "PAP-8141: busy slot for new buffer"); - - set_buffer_journal_new(new_bh); - tb->FEB[tb->cur_blknum++] = new_bh; - } - - if (retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb)) - retval = REPEAT_SEARCH; - - return retval; -} - -/* - * Get free space of the left neighbor, which is stored in the parent - * node of the left neighbor. - */ -static int get_lfree(struct tree_balance *tb, int h) -{ - struct buffer_head *l, *f; - int order; - - if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL || - (l = tb->FL[h]) == NULL) - return 0; - - if (f == l) - order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1; - else { - order = B_NR_ITEMS(l); - f = l; - } - - return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); -} - -/* - * Get free space of the right neighbor, - * which is stored in the parent node of the right neighbor. - */ -static int get_rfree(struct tree_balance *tb, int h) -{ - struct buffer_head *r, *f; - int order; - - if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL || - (r = tb->FR[h]) == NULL) - return 0; - - if (f == r) - order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1; - else { - order = 0; - f = r; - } - - return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); - -} - -/* Check whether left neighbor is in memory. */ -static int is_left_neighbor_in_cache(struct tree_balance *tb, int h) -{ - struct buffer_head *father, *left; - struct super_block *sb = tb->tb_sb; - b_blocknr_t left_neighbor_blocknr; - int left_neighbor_position; - - /* Father of the left neighbor does not exist. */ - if (!tb->FL[h]) - return 0; - - /* Calculate father of the node to be balanced. */ - father = PATH_H_PBUFFER(tb->tb_path, h + 1); - - RFALSE(!father || - !B_IS_IN_TREE(father) || - !B_IS_IN_TREE(tb->FL[h]) || - !buffer_uptodate(father) || - !buffer_uptodate(tb->FL[h]), - "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", - father, tb->FL[h]); - - /* - * Get position of the pointer to the left neighbor - * into the left father. - */ - left_neighbor_position = (father == tb->FL[h]) ? - tb->lkey[h] : B_NR_ITEMS(tb->FL[h]); - /* Get left neighbor block number. 
*/ - left_neighbor_blocknr = - B_N_CHILD_NUM(tb->FL[h], left_neighbor_position); - /* Look for the left neighbor in the cache. */ - if ((left = sb_find_get_block(sb, left_neighbor_blocknr))) { - - RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left), - "vs-8170: left neighbor (%b %z) is not in the tree", - left, left); - put_bh(left); - return 1; - } - - return 0; -} - -#define LEFT_PARENTS 'l' -#define RIGHT_PARENTS 'r' - -static void decrement_key(struct cpu_key *key) -{ - /* call item specific function for this key */ - item_ops[cpu_key_k_type(key)]->decrement_key(key); -} - -/* - * Calculate far left/right parent of the left/right neighbor of the - * current node, that is calculate the left/right (FL[h]/FR[h]) neighbor - * of the parent F[h]. - * Calculate left/right common parent of the current node and L[h]/R[h]. - * Calculate left/right delimiting key position. - * Returns: PATH_INCORRECT - path in the tree is not correct - * SCHEDULE_OCCURRED - schedule occurred while the function worked - * CARRY_ON - schedule didn't occur while the function - * worked - */ -static int get_far_parent(struct tree_balance *tb, - int h, - struct buffer_head **pfather, - struct buffer_head **pcom_father, char c_lr_par) -{ - struct buffer_head *parent; - INITIALIZE_PATH(s_path_to_neighbor_father); - struct treepath *path = tb->tb_path; - struct cpu_key s_lr_father_key; - int counter, - position = INT_MAX, - first_last_position = 0, - path_offset = PATH_H_PATH_OFFSET(path, h); - - /* - * Starting from F[h] go upwards in the tree, and look for the common - * ancestor of F[h], and its neighbor l/r, that should be obtained. - */ - - counter = path_offset; - - RFALSE(counter < FIRST_PATH_ELEMENT_OFFSET, - "PAP-8180: invalid path length"); - - for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) { - /* - * Check whether parent of the current buffer in the path - * is really parent in the tree. - */ - if (!B_IS_IN_TREE - (parent = PATH_OFFSET_PBUFFER(path, counter - 1))) - return REPEAT_SEARCH; - - /* Check whether position in the parent is correct. */ - if ((position = - PATH_OFFSET_POSITION(path, - counter - 1)) > - B_NR_ITEMS(parent)) - return REPEAT_SEARCH; - - /* - * Check whether parent at the path really points - * to the child. - */ - if (B_N_CHILD_NUM(parent, position) != - PATH_OFFSET_PBUFFER(path, counter)->b_blocknr) - return REPEAT_SEARCH; - - /* - * Return delimiting key if position in the parent is not - * equal to first/last one. - */ - if (c_lr_par == RIGHT_PARENTS) - first_last_position = B_NR_ITEMS(parent); - if (position != first_last_position) { - *pcom_father = parent; - get_bh(*pcom_father); - /*(*pcom_father = parent)->b_count++; */ - break; - } - } - - /* if we are in the root of the tree, then there is no common father */ - if (counter == FIRST_PATH_ELEMENT_OFFSET) { - /* - * Check whether first buffer in the path is the - * root of the tree. - */ - if (PATH_OFFSET_PBUFFER - (tb->tb_path, - FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == - SB_ROOT_BLOCK(tb->tb_sb)) { - *pfather = *pcom_father = NULL; - return CARRY_ON; - } - return REPEAT_SEARCH; - } - - RFALSE(B_LEVEL(*pcom_father) <= DISK_LEAF_NODE_LEVEL, - "PAP-8185: (%b %z) level too small", - *pcom_father, *pcom_father); - - /* Check whether the common parent is locked. 
*/ - - if (buffer_locked(*pcom_father)) { - - /* Release the write lock while the buffer is busy */ - int depth = reiserfs_write_unlock_nested(tb->tb_sb); - __wait_on_buffer(*pcom_father); - reiserfs_write_lock_nested(tb->tb_sb, depth); - if (FILESYSTEM_CHANGED_TB(tb)) { - brelse(*pcom_father); - return REPEAT_SEARCH; - } - } - - /* - * So, we got common parent of the current node and its - * left/right neighbor. Now we are getting the parent of the - * left/right neighbor. - */ - - /* Form key to get parent of the left/right neighbor. */ - le_key2cpu_key(&s_lr_father_key, - internal_key(*pcom_father, - (c_lr_par == - LEFT_PARENTS) ? (tb->lkey[h - 1] = - position - - 1) : (tb->rkey[h - - 1] = - position))); - - if (c_lr_par == LEFT_PARENTS) - decrement_key(&s_lr_father_key); - - if (search_by_key - (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, - h + 1) == IO_ERROR) - /* path is released */ - return IO_ERROR; - - if (FILESYSTEM_CHANGED_TB(tb)) { - pathrelse(&s_path_to_neighbor_father); - brelse(*pcom_father); - return REPEAT_SEARCH; - } - - *pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father); - - RFALSE(B_LEVEL(*pfather) != h + 1, - "PAP-8190: (%b %z) level too small", *pfather, *pfather); - RFALSE(s_path_to_neighbor_father.path_length < - FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small"); - - s_path_to_neighbor_father.path_length--; - pathrelse(&s_path_to_neighbor_father); - return CARRY_ON; -} - -/* - * Get parents of neighbors of node in the path(S[path_offset]) and - * common parents of S[path_offset] and L[path_offset]/R[path_offset]: - * F[path_offset], FL[path_offset], FR[path_offset], CFL[path_offset], - * CFR[path_offset]. - * Calculate numbers of left and right delimiting keys position: - * lkey[path_offset], rkey[path_offset]. - * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked - * CARRY_ON - schedule didn't occur while the function worked - */ -static int get_parents(struct tree_balance *tb, int h) -{ - struct treepath *path = tb->tb_path; - int position, - ret, - path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h); - struct buffer_head *curf, *curcf; - - /* Current node is the root of the tree or will be root of the tree */ - if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { - /* - * The root can not have parents. - * Release nodes which previously were obtained as - * parents of the current node neighbors. - */ - brelse(tb->FL[h]); - brelse(tb->CFL[h]); - brelse(tb->FR[h]); - brelse(tb->CFR[h]); - tb->FL[h] = NULL; - tb->CFL[h] = NULL; - tb->FR[h] = NULL; - tb->CFR[h] = NULL; - return CARRY_ON; - } - - /* Get parent FL[path_offset] of L[path_offset]. */ - position = PATH_OFFSET_POSITION(path, path_offset - 1); - if (position) { - /* Current node is not the first child of its parent. */ - curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); - curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); - get_bh(curf); - get_bh(curf); - tb->lkey[h] = position - 1; - } else { - /* - * Calculate current parent of L[path_offset], which is the - * left neighbor of the current node. Calculate current - * common parent of L[path_offset] and the current node. - * Note that CFL[path_offset] not equal FL[path_offset] and - * CFL[path_offset] not equal F[path_offset]. - * Calculate lkey[path_offset]. - */ - if ((ret = get_far_parent(tb, h + 1, &curf, - &curcf, - LEFT_PARENTS)) != CARRY_ON) - return ret; - } - - brelse(tb->FL[h]); - tb->FL[h] = curf; /* New initialization of FL[h]. 
*/ - brelse(tb->CFL[h]); - tb->CFL[h] = curcf; /* New initialization of CFL[h]. */ - - RFALSE((curf && !B_IS_IN_TREE(curf)) || - (curcf && !B_IS_IN_TREE(curcf)), - "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf); - - /* Get parent FR[h] of R[h]. */ - - /* Current node is the last child of F[h]. FR[h] != F[h]. */ - if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) { - /* - * Calculate current parent of R[h], which is the right - * neighbor of F[h]. Calculate current common parent of - * R[h] and current node. Note that CFR[h] not equal - * FR[path_offset] and CFR[h] not equal F[h]. - */ - if ((ret = - get_far_parent(tb, h + 1, &curf, &curcf, - RIGHT_PARENTS)) != CARRY_ON) - return ret; - } else { - /* Current node is not the last child of its parent F[h]. */ - curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); - curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); - get_bh(curf); - get_bh(curf); - tb->rkey[h] = position; - } - - brelse(tb->FR[h]); - /* New initialization of FR[path_offset]. */ - tb->FR[h] = curf; - - brelse(tb->CFR[h]); - /* New initialization of CFR[path_offset]. */ - tb->CFR[h] = curcf; - - RFALSE((curf && !B_IS_IN_TREE(curf)) || - (curcf && !B_IS_IN_TREE(curcf)), - "PAP-8205: FR (%b) or CFR (%b) is invalid", curf, curcf); - - return CARRY_ON; -} - -/* - * it is possible to remove node as result of shiftings to - * neighbors even when we insert or paste item. - */ -static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, - struct tree_balance *tb, int h) -{ - struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h); - int levbytes = tb->insert_size[h]; - struct item_head *ih; - struct reiserfs_key *r_key = NULL; - - ih = item_head(Sh, 0); - if (tb->CFR[h]) - r_key = internal_key(tb->CFR[h], tb->rkey[h]); - - if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes - /* shifting may merge items which might save space */ - - - ((!h - && op_is_left_mergeable(&ih->ih_key, Sh->b_size)) ? IH_SIZE : 0) - - - ((!h && r_key - && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0) - + ((h) ? KEY_SIZE : 0)) { - /* node can not be removed */ - if (sfree >= levbytes) { - /* new item fits into node S[h] without any shifting */ - if (!h) - tb->s0num = - B_NR_ITEMS(Sh) + - ((mode == M_INSERT) ? 1 : 0); - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; - } - } - PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]); - return !NO_BALANCING_NEEDED; -} - -/* - * Check whether current node S[h] is balanced when increasing its size by - * Inserting or Pasting. - * Calculate parameters for balancing for current level h. - * Parameters: - * tb tree_balance structure; - * h current level of the node; - * inum item number in S[h]; - * mode i - insert, p - paste; - * Returns: 1 - schedule occurred; - * 0 - balancing for higher levels needed; - * -1 - no balancing for higher levels needed; - * -2 - no disk space. - */ -/* ip means Inserting or Pasting */ -static int ip_check_balance(struct tree_balance *tb, int h) -{ - struct virtual_node *vn = tb->tb_vn; - /* - * Number of bytes that must be inserted into (value is negative - * if bytes are deleted) buffer which contains node being balanced. - * The mnemonic is that the attempted change in node space used - * level is levbytes bytes. 
- */ - int levbytes; - int ret; - - int lfree, sfree, rfree /* free space in L, S and R */ ; - - /* - * nver is short for number of vertixes, and lnver is the number if - * we shift to the left, rnver is the number if we shift to the - * right, and lrnver is the number if we shift in both directions. - * The goal is to minimize first the number of vertixes, and second, - * the number of vertixes whose contents are changed by shifting, - * and third the number of uncached vertixes whose contents are - * changed by shifting and must be read from disk. - */ - int nver, lnver, rnver, lrnver; - - /* - * used at leaf level only, S0 = S[0] is the node being balanced, - * sInum [ I = 0,1,2 ] is the number of items that will - * remain in node SI after balancing. S1 and S2 are new - * nodes that might be created. - */ - - /* - * we perform 8 calls to get_num_ver(). For each call we - * calculate five parameters. where 4th parameter is s1bytes - * and 5th - s2bytes - * - * s0num, s1num, s2num for 8 cases - * 0,1 - do not shift and do not shift but bottle - * 2 - shift only whole item to left - * 3 - shift to left and bottle as much as possible - * 4,5 - shift to right (whole items and as much as possible - * 6,7 - shift to both directions (whole items and as much as possible) - */ - short snum012[40] = { 0, }; - - /* Sh is the node whose balance is currently being checked */ - struct buffer_head *Sh; - - Sh = PATH_H_PBUFFER(tb->tb_path, h); - levbytes = tb->insert_size[h]; - - /* Calculate balance parameters for creating new root. */ - if (!Sh) { - if (!h) - reiserfs_panic(tb->tb_sb, "vs-8210", - "S[0] can not be 0"); - switch (ret = get_empty_nodes(tb, h)) { - /* no balancing for higher levels needed */ - case CARRY_ON: - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; - - case NO_DISK_SPACE: - case REPEAT_SEARCH: - return ret; - default: - reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect " - "return value of get_empty_nodes"); - } - } - - /* get parents of S[h] neighbors. */ - ret = get_parents(tb, h); - if (ret != CARRY_ON) - return ret; - - sfree = B_FREE_SPACE(Sh); - - /* get free space of neighbors */ - rfree = get_rfree(tb, h); - lfree = get_lfree(tb, h); - - /* and new item fits into node S[h] without any shifting */ - if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) == - NO_BALANCING_NEEDED) - return NO_BALANCING_NEEDED; - - create_virtual_node(tb, h); - - /* - * determine maximal number of items we can shift to the left - * neighbor (in tb structure) and the maximal number of bytes - * that can flow to the left neighbor from the left most liquid - * item that cannot be shifted from S[0] entirely (returned value) - */ - check_left(tb, h, lfree); - - /* - * determine maximal number of items we can shift to the right - * neighbor (in tb structure) and the maximal number of bytes - * that can flow to the right neighbor from the right most liquid - * item that cannot be shifted from S[0] entirely (returned value) - */ - check_right(tb, h, rfree); - - /* - * all contents of internal node S[h] can be moved into its - * neighbors, S[h] will be removed after balancing - */ - if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { - int to_r; - - /* - * Since we are working on internal nodes, and our internal - * nodes have fixed size entries, then we can balance by the - * number of items rather than the space they consume. 
In this - * routine we set the left node equal to the right node, - * allowing a difference of less than or equal to 1 child - * pointer. - */ - to_r = - ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + - vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - - tb->rnum[h]); - set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, - -1, -1); - return CARRY_ON; - } - - /* - * this checks balance condition, that any two neighboring nodes - * can not fit in one node - */ - RFALSE(h && - (tb->lnum[h] >= vn->vn_nr_item + 1 || - tb->rnum[h] >= vn->vn_nr_item + 1), - "vs-8220: tree is not balanced on internal level"); - RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) || - (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))), - "vs-8225: tree is not balanced on leaf level"); - - /* - * all contents of S[0] can be moved into its neighbors - * S[0] will be removed after balancing. - */ - if (!h && is_leaf_removable(tb)) - return CARRY_ON; - - /* - * why do we perform this check here rather than earlier?? - * Answer: we can win 1 node in some cases above. Moreover we - * checked it above, when we checked, that S[0] is not removable - * in principle - */ - - /* new item fits into node S[h] without any shifting */ - if (sfree >= levbytes) { - if (!h) - tb->s0num = vn->vn_nr_item; - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; - } - - { - int lpar, rpar, nset, lset, rset, lrset; - /* regular overflowing of the node */ - - /* - * get_num_ver works in 2 modes (FLOW & NO_FLOW) - * lpar, rpar - number of items we can shift to left/right - * neighbor (including splitting item) - * nset, lset, rset, lrset - shows, whether flowing items - * give better packing - */ -#define FLOW 1 -#define NO_FLOW 0 /* do not any splitting */ - - /* we choose one of the following */ -#define NOTHING_SHIFT_NO_FLOW 0 -#define NOTHING_SHIFT_FLOW 5 -#define LEFT_SHIFT_NO_FLOW 10 -#define LEFT_SHIFT_FLOW 15 -#define RIGHT_SHIFT_NO_FLOW 20 -#define RIGHT_SHIFT_FLOW 25 -#define LR_SHIFT_NO_FLOW 30 -#define LR_SHIFT_FLOW 35 - - lpar = tb->lnum[h]; - rpar = tb->rnum[h]; - - /* - * calculate number of blocks S[h] must be split into when - * nothing is shifted to the neighbors, as well as number of - * items in each part of the split node (s012 numbers), - * and number of bytes (s1bytes) of the shared drop which - * flow to S1 if any - */ - nset = NOTHING_SHIFT_NO_FLOW; - nver = get_num_ver(vn->vn_mode, tb, h, - 0, -1, h ? vn->vn_nr_item : 0, -1, - snum012, NO_FLOW); - - if (!h) { - int nver1; - - /* - * note, that in this case we try to bottle - * between S[0] and S1 (S1 - the first new node) - */ - nver1 = get_num_ver(vn->vn_mode, tb, h, - 0, -1, 0, -1, - snum012 + NOTHING_SHIFT_FLOW, FLOW); - if (nver > nver1) - nset = NOTHING_SHIFT_FLOW, nver = nver1; - } - - /* - * calculate number of blocks S[h] must be split into when - * l_shift_num first items and l_shift_bytes of the right - * most liquid item to be shifted are shifted to the left - * neighbor, as well as number of items in each part of the - * splitted node (s012 numbers), and number of bytes - * (s1bytes) of the shared drop which flow to S1 if any - */ - lset = LEFT_SHIFT_NO_FLOW; - lnver = get_num_ver(vn->vn_mode, tb, h, - lpar - ((h || tb->lbytes == -1) ? 0 : 1), - -1, h ? vn->vn_nr_item : 0, -1, - snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW); - if (!h) { - int lnver1; - - lnver1 = get_num_ver(vn->vn_mode, tb, h, - lpar - - ((tb->lbytes != -1) ? 
1 : 0), - tb->lbytes, 0, -1, - snum012 + LEFT_SHIFT_FLOW, FLOW); - if (lnver > lnver1) - lset = LEFT_SHIFT_FLOW, lnver = lnver1; - } - - /* - * calculate number of blocks S[h] must be split into when - * r_shift_num first items and r_shift_bytes of the left most - * liquid item to be shifted are shifted to the right neighbor, - * as well as number of items in each part of the splitted - * node (s012 numbers), and number of bytes (s1bytes) of the - * shared drop which flow to S1 if any - */ - rset = RIGHT_SHIFT_NO_FLOW; - rnver = get_num_ver(vn->vn_mode, tb, h, - 0, -1, - h ? (vn->vn_nr_item - rpar) : (rpar - - ((tb-> - rbytes != - -1) ? 1 : - 0)), -1, - snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW); - if (!h) { - int rnver1; - - rnver1 = get_num_ver(vn->vn_mode, tb, h, - 0, -1, - (rpar - - ((tb->rbytes != -1) ? 1 : 0)), - tb->rbytes, - snum012 + RIGHT_SHIFT_FLOW, FLOW); - - if (rnver > rnver1) - rset = RIGHT_SHIFT_FLOW, rnver = rnver1; - } - - /* - * calculate number of blocks S[h] must be split into when - * items are shifted in both directions, as well as number - * of items in each part of the splitted node (s012 numbers), - * and number of bytes (s1bytes) of the shared drop which - * flow to S1 if any - */ - lrset = LR_SHIFT_NO_FLOW; - lrnver = get_num_ver(vn->vn_mode, tb, h, - lpar - ((h || tb->lbytes == -1) ? 0 : 1), - -1, - h ? (vn->vn_nr_item - rpar) : (rpar - - ((tb-> - rbytes != - -1) ? 1 : - 0)), -1, - snum012 + LR_SHIFT_NO_FLOW, NO_FLOW); - if (!h) { - int lrnver1; - - lrnver1 = get_num_ver(vn->vn_mode, tb, h, - lpar - - ((tb->lbytes != -1) ? 1 : 0), - tb->lbytes, - (rpar - - ((tb->rbytes != -1) ? 1 : 0)), - tb->rbytes, - snum012 + LR_SHIFT_FLOW, FLOW); - if (lrnver > lrnver1) - lrset = LR_SHIFT_FLOW, lrnver = lrnver1; - } - - /* - * Our general shifting strategy is: - * 1) to minimized number of new nodes; - * 2) to minimized number of neighbors involved in shifting; - * 3) to minimized number of disk reads; - */ - - /* we can win TWO or ONE nodes by shifting in both directions */ - if (lrnver < lnver && lrnver < rnver) { - RFALSE(h && - (tb->lnum[h] != 1 || - tb->rnum[h] != 1 || - lrnver != 1 || rnver != 2 || lnver != 2 - || h != 1), "vs-8230: bad h"); - if (lrset == LR_SHIFT_FLOW) - set_parameters(tb, h, tb->lnum[h], tb->rnum[h], - lrnver, snum012 + lrset, - tb->lbytes, tb->rbytes); - else - set_parameters(tb, h, - tb->lnum[h] - - ((tb->lbytes == -1) ? 0 : 1), - tb->rnum[h] - - ((tb->rbytes == -1) ? 
0 : 1), - lrnver, snum012 + lrset, -1, -1); - - return CARRY_ON; - } - - /* - * if shifting doesn't lead to better packing - * then don't shift - */ - if (nver == lrnver) { - set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1, - -1); - return CARRY_ON; - } - - /* - * now we know that for better packing shifting in only one - * direction either to the left or to the right is required - */ - - /* - * if shifting to the left is better than - * shifting to the right - */ - if (lnver < rnver) { - SET_PAR_SHIFT_LEFT; - return CARRY_ON; - } - - /* - * if shifting to the right is better than - * shifting to the left - */ - if (lnver > rnver) { - SET_PAR_SHIFT_RIGHT; - return CARRY_ON; - } - - /* - * now shifting in either direction gives the same number - * of nodes and we can make use of the cached neighbors - */ - if (is_left_neighbor_in_cache(tb, h)) { - SET_PAR_SHIFT_LEFT; - return CARRY_ON; - } - - /* - * shift to the right independently on whether the - * right neighbor in cache or not - */ - SET_PAR_SHIFT_RIGHT; - return CARRY_ON; - } -} - -/* - * Check whether current node S[h] is balanced when Decreasing its size by - * Deleting or Cutting for INTERNAL node of S+tree. - * Calculate parameters for balancing for current level h. - * Parameters: - * tb tree_balance structure; - * h current level of the node; - * inum item number in S[h]; - * mode i - insert, p - paste; - * Returns: 1 - schedule occurred; - * 0 - balancing for higher levels needed; - * -1 - no balancing for higher levels needed; - * -2 - no disk space. - * - * Note: Items of internal nodes have fixed size, so the balance condition for - * the internal part of S+tree is as for the B-trees. - */ -static int dc_check_balance_internal(struct tree_balance *tb, int h) -{ - struct virtual_node *vn = tb->tb_vn; - - /* - * Sh is the node whose balance is currently being checked, - * and Fh is its father. - */ - struct buffer_head *Sh, *Fh; - int ret; - int lfree, rfree /* free space in L and R */ ; - - Sh = PATH_H_PBUFFER(tb->tb_path, h); - Fh = PATH_H_PPARENT(tb->tb_path, h); - - /* - * using tb->insert_size[h], which is negative in this case, - * create_virtual_node calculates: - * new_nr_item = number of items node would have if operation is - * performed without balancing (new_nr_item); - */ - create_virtual_node(tb, h); - - if (!Fh) { /* S[h] is the root. */ - /* no balancing for higher levels needed */ - if (vn->vn_nr_item > 0) { - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; - } - /* - * new_nr_item == 0. - * Current root will be deleted resulting in - * decrementing the tree height. - */ - set_parameters(tb, h, 0, 0, 0, NULL, -1, -1); - return CARRY_ON; - } - - if ((ret = get_parents(tb, h)) != CARRY_ON) - return ret; - - /* get free space of neighbors */ - rfree = get_rfree(tb, h); - lfree = get_lfree(tb, h); - - /* determine maximal number of items we can fit into neighbors */ - check_left(tb, h, lfree); - check_right(tb, h, rfree); - - /* - * Balance condition for the internal node is valid. - * In this case we balance only if it leads to better packing. - */ - if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { - /* - * Here we join S[h] with one of its neighbors, - * which is impossible with greater values of new_nr_item. - */ - if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { - /* All contents of S[h] can be moved to L[h]. */ - if (tb->lnum[h] >= vn->vn_nr_item + 1) { - int n; - int order_L; - - order_L = - ((n = - PATH_H_B_ITEM_ORDER(tb->tb_path, - h)) == - 0) ? 
B_NR_ITEMS(tb->FL[h]) : n - 1; - n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / - (DC_SIZE + KEY_SIZE); - set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, - -1); - return CARRY_ON; - } - - /* All contents of S[h] can be moved to R[h]. */ - if (tb->rnum[h] >= vn->vn_nr_item + 1) { - int n; - int order_R; - - order_R = - ((n = - PATH_H_B_ITEM_ORDER(tb->tb_path, - h)) == - B_NR_ITEMS(Fh)) ? 0 : n + 1; - n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / - (DC_SIZE + KEY_SIZE); - set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, - -1); - return CARRY_ON; - } - } - - /* - * All contents of S[h] can be moved to the neighbors - * (L[h] & R[h]). - */ - if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { - int to_r; - - to_r = - ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - - tb->rnum[h] + vn->vn_nr_item + 1) / 2 - - (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); - set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, - 0, NULL, -1, -1); - return CARRY_ON; - } - - /* Balancing does not lead to better packing. */ - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; - } - - /* - * Current node contain insufficient number of items. - * Balancing is required. - */ - /* Check whether we can merge S[h] with left neighbor. */ - if (tb->lnum[h] >= vn->vn_nr_item + 1) - if (is_left_neighbor_in_cache(tb, h) - || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) { - int n; - int order_L; - - order_L = - ((n = - PATH_H_B_ITEM_ORDER(tb->tb_path, - h)) == - 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; - n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE + - KEY_SIZE); - set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1); - return CARRY_ON; - } - - /* Check whether we can merge S[h] with right neighbor. */ - if (tb->rnum[h] >= vn->vn_nr_item + 1) { - int n; - int order_R; - - order_R = - ((n = - PATH_H_B_ITEM_ORDER(tb->tb_path, - h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1); - n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE + - KEY_SIZE); - set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1); - return CARRY_ON; - } - - /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ - if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { - int to_r; - - to_r = - ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + - vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - - tb->rnum[h]); - set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, - -1, -1); - return CARRY_ON; - } - - /* For internal nodes try to borrow item from a neighbor */ - RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root"); - - /* Borrow one or two items from caching neighbor */ - if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) { - int from_l; - - from_l = - (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item + - 1) / 2 - (vn->vn_nr_item + 1); - set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1); - return CARRY_ON; - } - - set_parameters(tb, h, 0, - -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item + - 1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1); - return CARRY_ON; -} - -/* - * Check whether current node S[h] is balanced when Decreasing its size by - * Deleting or Truncating for LEAF node of S+tree. - * Calculate parameters for balancing for current level h. - * Parameters: - * tb tree_balance structure; - * h current level of the node; - * inum item number in S[h]; - * mode i - insert, p - paste; - * Returns: 1 - schedule occurred; - * 0 - balancing for higher levels needed; - * -1 - no balancing for higher levels needed; - * -2 - no disk space. 
- */ -static int dc_check_balance_leaf(struct tree_balance *tb, int h) -{ - struct virtual_node *vn = tb->tb_vn; - - /* - * Number of bytes that must be deleted from - * (value is negative if bytes are deleted) buffer which - * contains node being balanced. The mnemonic is that the - * attempted change in node space used level is levbytes bytes. - */ - int levbytes; - - /* the maximal item size */ - int maxsize, ret; - - /* - * S0 is the node whose balance is currently being checked, - * and F0 is its father. - */ - struct buffer_head *S0, *F0; - int lfree, rfree /* free space in L and R */ ; - - S0 = PATH_H_PBUFFER(tb->tb_path, 0); - F0 = PATH_H_PPARENT(tb->tb_path, 0); - - levbytes = tb->insert_size[h]; - - maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */ - - if (!F0) { /* S[0] is the root now. */ - - RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0), - "vs-8240: attempt to create empty buffer tree"); - - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; - } - - if ((ret = get_parents(tb, h)) != CARRY_ON) - return ret; - - /* get free space of neighbors */ - rfree = get_rfree(tb, h); - lfree = get_lfree(tb, h); - - create_virtual_node(tb, h); - - /* if 3 leaves can be merge to one, set parameters and return */ - if (are_leaves_removable(tb, lfree, rfree)) - return CARRY_ON; - - /* - * determine maximal number of items we can shift to the left/right - * neighbor and the maximal number of bytes that can flow to the - * left/right neighbor from the left/right most liquid item that - * cannot be shifted from S[0] entirely - */ - check_left(tb, h, lfree); - check_right(tb, h, rfree); - - /* check whether we can merge S with left neighbor. */ - if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1) - if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */ - !tb->FR[h]) { - - RFALSE(!tb->FL[h], - "vs-8245: dc_check_balance_leaf: FL[h] must exist"); - - /* set parameter to merge S[0] with its left neighbor */ - set_parameters(tb, h, -1, 0, 0, NULL, -1, -1); - return CARRY_ON; - } - - /* check whether we can merge S[0] with right neighbor. */ - if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) { - set_parameters(tb, h, 0, -1, 0, NULL, -1, -1); - return CARRY_ON; - } - - /* - * All contents of S[0] can be moved to the neighbors (L[0] & R[0]). - * Set parameters and return - */ - if (is_leaf_removable(tb)) - return CARRY_ON; - - /* Balancing is not required. */ - tb->s0num = vn->vn_nr_item; - set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); - return NO_BALANCING_NEEDED; -} - -/* - * Check whether current node S[h] is balanced when Decreasing its size by - * Deleting or Cutting. - * Calculate parameters for balancing for current level h. - * Parameters: - * tb tree_balance structure; - * h current level of the node; - * inum item number in S[h]; - * mode d - delete, c - cut. - * Returns: 1 - schedule occurred; - * 0 - balancing for higher levels needed; - * -1 - no balancing for higher levels needed; - * -2 - no disk space. - */ -static int dc_check_balance(struct tree_balance *tb, int h) -{ - RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)), - "vs-8250: S is not initialized"); - - if (h) - return dc_check_balance_internal(tb, h); - else - return dc_check_balance_leaf(tb, h); -} - -/* - * Check whether current node S[h] is balanced. - * Calculate parameters for balancing for current level h. 
- * Parameters: - * - * tb tree_balance structure: - * - * tb is a large structure that must be read about in the header - * file at the same time as this procedure if the reader is - * to successfully understand this procedure - * - * h current level of the node; - * inum item number in S[h]; - * mode i - insert, p - paste, d - delete, c - cut. - * Returns: 1 - schedule occurred; - * 0 - balancing for higher levels needed; - * -1 - no balancing for higher levels needed; - * -2 - no disk space. - */ -static int check_balance(int mode, - struct tree_balance *tb, - int h, - int inum, - int pos_in_item, - struct item_head *ins_ih, const void *data) -{ - struct virtual_node *vn; - - vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf); - vn->vn_free_ptr = (char *)(tb->tb_vn + 1); - vn->vn_mode = mode; - vn->vn_affected_item_num = inum; - vn->vn_pos_in_item = pos_in_item; - vn->vn_ins_ih = ins_ih; - vn->vn_data = data; - - RFALSE(mode == M_INSERT && !vn->vn_ins_ih, - "vs-8255: ins_ih can not be 0 in insert mode"); - - /* Calculate balance parameters when size of node is increasing. */ - if (tb->insert_size[h] > 0) - return ip_check_balance(tb, h); - - /* Calculate balance parameters when size of node is decreasing. */ - return dc_check_balance(tb, h); -} - -/* Check whether parent at the path is the really parent of the current node.*/ -static int get_direct_parent(struct tree_balance *tb, int h) -{ - struct buffer_head *bh; - struct treepath *path = tb->tb_path; - int position, - path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h); - - /* We are in the root or in the new root. */ - if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { - - RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET - 1, - "PAP-8260: invalid offset in the path"); - - if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)-> - b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) { - /* Root is not changed. */ - PATH_OFFSET_PBUFFER(path, path_offset - 1) = NULL; - PATH_OFFSET_POSITION(path, path_offset - 1) = 0; - return CARRY_ON; - } - /* Root is changed and we must recalculate the path. */ - return REPEAT_SEARCH; - } - - /* Parent in the path is not in the tree. */ - if (!B_IS_IN_TREE - (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1))) - return REPEAT_SEARCH; - - if ((position = - PATH_OFFSET_POSITION(path, - path_offset - 1)) > B_NR_ITEMS(bh)) - return REPEAT_SEARCH; - - /* Parent in the path is not parent of the current node in the tree. */ - if (B_N_CHILD_NUM(bh, position) != - PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr) - return REPEAT_SEARCH; - - if (buffer_locked(bh)) { - int depth = reiserfs_write_unlock_nested(tb->tb_sb); - __wait_on_buffer(bh); - reiserfs_write_lock_nested(tb->tb_sb, depth); - if (FILESYSTEM_CHANGED_TB(tb)) - return REPEAT_SEARCH; - } - - /* - * Parent in the path is unlocked and really parent - * of the current node. - */ - return CARRY_ON; -} - -/* - * Using lnum[h] and rnum[h] we should determine what neighbors - * of S[h] we - * need in order to balance S[h], and get them if necessary. - * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; - * CARRY_ON - schedule didn't occur while the function worked; - */ -static int get_neighbors(struct tree_balance *tb, int h) -{ - int child_position, - path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h + 1); - unsigned long son_number; - struct super_block *sb = tb->tb_sb; - struct buffer_head *bh; - int depth; - - PROC_INFO_INC(sb, get_neighbors[h]); - - if (tb->lnum[h]) { - /* We need left neighbor to balance S[h]. 
*/ - PROC_INFO_INC(sb, need_l_neighbor[h]); - bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); - - RFALSE(bh == tb->FL[h] && - !PATH_OFFSET_POSITION(tb->tb_path, path_offset), - "PAP-8270: invalid position in the parent"); - - child_position = - (bh == - tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb-> - FL[h]); - son_number = B_N_CHILD_NUM(tb->FL[h], child_position); - depth = reiserfs_write_unlock_nested(tb->tb_sb); - bh = sb_bread(sb, son_number); - reiserfs_write_lock_nested(tb->tb_sb, depth); - if (!bh) - return IO_ERROR; - if (FILESYSTEM_CHANGED_TB(tb)) { - brelse(bh); - PROC_INFO_INC(sb, get_neighbors_restart[h]); - return REPEAT_SEARCH; - } - - RFALSE(!B_IS_IN_TREE(tb->FL[h]) || - child_position > B_NR_ITEMS(tb->FL[h]) || - B_N_CHILD_NUM(tb->FL[h], child_position) != - bh->b_blocknr, "PAP-8275: invalid parent"); - RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child"); - RFALSE(!h && - B_FREE_SPACE(bh) != - MAX_CHILD_SIZE(bh) - - dc_size(B_N_CHILD(tb->FL[0], child_position)), - "PAP-8290: invalid child size of left neighbor"); - - brelse(tb->L[h]); - tb->L[h] = bh; - } - - /* We need right neighbor to balance S[path_offset]. */ - if (tb->rnum[h]) { - PROC_INFO_INC(sb, need_r_neighbor[h]); - bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); - - RFALSE(bh == tb->FR[h] && - PATH_OFFSET_POSITION(tb->tb_path, - path_offset) >= - B_NR_ITEMS(bh), - "PAP-8295: invalid position in the parent"); - - child_position = - (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0; - son_number = B_N_CHILD_NUM(tb->FR[h], child_position); - depth = reiserfs_write_unlock_nested(tb->tb_sb); - bh = sb_bread(sb, son_number); - reiserfs_write_lock_nested(tb->tb_sb, depth); - if (!bh) - return IO_ERROR; - if (FILESYSTEM_CHANGED_TB(tb)) { - brelse(bh); - PROC_INFO_INC(sb, get_neighbors_restart[h]); - return REPEAT_SEARCH; - } - brelse(tb->R[h]); - tb->R[h] = bh; - - RFALSE(!h - && B_FREE_SPACE(bh) != - MAX_CHILD_SIZE(bh) - - dc_size(B_N_CHILD(tb->FR[0], child_position)), - "PAP-8300: invalid child size of right neighbor (%d != %d - %d)", - B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh), - dc_size(B_N_CHILD(tb->FR[0], child_position))); - - } - return CARRY_ON; -} - -static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh) -{ - int max_num_of_items; - int max_num_of_entries; - unsigned long blocksize = sb->s_blocksize; - -#define MIN_NAME_LEN 1 - - max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN); - max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) / - (DEH_SIZE + MIN_NAME_LEN); - - return sizeof(struct virtual_node) + - max(max_num_of_items * sizeof(struct virtual_item), - sizeof(struct virtual_item) + - struct_size_t(struct direntry_uarea, entry_sizes, - max_num_of_entries)); -} - -/* - * maybe we should fail balancing we are going to perform when kmalloc - * fails several times. 
But now it will loop until kmalloc gets - * required memory - */ -static int get_mem_for_virtual_node(struct tree_balance *tb) -{ - int check_fs = 0; - int size; - char *buf; - - size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path)); - - /* we have to allocate more memory for virtual node */ - if (size > tb->vn_buf_size) { - if (tb->vn_buf) { - /* free memory allocated before */ - kfree(tb->vn_buf); - /* this is not needed if kfree is atomic */ - check_fs = 1; - } - - /* virtual node requires now more memory */ - tb->vn_buf_size = size; - - /* get memory for virtual item */ - buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN); - if (!buf) { - /* - * getting memory with GFP_KERNEL priority may involve - * balancing now (due to indirect_to_direct conversion - * on dcache shrinking). So, release path and collected - * resources here - */ - free_buffers_in_tb(tb); - buf = kmalloc(size, GFP_NOFS); - if (!buf) { - tb->vn_buf_size = 0; - } - tb->vn_buf = buf; - schedule(); - return REPEAT_SEARCH; - } - - tb->vn_buf = buf; - } - - if (check_fs && FILESYSTEM_CHANGED_TB(tb)) - return REPEAT_SEARCH; - - return CARRY_ON; -} - -#ifdef CONFIG_REISERFS_CHECK -static void tb_buffer_sanity_check(struct super_block *sb, - struct buffer_head *bh, - const char *descr, int level) -{ - if (bh) { - if (atomic_read(&(bh->b_count)) <= 0) - - reiserfs_panic(sb, "jmacd-1", "negative or zero " - "reference counter for buffer %s[%d] " - "(%b)", descr, level, bh); - - if (!buffer_uptodate(bh)) - reiserfs_panic(sb, "jmacd-2", "buffer is not up " - "to date %s[%d] (%b)", - descr, level, bh); - - if (!B_IS_IN_TREE(bh)) - reiserfs_panic(sb, "jmacd-3", "buffer is not " - "in tree %s[%d] (%b)", - descr, level, bh); - - if (bh->b_bdev != sb->s_bdev) - reiserfs_panic(sb, "jmacd-4", "buffer has wrong " - "device %s[%d] (%b)", - descr, level, bh); - - if (bh->b_size != sb->s_blocksize) - reiserfs_panic(sb, "jmacd-5", "buffer has wrong " - "blocksize %s[%d] (%b)", - descr, level, bh); - - if (bh->b_blocknr > SB_BLOCK_COUNT(sb)) - reiserfs_panic(sb, "jmacd-6", "buffer block " - "number too high %s[%d] (%b)", - descr, level, bh); - } -} -#else -static void tb_buffer_sanity_check(struct super_block *sb, - struct buffer_head *bh, - const char *descr, int level) -{; -} -#endif - -static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh) -{ - return reiserfs_prepare_for_journal(s, bh, 0); -} - -static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) -{ - struct buffer_head *locked; -#ifdef CONFIG_REISERFS_CHECK - int repeat_counter = 0; -#endif - int i; - - do { - - locked = NULL; - - for (i = tb->tb_path->path_length; - !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { - if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) { - /* - * if I understand correctly, we can only - * be sure the last buffer in the path is - * in the tree --clm - */ -#ifdef CONFIG_REISERFS_CHECK - if (PATH_PLAST_BUFFER(tb->tb_path) == - PATH_OFFSET_PBUFFER(tb->tb_path, i)) - tb_buffer_sanity_check(tb->tb_sb, - PATH_OFFSET_PBUFFER - (tb->tb_path, - i), "S", - tb->tb_path-> - path_length - i); -#endif - if (!clear_all_dirty_bits(tb->tb_sb, - PATH_OFFSET_PBUFFER - (tb->tb_path, - i))) { - locked = - PATH_OFFSET_PBUFFER(tb->tb_path, - i); - } - } - } - - for (i = 0; !locked && i < MAX_HEIGHT && tb->insert_size[i]; - i++) { - - if (tb->lnum[i]) { - - if (tb->L[i]) { - tb_buffer_sanity_check(tb->tb_sb, - tb->L[i], - "L", i); - if (!clear_all_dirty_bits - (tb->tb_sb, tb->L[i])) - locked = tb->L[i]; - } - - if (!locked && tb->FL[i]) 
{ - tb_buffer_sanity_check(tb->tb_sb, - tb->FL[i], - "FL", i); - if (!clear_all_dirty_bits - (tb->tb_sb, tb->FL[i])) - locked = tb->FL[i]; - } - - if (!locked && tb->CFL[i]) { - tb_buffer_sanity_check(tb->tb_sb, - tb->CFL[i], - "CFL", i); - if (!clear_all_dirty_bits - (tb->tb_sb, tb->CFL[i])) - locked = tb->CFL[i]; - } - - } - - if (!locked && (tb->rnum[i])) { - - if (tb->R[i]) { - tb_buffer_sanity_check(tb->tb_sb, - tb->R[i], - "R", i); - if (!clear_all_dirty_bits - (tb->tb_sb, tb->R[i])) - locked = tb->R[i]; - } - - if (!locked && tb->FR[i]) { - tb_buffer_sanity_check(tb->tb_sb, - tb->FR[i], - "FR", i); - if (!clear_all_dirty_bits - (tb->tb_sb, tb->FR[i])) - locked = tb->FR[i]; - } - - if (!locked && tb->CFR[i]) { - tb_buffer_sanity_check(tb->tb_sb, - tb->CFR[i], - "CFR", i); - if (!clear_all_dirty_bits - (tb->tb_sb, tb->CFR[i])) - locked = tb->CFR[i]; - } - } - } - - /* - * as far as I can tell, this is not required. The FEB list - * seems to be full of newly allocated nodes, which will - * never be locked, dirty, or anything else. - * To be safe, I'm putting in the checks and waits in. - * For the moment, they are needed to keep the code in - * journal.c from complaining about the buffer. - * That code is inside CONFIG_REISERFS_CHECK as well. --clm - */ - for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { - if (tb->FEB[i]) { - if (!clear_all_dirty_bits - (tb->tb_sb, tb->FEB[i])) - locked = tb->FEB[i]; - } - } - - if (locked) { - int depth; -#ifdef CONFIG_REISERFS_CHECK - repeat_counter++; - if ((repeat_counter % 10000) == 0) { - reiserfs_warning(tb->tb_sb, "reiserfs-8200", - "too many iterations waiting " - "for buffer to unlock " - "(%b)", locked); - - /* Don't loop forever. Try to recover from possible error. */ - - return (FILESYSTEM_CHANGED_TB(tb)) ? - REPEAT_SEARCH : CARRY_ON; - } -#endif - depth = reiserfs_write_unlock_nested(tb->tb_sb); - __wait_on_buffer(locked); - reiserfs_write_lock_nested(tb->tb_sb, depth); - if (FILESYSTEM_CHANGED_TB(tb)) - return REPEAT_SEARCH; - } - - } while (locked); - - return CARRY_ON; -} - -/* - * Prepare for balancing, that is - * get all necessary parents, and neighbors; - * analyze what and where should be moved; - * get sufficient number of new nodes; - * Balancing will start only after all resources will be collected at a time. - * - * When ported to SMP kernels, only at the last moment after all needed nodes - * are collected in cache, will the resources be locked using the usual - * textbook ordered lock acquisition algorithms. Note that ensuring that - * this code neither write locks what it does not need to write lock nor locks - * out of order will be a pain in the butt that could have been avoided. - * Grumble grumble. -Hans - * - * fix is meant in the sense of render unchanging - * - * Latency might be improved by first gathering a list of what buffers - * are needed and then getting as many of them in parallel as possible? 
-Hans - * - * Parameters: - * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append) - * tb tree_balance structure; - * inum item number in S[h]; - * pos_in_item - comment this if you can - * ins_ih item head of item being inserted - * data inserted item or data to be pasted - * Returns: 1 - schedule occurred while the function worked; - * 0 - schedule didn't occur while the function worked; - * -1 - if no_disk_space - */ - -int fix_nodes(int op_mode, struct tree_balance *tb, - struct item_head *ins_ih, const void *data) -{ - int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path); - int pos_in_item; - - /* - * we set wait_tb_buffers_run when we have to restore any dirty - * bits cleared during wait_tb_buffers_run - */ - int wait_tb_buffers_run = 0; - struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); - - ++REISERFS_SB(tb->tb_sb)->s_fix_nodes; - - pos_in_item = tb->tb_path->pos_in_item; - - tb->fs_gen = get_generation(tb->tb_sb); - - /* - * we prepare and log the super here so it will already be in the - * transaction when do_balance needs to change it. - * This way do_balance won't have to schedule when trying to prepare - * the super for logging - */ - reiserfs_prepare_for_journal(tb->tb_sb, - SB_BUFFER_WITH_SB(tb->tb_sb), 1); - journal_mark_dirty(tb->transaction_handle, - SB_BUFFER_WITH_SB(tb->tb_sb)); - if (FILESYSTEM_CHANGED_TB(tb)) - return REPEAT_SEARCH; - - /* if it possible in indirect_to_direct conversion */ - if (buffer_locked(tbS0)) { - int depth = reiserfs_write_unlock_nested(tb->tb_sb); - __wait_on_buffer(tbS0); - reiserfs_write_lock_nested(tb->tb_sb, depth); - if (FILESYSTEM_CHANGED_TB(tb)) - return REPEAT_SEARCH; - } -#ifdef CONFIG_REISERFS_CHECK - if (REISERFS_SB(tb->tb_sb)->cur_tb) { - print_cur_tb("fix_nodes"); - reiserfs_panic(tb->tb_sb, "PAP-8305", - "there is pending do_balance"); - } - - if (!buffer_uptodate(tbS0) || !B_IS_IN_TREE(tbS0)) - reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is " - "not uptodate at the beginning of fix_nodes " - "or not in tree (mode %c)", - tbS0, tbS0, op_mode); - - /* Check parameters. */ - switch (op_mode) { - case M_INSERT: - if (item_num <= 0 || item_num > B_NR_ITEMS(tbS0)) - reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect " - "item number %d (in S0 - %d) in case " - "of insert", item_num, - B_NR_ITEMS(tbS0)); - break; - case M_PASTE: - case M_DELETE: - case M_CUT: - if (item_num < 0 || item_num >= B_NR_ITEMS(tbS0)) { - print_block(tbS0, 0, -1, -1); - reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect " - "item number(%d); mode = %c " - "insert_size = %d", - item_num, op_mode, - tb->insert_size[0]); - } - break; - default: - reiserfs_panic(tb->tb_sb, "PAP-8340", "Incorrect mode " - "of operation"); - } -#endif - - if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH) - /* FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat */ - return REPEAT_SEARCH; - - /* Starting from the leaf level; for all levels h of the tree. */ - for (h = 0; h < MAX_HEIGHT && tb->insert_size[h]; h++) { - ret = get_direct_parent(tb, h); - if (ret != CARRY_ON) - goto repeat; - - ret = check_balance(op_mode, tb, h, item_num, - pos_in_item, ins_ih, data); - if (ret != CARRY_ON) { - if (ret == NO_BALANCING_NEEDED) { - /* No balancing for higher levels needed. 
*/ - ret = get_neighbors(tb, h); - if (ret != CARRY_ON) - goto repeat; - if (h != MAX_HEIGHT - 1) - tb->insert_size[h + 1] = 0; - /* - * ok, analysis and resource gathering - * are complete - */ - break; - } - goto repeat; - } - - ret = get_neighbors(tb, h); - if (ret != CARRY_ON) - goto repeat; - - /* - * No disk space, or schedule occurred and analysis may be - * invalid and needs to be redone. - */ - ret = get_empty_nodes(tb, h); - if (ret != CARRY_ON) - goto repeat; - - /* - * We have a positive insert size but no nodes exist on this - * level, this means that we are creating a new root. - */ - if (!PATH_H_PBUFFER(tb->tb_path, h)) { - - RFALSE(tb->blknum[h] != 1, - "PAP-8350: creating new empty root"); - - if (h < MAX_HEIGHT - 1) - tb->insert_size[h + 1] = 0; - } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) { - /* - * The tree needs to be grown, so this node S[h] - * which is the root node is split into two nodes, - * and a new node (S[h+1]) will be created to - * become the root node. - */ - if (tb->blknum[h] > 1) { - - RFALSE(h == MAX_HEIGHT - 1, - "PAP-8355: attempt to create too high of a tree"); - - tb->insert_size[h + 1] = - (DC_SIZE + - KEY_SIZE) * (tb->blknum[h] - 1) + - DC_SIZE; - } else if (h < MAX_HEIGHT - 1) - tb->insert_size[h + 1] = 0; - } else - tb->insert_size[h + 1] = - (DC_SIZE + KEY_SIZE) * (tb->blknum[h] - 1); - } - - ret = wait_tb_buffers_until_unlocked(tb); - if (ret == CARRY_ON) { - if (FILESYSTEM_CHANGED_TB(tb)) { - wait_tb_buffers_run = 1; - ret = REPEAT_SEARCH; - goto repeat; - } else { - return CARRY_ON; - } - } else { - wait_tb_buffers_run = 1; - goto repeat; - } - -repeat: - /* - * fix_nodes was unable to perform its calculation due to - * filesystem got changed under us, lack of free disk space or i/o - * failure. If the first is the case - the search will be - * repeated. For now - free all resources acquired so far except - * for the new allocated nodes - */ - { - int i; - - /* Release path buffers. */ - if (wait_tb_buffers_run) { - pathrelse_and_restore(tb->tb_sb, tb->tb_path); - } else { - pathrelse(tb->tb_path); - } - /* brelse all resources collected for balancing */ - for (i = 0; i < MAX_HEIGHT; i++) { - if (wait_tb_buffers_run) { - reiserfs_restore_prepared_buffer(tb->tb_sb, - tb->L[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, - tb->R[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, - tb->FL[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, - tb->FR[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, - tb-> - CFL[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, - tb-> - CFR[i]); - } - - brelse(tb->L[i]); - brelse(tb->R[i]); - brelse(tb->FL[i]); - brelse(tb->FR[i]); - brelse(tb->CFL[i]); - brelse(tb->CFR[i]); - - tb->L[i] = NULL; - tb->R[i] = NULL; - tb->FL[i] = NULL; - tb->FR[i] = NULL; - tb->CFL[i] = NULL; - tb->CFR[i] = NULL; - } - - if (wait_tb_buffers_run) { - for (i = 0; i < MAX_FEB_SIZE; i++) { - if (tb->FEB[i]) - reiserfs_restore_prepared_buffer - (tb->tb_sb, tb->FEB[i]); - } - } - return ret; - } - -} - -void unfix_nodes(struct tree_balance *tb) -{ - int i; - - /* Release path buffers. 
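 * unfix_nodes() is the cleanup counterpart of fix_nodes(): the path is
 * released with its prepared bits restored, every neighbor/parent buffer
 * collected for balancing is brelse'd, preallocated blocks that were never
 * used are freed back while used ones are simply brelse'd, and the virtual
 * node buffer is kfree'd.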
*/ - pathrelse_and_restore(tb->tb_sb, tb->tb_path); - - /* brelse all resources collected for balancing */ - for (i = 0; i < MAX_HEIGHT; i++) { - reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]); - reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]); - - brelse(tb->L[i]); - brelse(tb->R[i]); - brelse(tb->FL[i]); - brelse(tb->FR[i]); - brelse(tb->CFL[i]); - brelse(tb->CFR[i]); - } - - /* deal with list of allocated (used and unused) nodes */ - for (i = 0; i < MAX_FEB_SIZE; i++) { - if (tb->FEB[i]) { - b_blocknr_t blocknr = tb->FEB[i]->b_blocknr; - /* - * de-allocated block which was not used by - * balancing and bforget about buffer for it - */ - brelse(tb->FEB[i]); - reiserfs_free_block(tb->transaction_handle, NULL, - blocknr, 0); - } - if (tb->used[i]) { - /* release used as new nodes including a new root */ - brelse(tb->used[i]); - } - } - - kfree(tb->vn_buf); - -} diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c deleted file mode 100644 index 7a26c4fe6c46..000000000000 --- a/fs/reiserfs/hashes.c +++ /dev/null @@ -1,177 +0,0 @@ - -/* - * Keyed 32-bit hash function using TEA in a Davis-Meyer function - * H0 = Key - * Hi = E Mi(Hi-1) + Hi-1 - * - * (see Applied Cryptography, 2nd edition, p448). - * - * Jeremy Fitzhardinge 1998 - * - * Jeremy has agreed to the contents of reiserfs/README. -Hans - * Yura's function is added (04/07/2000) - */ - -#include -#include "reiserfs.h" -#include - -#define DELTA 0x9E3779B9 -#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */ -#define PARTROUNDS 6 /* 6 gets complete mixing */ - -/* a, b, c, d - data; h0, h1 - accumulated hash */ -#define TEACORE(rounds) \ - do { \ - u32 sum = 0; \ - int n = rounds; \ - u32 b0, b1; \ - \ - b0 = h0; \ - b1 = h1; \ - \ - do \ - { \ - sum += DELTA; \ - b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); \ - b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); \ - } while(--n); \ - \ - h0 += b0; \ - h1 += b1; \ - } while(0) - -u32 keyed_hash(const signed char *msg, int len) -{ - u32 k[] = { 0x9464a485, 0x542e1a94, 0x3e846bff, 0xb75bcfc3 }; - - u32 h0 = k[0], h1 = k[1]; - u32 a, b, c, d; - u32 pad; - int i; - - /* assert(len >= 0 && len < 256); */ - - pad = (u32) len | ((u32) len << 8); - pad |= pad << 16; - - while (len >= 16) { - a = (u32) msg[0] | - (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24; - b = (u32) msg[4] | - (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24; - c = (u32) msg[8] | - (u32) msg[9] << 8 | - (u32) msg[10] << 16 | (u32) msg[11] << 24; - d = (u32) msg[12] | - (u32) msg[13] << 8 | - (u32) msg[14] << 16 | (u32) msg[15] << 24; - - TEACORE(PARTROUNDS); - - len -= 16; - msg += 16; - } - - if (len >= 12) { - a = (u32) msg[0] | - (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24; - b = (u32) msg[4] | - (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24; - c = (u32) msg[8] | - (u32) msg[9] << 8 | - (u32) msg[10] << 16 | (u32) msg[11] << 24; - - d = pad; - for (i = 12; i < len; i++) { - d <<= 8; - d |= msg[i]; - } - } else if (len >= 8) { - a = (u32) msg[0] | - (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24; - b = (u32) msg[4] | - (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24; - - c = d = pad; - for (i = 8; i < len; i++) { - c <<= 8; - c |= msg[i]; - } - } else if (len >= 4) { - a = (u32) msg[0] 
| - (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24; - - b = c = d = pad; - for (i = 4; i < len; i++) { - b <<= 8; - b |= msg[i]; - } - } else { - a = b = c = d = pad; - for (i = 0; i < len; i++) { - a <<= 8; - a |= msg[i]; - } - } - - TEACORE(FULLROUNDS); - -/* return 0;*/ - return h0 ^ h1; -} - -/* - * What follows in this file is copyright 2000 by Hans Reiser, and the - * licensing of what follows is governed by reiserfs/README - */ -u32 yura_hash(const signed char *msg, int len) -{ - int j, pow; - u32 a, c; - int i; - - for (pow = 1, i = 1; i < len; i++) - pow = pow * 10; - - if (len == 1) - a = msg[0] - 48; - else - a = (msg[0] - 48) * pow; - - for (i = 1; i < len; i++) { - c = msg[i] - 48; - for (pow = 1, j = i; j < len - 1; j++) - pow = pow * 10; - a = a + c * pow; - } - - for (; i < 40; i++) { - c = '0' - 48; - for (pow = 1, j = i; j < len - 1; j++) - pow = pow * 10; - a = a + c * pow; - } - - for (; i < 256; i++) { - c = i; - for (pow = 1, j = i; j < len - 1; j++) - pow = pow * 10; - a = a + c * pow; - } - - a = a << 7; - return a; -} - -u32 r5_hash(const signed char *msg, int len) -{ - u32 a = 0; - while (*msg) { - a += *msg << 4; - a += *msg >> 4; - a *= 11; - msg++; - } - return a; -} diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c deleted file mode 100644 index 5db6f45b3fed..000000000000 --- a/fs/reiserfs/ibalance.c +++ /dev/null @@ -1,1161 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include -#include -#include -#include "reiserfs.h" -#include - -/* this is one and only function that is used outside (do_balance.c) */ -int balance_internal(struct tree_balance *, - int, int, struct item_head *, struct buffer_head **); - -/* - * modes of internal_shift_left, internal_shift_right and - * internal_insert_childs - */ -#define INTERNAL_SHIFT_FROM_S_TO_L 0 -#define INTERNAL_SHIFT_FROM_R_TO_S 1 -#define INTERNAL_SHIFT_FROM_L_TO_S 2 -#define INTERNAL_SHIFT_FROM_S_TO_R 3 -#define INTERNAL_INSERT_TO_S 4 -#define INTERNAL_INSERT_TO_L 5 -#define INTERNAL_INSERT_TO_R 6 - -static void internal_define_dest_src_infos(int shift_mode, - struct tree_balance *tb, - int h, - struct buffer_info *dest_bi, - struct buffer_info *src_bi, - int *d_key, struct buffer_head **cf) -{ - memset(dest_bi, 0, sizeof(struct buffer_info)); - memset(src_bi, 0, sizeof(struct buffer_info)); - /* define dest, src, dest parent, dest position */ - switch (shift_mode) { - - /* used in internal_shift_left */ - case INTERNAL_SHIFT_FROM_S_TO_L: - src_bi->tb = tb; - src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); - src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); - src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->L[h]; - dest_bi->bi_parent = tb->FL[h]; - dest_bi->bi_position = get_left_neighbor_position(tb, h); - *d_key = tb->lkey[h]; - *cf = tb->CFL[h]; - break; - case INTERNAL_SHIFT_FROM_L_TO_S: - src_bi->tb = tb; - src_bi->bi_bh = tb->L[h]; - src_bi->bi_parent = tb->FL[h]; - src_bi->bi_position = get_left_neighbor_position(tb, h); - dest_bi->tb = tb; - dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); - dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); - /* dest position is analog of dest->b_item_order */ - dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - *d_key = tb->lkey[h]; - *cf = tb->CFL[h]; - break; - - /* used in internal_shift_left */ - case INTERNAL_SHIFT_FROM_R_TO_S: - src_bi->tb = tb; - src_bi->bi_bh = tb->R[h]; - src_bi->bi_parent = tb->FR[h]; - src_bi->bi_position = 
get_right_neighbor_position(tb, h); - dest_bi->tb = tb; - dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); - dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); - dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - *d_key = tb->rkey[h]; - *cf = tb->CFR[h]; - break; - - case INTERNAL_SHIFT_FROM_S_TO_R: - src_bi->tb = tb; - src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); - src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); - src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->R[h]; - dest_bi->bi_parent = tb->FR[h]; - dest_bi->bi_position = get_right_neighbor_position(tb, h); - *d_key = tb->rkey[h]; - *cf = tb->CFR[h]; - break; - - case INTERNAL_INSERT_TO_L: - dest_bi->tb = tb; - dest_bi->bi_bh = tb->L[h]; - dest_bi->bi_parent = tb->FL[h]; - dest_bi->bi_position = get_left_neighbor_position(tb, h); - break; - - case INTERNAL_INSERT_TO_S: - dest_bi->tb = tb; - dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); - dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); - dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - break; - - case INTERNAL_INSERT_TO_R: - dest_bi->tb = tb; - dest_bi->bi_bh = tb->R[h]; - dest_bi->bi_parent = tb->FR[h]; - dest_bi->bi_position = get_right_neighbor_position(tb, h); - break; - - default: - reiserfs_panic(tb->tb_sb, "ibalance-1", - "shift type is unknown (%d)", - shift_mode); - } -} - -/* - * Insert count node pointers into buffer cur before position to + 1. - * Insert count items into buffer cur before position to. - * Items and node pointers are specified by inserted and bh respectively. - */ -static void internal_insert_childs(struct buffer_info *cur_bi, - int to, int count, - struct item_head *inserted, - struct buffer_head **bh) -{ - struct buffer_head *cur = cur_bi->bi_bh; - struct block_head *blkh; - int nr; - struct reiserfs_key *ih; - struct disk_child new_dc[2]; - struct disk_child *dc; - int i; - - if (count <= 0) - return; - - blkh = B_BLK_HEAD(cur); - nr = blkh_nr_item(blkh); - - RFALSE(count > 2, "too many children (%d) are to be inserted", count); - RFALSE(B_FREE_SPACE(cur) < count * (KEY_SIZE + DC_SIZE), - "no enough free space (%d), needed %d bytes", - B_FREE_SPACE(cur), count * (KEY_SIZE + DC_SIZE)); - - /* prepare space for count disk_child */ - dc = B_N_CHILD(cur, to + 1); - - memmove(dc + count, dc, (nr + 1 - (to + 1)) * DC_SIZE); - - /* copy to_be_insert disk children */ - for (i = 0; i < count; i++) { - put_dc_size(&new_dc[i], - MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i])); - put_dc_block_number(&new_dc[i], bh[i]->b_blocknr); - } - memcpy(dc, new_dc, DC_SIZE * count); - - /* prepare space for count items */ - ih = internal_key(cur, ((to == -1) ? 
0 : to)); - - memmove(ih + count, ih, - (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE); - - /* copy item headers (keys) */ - memcpy(ih, inserted, KEY_SIZE); - if (count > 1) - memcpy(ih + 1, inserted + 1, KEY_SIZE); - - /* sizes, item number */ - set_blkh_nr_item(blkh, blkh_nr_item(blkh) + count); - set_blkh_free_space(blkh, - blkh_free_space(blkh) - count * (DC_SIZE + - KEY_SIZE)); - - do_balance_mark_internal_dirty(cur_bi->tb, cur, 0); - - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(cur); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - - if (cur_bi->bi_parent) { - struct disk_child *t_dc = - B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position); - put_dc_size(t_dc, - dc_size(t_dc) + (count * (DC_SIZE + KEY_SIZE))); - do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent, - 0); - - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(cur_bi->bi_parent); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - } - -} - -/* - * Delete del_num items and node pointers from buffer cur starting from - * the first_i'th item and first_p'th pointers respectively. - */ -static void internal_delete_pointers_items(struct buffer_info *cur_bi, - int first_p, - int first_i, int del_num) -{ - struct buffer_head *cur = cur_bi->bi_bh; - int nr; - struct block_head *blkh; - struct reiserfs_key *key; - struct disk_child *dc; - - RFALSE(cur == NULL, "buffer is 0"); - RFALSE(del_num < 0, - "negative number of items (%d) can not be deleted", del_num); - RFALSE(first_p < 0 || first_p + del_num > B_NR_ITEMS(cur) + 1 - || first_i < 0, - "first pointer order (%d) < 0 or " - "no so many pointers (%d), only (%d) or " - "first key order %d < 0", first_p, first_p + del_num, - B_NR_ITEMS(cur) + 1, first_i); - if (del_num == 0) - return; - - blkh = B_BLK_HEAD(cur); - nr = blkh_nr_item(blkh); - - if (first_p == 0 && del_num == nr + 1) { - RFALSE(first_i != 0, - "1st deleted key must have order 0, not %d", first_i); - make_empty_node(cur_bi); - return; - } - - RFALSE(first_i + del_num > B_NR_ITEMS(cur), - "first_i = %d del_num = %d " - "no so many keys (%d) in the node (%b)(%z)", - first_i, del_num, first_i + del_num, cur, cur); - - /* deleting */ - dc = B_N_CHILD(cur, first_p); - - memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE); - key = internal_key(cur, first_i); - memmove(key, key + del_num, - (nr - first_i - del_num) * KEY_SIZE + (nr + 1 - - del_num) * DC_SIZE); - - /* sizes, item number */ - set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num); - set_blkh_free_space(blkh, - blkh_free_space(blkh) + - (del_num * (KEY_SIZE + DC_SIZE))); - - do_balance_mark_internal_dirty(cur_bi->tb, cur, 0); - /*&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(cur); - /*&&&&&&&&&&&&&&&&&&&&&&& */ - - if (cur_bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position); - put_dc_size(t_dc, - dc_size(t_dc) - (del_num * (KEY_SIZE + DC_SIZE))); - - do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent, - 0); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(cur_bi->bi_parent); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - } -} - -/* delete n node pointers and items starting from given position */ -static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n) -{ - int i_from; - - i_from = (from == 0) ? 
from : from - 1; - - /* - * delete n pointers starting from `from' position in CUR; - * delete n keys starting from 'i_from' position in CUR; - */ - internal_delete_pointers_items(cur_bi, from, i_from, n); -} - -/* - * copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer - * dest - * last_first == FIRST_TO_LAST means that we copy first items - * from src to tail of dest - * last_first == LAST_TO_FIRST means that we copy last items - * from src to head of dest - */ -static void internal_copy_pointers_items(struct buffer_info *dest_bi, - struct buffer_head *src, - int last_first, int cpy_num) -{ - /* - * ATTENTION! Number of node pointers in DEST is equal to number - * of items in DEST as delimiting key have already inserted to - * buffer dest. - */ - struct buffer_head *dest = dest_bi->bi_bh; - int nr_dest, nr_src; - int dest_order, src_order; - struct block_head *blkh; - struct reiserfs_key *key; - struct disk_child *dc; - - nr_src = B_NR_ITEMS(src); - - RFALSE(dest == NULL || src == NULL, - "src (%p) or dest (%p) buffer is 0", src, dest); - RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST, - "invalid last_first parameter (%d)", last_first); - RFALSE(nr_src < cpy_num - 1, - "no so many items (%d) in src (%d)", cpy_num, nr_src); - RFALSE(cpy_num < 0, "cpy_num less than 0 (%d)", cpy_num); - RFALSE(cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest), - "cpy_num (%d) + item number in dest (%d) can not be > MAX_NR_KEY(%d)", - cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest)); - - if (cpy_num == 0) - return; - - /* coping */ - blkh = B_BLK_HEAD(dest); - nr_dest = blkh_nr_item(blkh); - - /*dest_order = (last_first == LAST_TO_FIRST) ? 0 : nr_dest; */ - /*src_order = (last_first == LAST_TO_FIRST) ? (nr_src - cpy_num + 1) : 0; */ - (last_first == LAST_TO_FIRST) ? (dest_order = 0, src_order = - nr_src - cpy_num + 1) : (dest_order = - nr_dest, - src_order = - 0); - - /* prepare space for cpy_num pointers */ - dc = B_N_CHILD(dest, dest_order); - - memmove(dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE); - - /* insert pointers */ - memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * cpy_num); - - /* prepare space for cpy_num - 1 item headers */ - key = internal_key(dest, dest_order); - memmove(key + cpy_num - 1, key, - KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest + - cpy_num)); - - /* insert headers */ - memcpy(key, internal_key(src, src_order), KEY_SIZE * (cpy_num - 1)); - - /* sizes, item number */ - set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1)); - set_blkh_free_space(blkh, - blkh_free_space(blkh) - (KEY_SIZE * (cpy_num - 1) + - DC_SIZE * cpy_num)); - - do_balance_mark_internal_dirty(dest_bi->tb, dest, 0); - - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(dest); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - - if (dest_bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position); - put_dc_size(t_dc, - dc_size(t_dc) + (KEY_SIZE * (cpy_num - 1) + - DC_SIZE * cpy_num)); - - do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent, - 0); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(dest_bi->bi_parent); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - } - -} - -/* - * Copy cpy_num node pointers and cpy_num - 1 items from buffer src to - * buffer dest. - * Delete cpy_num - del_par items and node pointers from buffer src. - * last_first == FIRST_TO_LAST means, that we copy/delete first items from src. - * last_first == LAST_TO_FIRST means, that we copy/delete last items from src. 
- */ -static void internal_move_pointers_items(struct buffer_info *dest_bi, - struct buffer_info *src_bi, - int last_first, int cpy_num, - int del_par) -{ - int first_pointer; - int first_item; - - internal_copy_pointers_items(dest_bi, src_bi->bi_bh, last_first, - cpy_num); - - if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ - first_pointer = 0; - first_item = 0; - /* - * delete cpy_num - del_par pointers and keys starting for - * pointers with first_pointer, for key - with first_item - */ - internal_delete_pointers_items(src_bi, first_pointer, - first_item, cpy_num - del_par); - } else { /* shift_right occurs */ - int i, j; - - i = (cpy_num - del_par == - (j = - B_NR_ITEMS(src_bi->bi_bh)) + 1) ? 0 : j - cpy_num + - del_par; - - internal_delete_pointers_items(src_bi, - j + 1 - cpy_num + del_par, i, - cpy_num - del_par); - } -} - -/* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */ -static void internal_insert_key(struct buffer_info *dest_bi, - /* insert key before key with n_dest number */ - int dest_position_before, - struct buffer_head *src, int src_position) -{ - struct buffer_head *dest = dest_bi->bi_bh; - int nr; - struct block_head *blkh; - struct reiserfs_key *key; - - RFALSE(dest == NULL || src == NULL, - "source(%p) or dest(%p) buffer is 0", src, dest); - RFALSE(dest_position_before < 0 || src_position < 0, - "source(%d) or dest(%d) key number less than 0", - src_position, dest_position_before); - RFALSE(dest_position_before > B_NR_ITEMS(dest) || - src_position >= B_NR_ITEMS(src), - "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))", - dest_position_before, B_NR_ITEMS(dest), - src_position, B_NR_ITEMS(src)); - RFALSE(B_FREE_SPACE(dest) < KEY_SIZE, - "no enough free space (%d) in dest buffer", B_FREE_SPACE(dest)); - - blkh = B_BLK_HEAD(dest); - nr = blkh_nr_item(blkh); - - /* prepare space for inserting key */ - key = internal_key(dest, dest_position_before); - memmove(key + 1, key, - (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE); - - /* insert key */ - memcpy(key, internal_key(src, src_position), KEY_SIZE); - - /* Change dirt, free space, item number fields. */ - - set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1); - set_blkh_free_space(blkh, blkh_free_space(blkh) - KEY_SIZE); - - do_balance_mark_internal_dirty(dest_bi->tb, dest, 0); - - if (dest_bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position); - put_dc_size(t_dc, dc_size(t_dc) + KEY_SIZE); - - do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent, - 0); - } -} - -/* - * Insert d_key'th (delimiting) key from buffer cfl to tail of dest. - * Copy pointer_amount node pointers and pointer_amount - 1 items from - * buffer src to buffer dest. - * Replace d_key'th key in buffer cfl. - * Delete pointer_amount items and node pointers from buffer src. 
- */ -/* this can be invoked both to shift from S to L and from R to S */ -static void internal_shift_left( - /* - * INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S - */ - int mode, - struct tree_balance *tb, - int h, int pointer_amount) -{ - struct buffer_info dest_bi, src_bi; - struct buffer_head *cf; - int d_key_position; - - internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi, - &d_key_position, &cf); - - /*printk("pointer_amount = %d\n",pointer_amount); */ - - if (pointer_amount) { - /* - * insert delimiting key from common father of dest and - * src to node dest into position B_NR_ITEM(dest) - */ - internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, - d_key_position); - - if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) { - if (src_bi.bi_position /*src->b_item_order */ == 0) - replace_key(tb, cf, d_key_position, - src_bi. - bi_parent /*src->b_parent */ , 0); - } else - replace_key(tb, cf, d_key_position, src_bi.bi_bh, - pointer_amount - 1); - } - /* last parameter is del_parameter */ - internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST, - pointer_amount, 0); - -} - -/* - * Insert delimiting key to L[h]. - * Copy n node pointers and n - 1 items from buffer S[h] to L[h]. - * Delete n - 1 items and node pointers from buffer S[h]. - */ -/* it always shifts from S[h] to L[h] */ -static void internal_shift1_left(struct tree_balance *tb, - int h, int pointer_amount) -{ - struct buffer_info dest_bi, src_bi; - struct buffer_head *cf; - int d_key_position; - - internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, - &dest_bi, &src_bi, &d_key_position, &cf); - - /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ - if (pointer_amount > 0) - internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, - d_key_position); - - /* last parameter is del_parameter */ - internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST, - pointer_amount, 1); -} - -/* - * Insert d_key'th (delimiting) key from buffer cfr to head of dest. - * Copy n node pointers and n - 1 items from buffer src to buffer dest. - * Replace d_key'th key in buffer cfr. - * Delete n items and node pointers from buffer src. - */ -static void internal_shift_right( - /* - * INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S - */ - int mode, - struct tree_balance *tb, - int h, int pointer_amount) -{ - struct buffer_info dest_bi, src_bi; - struct buffer_head *cf; - int d_key_position; - int nr; - - internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi, - &d_key_position, &cf); - - nr = B_NR_ITEMS(src_bi.bi_bh); - - if (pointer_amount > 0) { - /* - * insert delimiting key from common father of dest - * and src to dest node into position 0 - */ - internal_insert_key(&dest_bi, 0, cf, d_key_position); - if (nr == pointer_amount - 1) { - RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ || - dest_bi.bi_bh != tb->R[h], - "src (%p) must be == tb->S[h](%p) when it disappears", - src_bi.bi_bh, PATH_H_PBUFFER(tb->tb_path, h)); - /* when S[h] disappers replace left delemiting key as well */ - if (tb->CFL[h]) - replace_key(tb, cf, d_key_position, tb->CFL[h], - tb->lkey[h]); - } else - replace_key(tb, cf, d_key_position, src_bi.bi_bh, - nr - pointer_amount); - } - - /* last parameter is del_parameter */ - internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST, - pointer_amount, 0); -} - -/* - * Insert delimiting key to R[h]. - * Copy n node pointers and n - 1 items from buffer S[h] to R[h]. - * Delete n - 1 items and node pointers from buffer S[h]. 
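 * Unlike internal_shift_right(), no delimiting key in CFR[h] is replaced
 * here: the move is done with del_par == 1, so one pointer fewer is deleted
 * from S[h] than is copied to R[h] (the mirror image of
 * internal_shift1_left()).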
- */ -/* it always shift from S[h] to R[h] */ -static void internal_shift1_right(struct tree_balance *tb, - int h, int pointer_amount) -{ - struct buffer_info dest_bi, src_bi; - struct buffer_head *cf; - int d_key_position; - - internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, - &dest_bi, &src_bi, &d_key_position, &cf); - - /* insert rkey from CFR[h] to right neighbor R[h] */ - if (pointer_amount > 0) - internal_insert_key(&dest_bi, 0, cf, d_key_position); - - /* last parameter is del_parameter */ - internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST, - pointer_amount, 1); -} - -/* - * Delete insert_num node pointers together with their left items - * and balance current node. - */ -static void balance_internal_when_delete(struct tree_balance *tb, - int h, int child_pos) -{ - int insert_num; - int n; - struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h); - struct buffer_info bi; - - insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE)); - - /* delete child-node-pointer(s) together with their left item(s) */ - bi.tb = tb; - bi.bi_bh = tbSh; - bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h); - bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - - internal_delete_childs(&bi, child_pos, -insert_num); - - RFALSE(tb->blknum[h] > 1, - "tb->blknum[%d]=%d when insert_size < 0", h, tb->blknum[h]); - - n = B_NR_ITEMS(tbSh); - - if (tb->lnum[h] == 0 && tb->rnum[h] == 0) { - if (tb->blknum[h] == 0) { - /* node S[h] (root of the tree) is empty now */ - struct buffer_head *new_root; - - RFALSE(n - || B_FREE_SPACE(tbSh) != - MAX_CHILD_SIZE(tbSh) - DC_SIZE, - "buffer must have only 0 keys (%d)", n); - RFALSE(bi.bi_parent, "root has parent (%p)", - bi.bi_parent); - - /* choose a new root */ - if (!tb->L[h - 1] || !B_NR_ITEMS(tb->L[h - 1])) - new_root = tb->R[h - 1]; - else - new_root = tb->L[h - 1]; - /* - * switch super block's tree root block - * number to the new value */ - PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr); - /*REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; */ - PUT_SB_TREE_HEIGHT(tb->tb_sb, - SB_TREE_HEIGHT(tb->tb_sb) - 1); - - do_balance_mark_sb_dirty(tb, - REISERFS_SB(tb->tb_sb)->s_sbh, - 1); - /*&&&&&&&&&&&&&&&&&&&&&& */ - /* use check_internal if new root is an internal node */ - if (h > 1) - check_internal(new_root); - /*&&&&&&&&&&&&&&&&&&&&&& */ - - /* do what is needed for buffer thrown from tree */ - reiserfs_invalidate_buffer(tb, tbSh); - return; - } - return; - } - - /* join S[h] with L[h] */ - if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) { - - RFALSE(tb->rnum[h] != 0, - "invalid tb->rnum[%d]==%d when joining S[h] with L[h]", - h, tb->rnum[h]); - - internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1); - reiserfs_invalidate_buffer(tb, tbSh); - - return; - } - - /* join S[h] with R[h] */ - if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) { - RFALSE(tb->lnum[h] != 0, - "invalid tb->lnum[%d]==%d when joining S[h] with R[h]", - h, tb->lnum[h]); - - internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1); - - reiserfs_invalidate_buffer(tb, tbSh); - return; - } - - /* borrow from left neighbor L[h] */ - if (tb->lnum[h] < 0) { - RFALSE(tb->rnum[h] != 0, - "wrong tb->rnum[%d]==%d when borrow from L[h]", h, - tb->rnum[h]); - internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h, - -tb->lnum[h]); - return; - } - - /* borrow from right neighbor R[h] */ - if (tb->rnum[h] < 0) { - RFALSE(tb->lnum[h] != 0, - "invalid tb->lnum[%d]==%d when borrow from R[h]", - h, tb->lnum[h]); - 
internal_shift_left(INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]); /*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]); */ - return; - } - - /* split S[h] into two parts and put them into neighbors */ - if (tb->lnum[h] > 0) { - RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1, - "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them", - h, tb->lnum[h], h, tb->rnum[h], n); - - internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]); /*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]); */ - internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, - tb->rnum[h]); - - reiserfs_invalidate_buffer(tb, tbSh); - - return; - } - reiserfs_panic(tb->tb_sb, "ibalance-2", - "unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d", - h, tb->lnum[h], h, tb->rnum[h]); -} - -/* Replace delimiting key of buffers L[h] and S[h] by the given key.*/ -static void replace_lkey(struct tree_balance *tb, int h, struct item_head *key) -{ - RFALSE(tb->L[h] == NULL || tb->CFL[h] == NULL, - "L[h](%p) and CFL[h](%p) must exist in replace_lkey", - tb->L[h], tb->CFL[h]); - - if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0) - return; - - memcpy(internal_key(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE); - - do_balance_mark_internal_dirty(tb, tb->CFL[h], 0); -} - -/* Replace delimiting key of buffers S[h] and R[h] by the given key.*/ -static void replace_rkey(struct tree_balance *tb, int h, struct item_head *key) -{ - RFALSE(tb->R[h] == NULL || tb->CFR[h] == NULL, - "R[h](%p) and CFR[h](%p) must exist in replace_rkey", - tb->R[h], tb->CFR[h]); - RFALSE(B_NR_ITEMS(tb->R[h]) == 0, - "R[h] can not be empty if it exists (item number=%d)", - B_NR_ITEMS(tb->R[h])); - - memcpy(internal_key(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE); - - do_balance_mark_internal_dirty(tb, tb->CFR[h], 0); -} - - -/* - * if inserting/pasting { - * child_pos is the position of the node-pointer in S[h] that - * pointed to S[h-1] before balancing of the h-1 level; - * this means that new pointers and items must be inserted AFTER - * child_pos - * } else { - * it is the position of the leftmost pointer that must be deleted - * (together with its corresponding key to the left of the pointer) - * as a result of the previous level's balancing. - * } - */ - -int balance_internal(struct tree_balance *tb, - int h, /* level of the tree */ - int child_pos, - /* key for insertion on higher level */ - struct item_head *insert_key, - /* node for insertion on higher level */ - struct buffer_head **insert_ptr) -{ - struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h); - struct buffer_info bi; - - /* - * we return this: it is 0 if there is no S[h], - * else it is tb->S[h]->b_item_order - */ - int order; - int insert_num, n, k; - struct buffer_head *S_new; - struct item_head new_insert_key; - struct buffer_head *new_insert_ptr = NULL; - struct item_head *new_insert_key_addr = insert_key; - - RFALSE(h < 1, "h (%d) can not be < 1 on internal level", h); - - PROC_INFO_INC(tb->tb_sb, balance_at[h]); - - order = - (tbSh) ? PATH_H_POSITION(tb->tb_path, - h + 1) /*tb->S[h]->b_item_order */ : 0; - - /* - * Using insert_size[h] calculate the number insert_num of items - * that must be inserted to or deleted from S[h]. 
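 * E.g. splitting S[h-1] in two adds one delimiting key and one node pointer
 * at this level, so insert_size[h] == KEY_SIZE + DC_SIZE and insert_num == 1;
 * a delete at the level below arrives with a negative insert_size[h], giving
 * insert_num < 0, which is handled by balance_internal_when_delete().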
- */ - insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE)); - - /* Check whether insert_num is proper * */ - RFALSE(insert_num < -2 || insert_num > 2, - "incorrect number of items inserted to the internal node (%d)", - insert_num); - RFALSE(h > 1 && (insert_num > 1 || insert_num < -1), - "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level", - insert_num, h); - - /* Make balance in case insert_num < 0 */ - if (insert_num < 0) { - balance_internal_when_delete(tb, h, child_pos); - return order; - } - - k = 0; - if (tb->lnum[h] > 0) { - /* - * shift lnum[h] items from S[h] to the left neighbor L[h]. - * check how many of new items fall into L[h] or CFL[h] after - * shifting - */ - n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */ - if (tb->lnum[h] <= child_pos) { - /* new items don't fall into L[h] or CFL[h] */ - internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, - tb->lnum[h]); - child_pos -= tb->lnum[h]; - } else if (tb->lnum[h] > child_pos + insert_num) { - /* all new items fall into L[h] */ - internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, - tb->lnum[h] - insert_num); - /* insert insert_num keys and node-pointers into L[h] */ - bi.tb = tb; - bi.bi_bh = tb->L[h]; - bi.bi_parent = tb->FL[h]; - bi.bi_position = get_left_neighbor_position(tb, h); - internal_insert_childs(&bi, - /*tb->L[h], tb->S[h-1]->b_next */ - n + child_pos + 1, - insert_num, insert_key, - insert_ptr); - - insert_num = 0; - } else { - struct disk_child *dc; - - /* - * some items fall into L[h] or CFL[h], - * but some don't fall - */ - internal_shift1_left(tb, h, child_pos + 1); - /* calculate number of new items that fall into L[h] */ - k = tb->lnum[h] - child_pos - 1; - bi.tb = tb; - bi.bi_bh = tb->L[h]; - bi.bi_parent = tb->FL[h]; - bi.bi_position = get_left_neighbor_position(tb, h); - internal_insert_childs(&bi, - /*tb->L[h], tb->S[h-1]->b_next, */ - n + child_pos + 1, k, - insert_key, insert_ptr); - - replace_lkey(tb, h, insert_key + k); - - /* - * replace the first node-ptr in S[h] by - * node-ptr to insert_ptr[k] - */ - dc = B_N_CHILD(tbSh, 0); - put_dc_size(dc, - MAX_CHILD_SIZE(insert_ptr[k]) - - B_FREE_SPACE(insert_ptr[k])); - put_dc_block_number(dc, insert_ptr[k]->b_blocknr); - - do_balance_mark_internal_dirty(tb, tbSh, 0); - - k++; - insert_key += k; - insert_ptr += k; - insert_num -= k; - child_pos = 0; - } - } - /* tb->lnum[h] > 0 */ - if (tb->rnum[h] > 0) { - /*shift rnum[h] items from S[h] to the right neighbor R[h] */ - /* - * check how many of new items fall into R or CFR - * after shifting - */ - n = B_NR_ITEMS(tbSh); /* number of items in S[h] */ - if (n - tb->rnum[h] >= child_pos) - /* new items fall into S[h] */ - internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, - tb->rnum[h]); - else if (n + insert_num - tb->rnum[h] < child_pos) { - /* all new items fall into R[h] */ - internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, - tb->rnum[h] - insert_num); - - /* insert insert_num keys and node-pointers into R[h] */ - bi.tb = tb; - bi.bi_bh = tb->R[h]; - bi.bi_parent = tb->FR[h]; - bi.bi_position = get_right_neighbor_position(tb, h); - internal_insert_childs(&bi, - /*tb->R[h],tb->S[h-1]->b_next */ - child_pos - n - insert_num + - tb->rnum[h] - 1, - insert_num, insert_key, - insert_ptr); - insert_num = 0; - } else { - struct disk_child *dc; - - /* one of the items falls into CFR[h] */ - internal_shift1_right(tb, h, n - child_pos + 1); - /* calculate number of new items that fall into R[h] */ - k = tb->rnum[h] - n + 
child_pos - 1; - bi.tb = tb; - bi.bi_bh = tb->R[h]; - bi.bi_parent = tb->FR[h]; - bi.bi_position = get_right_neighbor_position(tb, h); - internal_insert_childs(&bi, - /*tb->R[h], tb->R[h]->b_child, */ - 0, k, insert_key + 1, - insert_ptr + 1); - - replace_rkey(tb, h, insert_key + insert_num - k - 1); - - /* - * replace the first node-ptr in R[h] by - * node-ptr insert_ptr[insert_num-k-1] - */ - dc = B_N_CHILD(tb->R[h], 0); - put_dc_size(dc, - MAX_CHILD_SIZE(insert_ptr - [insert_num - k - 1]) - - B_FREE_SPACE(insert_ptr - [insert_num - k - 1])); - put_dc_block_number(dc, - insert_ptr[insert_num - k - - 1]->b_blocknr); - - do_balance_mark_internal_dirty(tb, tb->R[h], 0); - - insert_num -= (k + 1); - } - } - - /** Fill new node that appears instead of S[h] **/ - RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level"); - RFALSE(tb->blknum[h] < 0, "blknum can not be < 0"); - - if (!tb->blknum[h]) { /* node S[h] is empty now */ - RFALSE(!tbSh, "S[h] is equal NULL"); - - /* do what is needed for buffer thrown from tree */ - reiserfs_invalidate_buffer(tb, tbSh); - return order; - } - - if (!tbSh) { - /* create new root */ - struct disk_child *dc; - struct buffer_head *tbSh_1 = PATH_H_PBUFFER(tb->tb_path, h - 1); - struct block_head *blkh; - - if (tb->blknum[h] != 1) - reiserfs_panic(NULL, "ibalance-3", "One new node " - "required for creating the new root"); - /* S[h] = empty buffer from the list FEB. */ - tbSh = get_FEB(tb); - blkh = B_BLK_HEAD(tbSh); - set_blkh_level(blkh, h + 1); - - /* Put the unique node-pointer to S[h] that points to S[h-1]. */ - - dc = B_N_CHILD(tbSh, 0); - put_dc_block_number(dc, tbSh_1->b_blocknr); - put_dc_size(dc, - (MAX_CHILD_SIZE(tbSh_1) - B_FREE_SPACE(tbSh_1))); - - tb->insert_size[h] -= DC_SIZE; - set_blkh_free_space(blkh, blkh_free_space(blkh) - DC_SIZE); - - do_balance_mark_internal_dirty(tb, tbSh, 0); - - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - check_internal(tbSh); - /*&&&&&&&&&&&&&&&&&&&&&&&& */ - - /* put new root into path structure */ - PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) = - tbSh; - - /* Change root in structure super block. 
*/ - PUT_SB_ROOT_BLOCK(tb->tb_sb, tbSh->b_blocknr); - PUT_SB_TREE_HEIGHT(tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1); - do_balance_mark_sb_dirty(tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1); - } - - if (tb->blknum[h] == 2) { - int snum; - struct buffer_info dest_bi, src_bi; - - /* S_new = free buffer from list FEB */ - S_new = get_FEB(tb); - - set_blkh_level(B_BLK_HEAD(S_new), h + 1); - - dest_bi.tb = tb; - dest_bi.bi_bh = S_new; - dest_bi.bi_parent = NULL; - dest_bi.bi_position = 0; - src_bi.tb = tb; - src_bi.bi_bh = tbSh; - src_bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h); - src_bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - - n = B_NR_ITEMS(tbSh); /* number of items in S[h] */ - snum = (insert_num + n + 1) / 2; - if (n - snum >= child_pos) { - /* new items don't fall into S_new */ - /* store the delimiting key for the next level */ - /* new_insert_key = (n - snum)'th key in S[h] */ - memcpy(&new_insert_key, internal_key(tbSh, n - snum), - KEY_SIZE); - /* last parameter is del_par */ - internal_move_pointers_items(&dest_bi, &src_bi, - LAST_TO_FIRST, snum, 0); - } else if (n + insert_num - snum < child_pos) { - /* all new items fall into S_new */ - /* store the delimiting key for the next level */ - /* - * new_insert_key = (n + insert_item - snum)'th - * key in S[h] - */ - memcpy(&new_insert_key, - internal_key(tbSh, n + insert_num - snum), - KEY_SIZE); - /* last parameter is del_par */ - internal_move_pointers_items(&dest_bi, &src_bi, - LAST_TO_FIRST, - snum - insert_num, 0); - - /* - * insert insert_num keys and node-pointers - * into S_new - */ - internal_insert_childs(&dest_bi, - /*S_new,tb->S[h-1]->b_next, */ - child_pos - n - insert_num + - snum - 1, - insert_num, insert_key, - insert_ptr); - - insert_num = 0; - } else { - struct disk_child *dc; - - /* some items fall into S_new, but some don't fall */ - /* last parameter is del_par */ - internal_move_pointers_items(&dest_bi, &src_bi, - LAST_TO_FIRST, - n - child_pos + 1, 1); - /* calculate number of new items that fall into S_new */ - k = snum - n + child_pos - 1; - - internal_insert_childs(&dest_bi, /*S_new, */ 0, k, - insert_key + 1, insert_ptr + 1); - - /* new_insert_key = insert_key[insert_num - k - 1] */ - memcpy(&new_insert_key, insert_key + insert_num - k - 1, - KEY_SIZE); - /* - * replace first node-ptr in S_new by node-ptr - * to insert_ptr[insert_num-k-1] - */ - - dc = B_N_CHILD(S_new, 0); - put_dc_size(dc, - (MAX_CHILD_SIZE - (insert_ptr[insert_num - k - 1]) - - B_FREE_SPACE(insert_ptr - [insert_num - k - 1]))); - put_dc_block_number(dc, - insert_ptr[insert_num - k - - 1]->b_blocknr); - - do_balance_mark_internal_dirty(tb, S_new, 0); - - insert_num -= (k + 1); - } - /* new_insert_ptr = node_pointer to S_new */ - new_insert_ptr = S_new; - - RFALSE(!buffer_journaled(S_new) || buffer_journal_dirty(S_new) - || buffer_dirty(S_new), "cm-00001: bad S_new (%b)", - S_new); - - /* S_new is released in unfix_nodes */ - } - - n = B_NR_ITEMS(tbSh); /*number of items in S[h] */ - - if (0 <= child_pos && child_pos <= n && insert_num > 0) { - bi.tb = tb; - bi.bi_bh = tbSh; - bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h); - bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1); - internal_insert_childs(&bi, /*tbSh, */ - /* ( tb->S[h-1]->b_parent == tb->S[h] ) ? 
tb->S[h-1]->b_next : tb->S[h]->b_child->b_next, */ - child_pos, insert_num, insert_key, - insert_ptr); - } - - insert_ptr[0] = new_insert_ptr; - if (new_insert_ptr) - memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE); - - return order; -} diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c deleted file mode 100644 index d39ee5f6c075..000000000000 --- a/fs/reiserfs/inode.c +++ /dev/null @@ -1,3416 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include -#include -#include "reiserfs.h" -#include "acl.h" -#include "xattr.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int reiserfs_commit_write(struct file *f, struct page *page, - unsigned from, unsigned to); - -void reiserfs_evict_inode(struct inode *inode) -{ - /* - * We need blocks for transaction + (user+group) quota - * update (possibly delete) - */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 2 + - 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); - struct reiserfs_transaction_handle th; - int err; - - if (!inode->i_nlink && !is_bad_inode(inode)) - dquot_initialize(inode); - - truncate_inode_pages_final(&inode->i_data); - if (inode->i_nlink) - goto no_delete; - - /* - * The = 0 happens when we abort creating a new inode - * for some reason like lack of space.. - * also handles bad_inode case - */ - if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { - - reiserfs_delete_xattrs(inode); - - reiserfs_write_lock(inode->i_sb); - - if (journal_begin(&th, inode->i_sb, jbegin_count)) - goto out; - reiserfs_update_inode_transaction(inode); - - reiserfs_discard_prealloc(&th, inode); - - err = reiserfs_delete_object(&th, inode); - - /* - * Do quota update inside a transaction for journaled quotas. 
- * We must do that after delete_object so that quota updates - * go into the same transaction as stat data deletion - */ - if (!err) { - int depth = reiserfs_write_unlock_nested(inode->i_sb); - dquot_free_inode(inode); - reiserfs_write_lock_nested(inode->i_sb, depth); - } - - if (journal_end(&th)) - goto out; - - /* - * check return value from reiserfs_delete_object after - * ending the transaction - */ - if (err) - goto out; - - /* - * all items of file are deleted, so we can remove - * "save" link - * we can't do anything about an error here - */ - remove_save_link(inode, 0 /* not truncate */); -out: - reiserfs_write_unlock(inode->i_sb); - } else { - /* no object items are in the tree */ - ; - } - - /* note this must go after the journal_end to prevent deadlock */ - clear_inode(inode); - - dquot_drop(inode); - inode->i_blocks = 0; - return; - -no_delete: - clear_inode(inode); - dquot_drop(inode); -} - -static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, - __u32 objectid, loff_t offset, int type, int length) -{ - key->version = version; - - key->on_disk_key.k_dir_id = dirid; - key->on_disk_key.k_objectid = objectid; - set_cpu_key_k_offset(key, offset); - set_cpu_key_k_type(key, type); - key->key_length = length; -} - -/* - * take base of inode_key (it comes from inode always) (dirid, objectid) - * and version from an inode, set offset and type of key - */ -void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, - int type, int length) -{ - _make_cpu_key(key, get_inode_item_key_version(inode), - le32_to_cpu(INODE_PKEY(inode)->k_dir_id), - le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type, - length); -} - -/* when key is 0, do not set version and short key */ -inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, - int version, - loff_t offset, int type, int length, - int entry_count /*or ih_free_space */ ) -{ - if (key) { - ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id); - ih->ih_key.k_objectid = - cpu_to_le32(key->on_disk_key.k_objectid); - } - put_ih_version(ih, version); - set_le_ih_k_offset(ih, offset); - set_le_ih_k_type(ih, type); - put_ih_item_len(ih, length); - /* set_ih_free_space (ih, 0); */ - /* - * for directory items it is entry count, for directs and stat - * datas - 0xffff, for indirects - 0 - */ - put_ih_entry_count(ih, entry_count); -} - -/* - * FIXME: we might cache recently accessed indirect item - * Ugh. Not too eager for that.... - * I cut the code until such time as I see a convincing argument (benchmark). - * I don't want a bloated inode struct..., and I don't like code complexity.... - */ - -/* - * cutting the code is fine, since it really isn't in use yet and is easy - * to add back in. But, Vladimir has a really good idea here. Think - * about what happens for reading a file. For each page, - * The VFS layer calls reiserfs_read_folio, who searches the tree to find - * an indirect item. This indirect item has X number of pointers, where - * X is a big number if we've done the block allocation right. But, - * we only use one or two of these pointers during each call to read_folio, - * needlessly researching again later on. - * - * The size of the cache could be dynamic based on the size of the file. - * - * I'd also like to see us cache the location the stat data item, since - * we are needlessly researching for that frequently. 
- * - * --chris - */ - -/* - * If this page has a file tail in it, and - * it was read in by get_block_create_0, the page data is valid, - * but tail is still sitting in a direct item, and we can't write to - * it. So, look through this page, and check all the mapped buffers - * to make sure they have valid block numbers. Any that don't need - * to be unmapped, so that __block_write_begin will correctly call - * reiserfs_get_block to convert the tail into an unformatted node - */ -static inline void fix_tail_page_for_writing(struct page *page) -{ - struct buffer_head *head, *next, *bh; - - if (page && page_has_buffers(page)) { - head = page_buffers(page); - bh = head; - do { - next = bh->b_this_page; - if (buffer_mapped(bh) && bh->b_blocknr == 0) { - reiserfs_unmap_buffer(bh); - } - bh = next; - } while (bh != head); - } -} - -/* - * reiserfs_get_block does not need to allocate a block only if it has been - * done already or non-hole position has been found in the indirect item - */ -static inline int allocation_needed(int retval, b_blocknr_t allocated, - struct item_head *ih, - __le32 * item, int pos_in_item) -{ - if (allocated) - return 0; - if (retval == POSITION_FOUND && is_indirect_le_ih(ih) && - get_block_num(item, pos_in_item)) - return 0; - return 1; -} - -static inline int indirect_item_found(int retval, struct item_head *ih) -{ - return (retval == POSITION_FOUND) && is_indirect_le_ih(ih); -} - -static inline void set_block_dev_mapped(struct buffer_head *bh, - b_blocknr_t block, struct inode *inode) -{ - map_bh(bh, inode->i_sb, block); -} - -/* - * files which were created in the earlier version can not be longer, - * than 2 gb - */ -static int file_capable(struct inode *inode, sector_t block) -{ - /* it is new file. */ - if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || - /* old file, but 'block' is inside of 2gb */ - block < (1 << (31 - inode->i_sb->s_blocksize_bits))) - return 1; - - return 0; -} - -static int restart_transaction(struct reiserfs_transaction_handle *th, - struct inode *inode, struct treepath *path) -{ - struct super_block *s = th->t_super; - int err; - - BUG_ON(!th->t_trans_id); - BUG_ON(!th->t_refcount); - - pathrelse(path); - - /* we cannot restart while nested */ - if (th->t_refcount > 1) { - return 0; - } - reiserfs_update_sd(th, inode); - err = journal_end(th); - if (!err) { - err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6); - if (!err) - reiserfs_update_inode_transaction(inode); - } - return err; -} - -/* - * it is called by get_block when create == 0. Returns block number - * for 'block'-th logical block of file. When it hits direct item it - * returns 0 (being called from bmap) or read direct item into piece - * of page (bh_result) - * Please improve the english/clarity in the comment above, as it is - * hard to understand. 
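 * In short: for a block covered by an indirect item this maps bh_result to
 * the on-disk block (a hole gives 0, or -ENOENT when GET_BLOCK_NO_HOLE is
 * set and the page is not uptodate); for a tail kept in direct item(s) it
 * returns -ENOENT to bmap callers, and with GET_BLOCK_READ_DIRECT it copies
 * the tail bytes into the page and maps bh_result to block 0 to flag it as
 * a tail.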
- */ -static int _get_block_create_0(struct inode *inode, sector_t block, - struct buffer_head *bh_result, int args) -{ - INITIALIZE_PATH(path); - struct cpu_key key; - struct buffer_head *bh; - struct item_head *ih, tmp_ih; - b_blocknr_t blocknr; - char *p; - int chars; - int ret; - int result; - int done = 0; - unsigned long offset; - - /* prepare the key to look for the 'block'-th block of file */ - make_cpu_key(&key, inode, - (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, - 3); - - result = search_for_position_by_key(inode->i_sb, &key, &path); - if (result != POSITION_FOUND) { - pathrelse(&path); - if (result == IO_ERROR) - return -EIO; - /* - * We do not return -ENOENT if there is a hole but page is - * uptodate, because it means that there is some MMAPED data - * associated with it that is yet to be written to disk. - */ - if ((args & GET_BLOCK_NO_HOLE) - && !PageUptodate(bh_result->b_page)) { - return -ENOENT; - } - return 0; - } - - bh = get_last_bh(&path); - ih = tp_item_head(&path); - if (is_indirect_le_ih(ih)) { - __le32 *ind_item = (__le32 *) ih_item_body(bh, ih); - - /* - * FIXME: here we could cache indirect item or part of it in - * the inode to avoid search_by_key in case of subsequent - * access to file - */ - blocknr = get_block_num(ind_item, path.pos_in_item); - ret = 0; - if (blocknr) { - map_bh(bh_result, inode->i_sb, blocknr); - if (path.pos_in_item == - ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) { - set_buffer_boundary(bh_result); - } - } else - /* - * We do not return -ENOENT if there is a hole but - * page is uptodate, because it means that there is - * some MMAPED data associated with it that is - * yet to be written to disk. - */ - if ((args & GET_BLOCK_NO_HOLE) - && !PageUptodate(bh_result->b_page)) { - ret = -ENOENT; - } - - pathrelse(&path); - return ret; - } - /* requested data are in direct item(s) */ - if (!(args & GET_BLOCK_READ_DIRECT)) { - /* - * we are called by bmap. FIXME: we can not map block of file - * when it is stored in direct item(s) - */ - pathrelse(&path); - return -ENOENT; - } - - /* - * if we've got a direct item, and the buffer or page was uptodate, - * we don't want to pull data off disk again. skip to the - * end, where we map the buffer and return - */ - if (buffer_uptodate(bh_result)) { - goto finished; - } else - /* - * grab_tail_page can trigger calls to reiserfs_get_block on - * up to date pages without any buffers. If the page is up - * to date, we don't want read old data off disk. Set the up - * to date bit on the buffer instead and jump to the end - */ - if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { - set_buffer_uptodate(bh_result); - goto finished; - } - /* read file tail into part of page */ - offset = (cpu_key_k_offset(&key) - 1) & (PAGE_SIZE - 1); - copy_item_head(&tmp_ih, ih); - - /* - * we only want to kmap if we are reading the tail into the page. - * this is not the common case, so we don't kmap until we are - * sure we need to. But, this means the item might move if - * kmap schedules - */ - p = (char *)kmap(bh_result->b_page); - p += offset; - memset(p, 0, inode->i_sb->s_blocksize); - do { - if (!is_direct_le_ih(ih)) { - BUG(); - } - /* - * make sure we don't read more bytes than actually exist in - * the file. 
This can happen in odd cases where i_size isn't - * correct, and when direct item padding results in a few - * extra bytes at the end of the direct item - */ - if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) - break; - if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) { - chars = - inode->i_size - (le_ih_k_offset(ih) - 1) - - path.pos_in_item; - done = 1; - } else { - chars = ih_item_len(ih) - path.pos_in_item; - } - memcpy(p, ih_item_body(bh, ih) + path.pos_in_item, chars); - - if (done) - break; - - p += chars; - - /* - * we done, if read direct item is not the last item of - * node FIXME: we could try to check right delimiting key - * to see whether direct item continues in the right - * neighbor or rely on i_size - */ - if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1)) - break; - - /* update key to look for the next piece */ - set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars); - result = search_for_position_by_key(inode->i_sb, &key, &path); - if (result != POSITION_FOUND) - /* i/o error most likely */ - break; - bh = get_last_bh(&path); - ih = tp_item_head(&path); - } while (1); - - flush_dcache_page(bh_result->b_page); - kunmap(bh_result->b_page); - -finished: - pathrelse(&path); - - if (result == IO_ERROR) - return -EIO; - - /* - * this buffer has valid data, but isn't valid for io. mapping it to - * block #0 tells the rest of reiserfs it just has a tail in it - */ - map_bh(bh_result, inode->i_sb, 0); - set_buffer_uptodate(bh_result); - return 0; -} - -/* - * this is called to create file map. So, _get_block_create_0 will not - * read direct item - */ -static int reiserfs_bmap(struct inode *inode, sector_t block, - struct buffer_head *bh_result, int create) -{ - if (!file_capable(inode, block)) - return -EFBIG; - - reiserfs_write_lock(inode->i_sb); - /* do not read the direct item */ - _get_block_create_0(inode, block, bh_result, 0); - reiserfs_write_unlock(inode->i_sb); - return 0; -} - -/* - * special version of get_block that is only used by grab_tail_page right - * now. It is sent to __block_write_begin, and when you try to get a - * block past the end of the file (or a block from a hole) it returns - * -ENOENT instead of a valid buffer. __block_write_begin expects to - * be able to do i/o on the buffers returned, unless an error value - * is also returned. - * - * So, this allows __block_write_begin to be used for reading a single block - * in a page. Where it does not produce a valid page for holes, or past the - * end of the file. This turns out to be exactly what we need for reading - * tails for conversion. - * - * The point of the wrapper is forcing a certain value for create, even - * though the VFS layer is calling this function with create==1. If you - * don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, - * don't use this function. -*/ -static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, - struct buffer_head *bh_result, - int create) -{ - return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE); -} - -/* - * This is special helper for reiserfs_get_block in case we are executing - * direct_IO request. 
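 * It sets b_size up front (convert_tail_for_hole() relies on it), passes
 * GET_BLOCK_NO_DANGLE so any transaction started inside reiserfs_get_block()
 * is closed before returning, and refuses direct I/O on an unpacked tail:
 * a buffer that comes back mapped to block 0 is unmapped again and -EINVAL
 * is returned.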
- */ -static int reiserfs_get_blocks_direct_io(struct inode *inode, - sector_t iblock, - struct buffer_head *bh_result, - int create) -{ - int ret; - - bh_result->b_page = NULL; - - /* - * We set the b_size before reiserfs_get_block call since it is - * referenced in convert_tail_for_hole() that may be called from - * reiserfs_get_block() - */ - bh_result->b_size = i_blocksize(inode); - - ret = reiserfs_get_block(inode, iblock, bh_result, - create | GET_BLOCK_NO_DANGLE); - if (ret) - goto out; - - /* don't allow direct io onto tail pages */ - if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { - /* - * make sure future calls to the direct io funcs for this - * offset in the file fail by unmapping the buffer - */ - clear_buffer_mapped(bh_result); - ret = -EINVAL; - } - - /* - * Possible unpacked tail. Flush the data before pages have - * disappeared - */ - if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { - int err; - - reiserfs_write_lock(inode->i_sb); - - err = reiserfs_commit_for_inode(inode); - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; - - reiserfs_write_unlock(inode->i_sb); - - if (err < 0) - ret = err; - } -out: - return ret; -} - -/* - * helper function for when reiserfs_get_block is called for a hole - * but the file tail is still in a direct item - * bh_result is the buffer head for the hole - * tail_offset is the offset of the start of the tail in the file - * - * This calls prepare_write, which will start a new transaction - * you should not be in a transaction, or have any paths held when you - * call this. - */ -static int convert_tail_for_hole(struct inode *inode, - struct buffer_head *bh_result, - loff_t tail_offset) -{ - unsigned long index; - unsigned long tail_end; - unsigned long tail_start; - struct page *tail_page; - struct page *hole_page = bh_result->b_page; - int retval = 0; - - if ((tail_offset & (bh_result->b_size - 1)) != 1) - return -EIO; - - /* always try to read until the end of the block */ - tail_start = tail_offset & (PAGE_SIZE - 1); - tail_end = (tail_start | (bh_result->b_size - 1)) + 1; - - index = tail_offset >> PAGE_SHIFT; - /* - * hole_page can be zero in case of direct_io, we are sure - * that we cannot get here if we write with O_DIRECT into tail page - */ - if (!hole_page || index != hole_page->index) { - tail_page = grab_cache_page(inode->i_mapping, index); - retval = -ENOMEM; - if (!tail_page) { - goto out; - } - } else { - tail_page = hole_page; - } - - /* - * we don't have to make sure the conversion did not happen while - * we were locking the page because anyone that could convert - * must first take i_mutex. - * - * We must fix the tail page for writing because it might have buffers - * that are mapped, but have a block number of 0. This indicates tail - * data that has been read directly into the page, and - * __block_write_begin won't trigger a get_block in this case. 
- */ - fix_tail_page_for_writing(tail_page); - retval = __reiserfs_write_begin(tail_page, tail_start, - tail_end - tail_start); - if (retval) - goto unlock; - - /* tail conversion might change the data in the page */ - flush_dcache_page(tail_page); - - retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end); - -unlock: - if (tail_page != hole_page) { - unlock_page(tail_page); - put_page(tail_page); - } -out: - return retval; -} - -static inline int _allocate_block(struct reiserfs_transaction_handle *th, - sector_t block, - struct inode *inode, - b_blocknr_t * allocated_block_nr, - struct treepath *path, int flags) -{ - BUG_ON(!th->t_trans_id); - -#ifdef REISERFS_PREALLOCATE - if (!(flags & GET_BLOCK_NO_IMUX)) { - return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, - path, block); - } -#endif - return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path, - block); -} - -int reiserfs_get_block(struct inode *inode, sector_t block, - struct buffer_head *bh_result, int create) -{ - int repeat, retval = 0; - /* b_blocknr_t is (unsigned) 32 bit int*/ - b_blocknr_t allocated_block_nr = 0; - INITIALIZE_PATH(path); - int pos_in_item; - struct cpu_key key; - struct buffer_head *bh, *unbh = NULL; - struct item_head *ih, tmp_ih; - __le32 *item; - int done; - int fs_gen; - struct reiserfs_transaction_handle *th = NULL; - /* - * space reserved in transaction batch: - * . 3 balancings in direct->indirect conversion - * . 1 block involved into reiserfs_update_sd() - * XXX in practically impossible worst case direct2indirect() - * can incur (much) more than 3 balancings. - * quota update for user, group - */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 3 + 1 + - 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); - int version; - int dangle = 1; - loff_t new_offset = - (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; - - reiserfs_write_lock(inode->i_sb); - version = get_inode_item_key_version(inode); - - if (!file_capable(inode, block)) { - reiserfs_write_unlock(inode->i_sb); - return -EFBIG; - } - - /* - * if !create, we aren't changing the FS, so we don't need to - * log anything, so we don't need to start a transaction - */ - if (!(create & GET_BLOCK_CREATE)) { - int ret; - /* find number of block-th logical block of the file */ - ret = _get_block_create_0(inode, block, bh_result, - create | GET_BLOCK_READ_DIRECT); - reiserfs_write_unlock(inode->i_sb); - return ret; - } - - /* - * if we're already in a transaction, make sure to close - * any new transactions we start in this func - */ - if ((create & GET_BLOCK_NO_DANGLE) || - reiserfs_transaction_running(inode->i_sb)) - dangle = 0; - - /* - * If file is of such a size, that it might have a tail and - * tails are enabled we should mark it as possibly needing - * tail packing on close - */ - if ((have_large_tails(inode->i_sb) - && inode->i_size < i_block_size(inode) * 4) - || (have_small_tails(inode->i_sb) - && inode->i_size < i_block_size(inode))) - REISERFS_I(inode)->i_flags |= i_pack_on_close_mask; - - /* set the key of the first byte in the 'block'-th block of file */ - make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ ); - if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { -start_trans: - th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); - if (!th) { - retval = -ENOMEM; - goto failure; - } - reiserfs_update_inode_transaction(inode); - } -research: - - retval = search_for_position_by_key(inode->i_sb, &key, &path); - if (retval == IO_ERROR) { - retval = -EIO; - goto 
failure; - } - - bh = get_last_bh(&path); - ih = tp_item_head(&path); - item = tp_item_body(&path); - pos_in_item = path.pos_in_item; - - fs_gen = get_generation(inode->i_sb); - copy_item_head(&tmp_ih, ih); - - if (allocation_needed - (retval, allocated_block_nr, ih, item, pos_in_item)) { - /* we have to allocate block for the unformatted node */ - if (!th) { - pathrelse(&path); - goto start_trans; - } - - repeat = - _allocate_block(th, block, inode, &allocated_block_nr, - &path, create); - - /* - * restart the transaction to give the journal a chance to free - * some blocks. releases the path, so we have to go back to - * research if we succeed on the second try - */ - if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { - SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; - retval = restart_transaction(th, inode, &path); - if (retval) - goto failure; - repeat = - _allocate_block(th, block, inode, - &allocated_block_nr, NULL, create); - - if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) { - goto research; - } - if (repeat == QUOTA_EXCEEDED) - retval = -EDQUOT; - else - retval = -ENOSPC; - goto failure; - } - - if (fs_changed(fs_gen, inode->i_sb) - && item_moved(&tmp_ih, &path)) { - goto research; - } - } - - if (indirect_item_found(retval, ih)) { - b_blocknr_t unfm_ptr; - /* - * 'block'-th block is in the file already (there is - * corresponding cell in some indirect item). But it may be - * zero unformatted node pointer (hole) - */ - unfm_ptr = get_block_num(item, pos_in_item); - if (unfm_ptr == 0) { - /* use allocated block to plug the hole */ - reiserfs_prepare_for_journal(inode->i_sb, bh, 1); - if (fs_changed(fs_gen, inode->i_sb) - && item_moved(&tmp_ih, &path)) { - reiserfs_restore_prepared_buffer(inode->i_sb, - bh); - goto research; - } - set_buffer_new(bh_result); - if (buffer_dirty(bh_result) - && reiserfs_data_ordered(inode->i_sb)) - reiserfs_add_ordered_list(inode, bh_result); - put_block_num(item, pos_in_item, allocated_block_nr); - unfm_ptr = allocated_block_nr; - journal_mark_dirty(th, bh); - reiserfs_update_sd(th, inode); - } - set_block_dev_mapped(bh_result, unfm_ptr, inode); - pathrelse(&path); - retval = 0; - if (!dangle && th) - retval = reiserfs_end_persistent_transaction(th); - - reiserfs_write_unlock(inode->i_sb); - - /* - * the item was found, so new blocks were not added to the file - * there is no need to make sure the inode is updated with this - * transaction - */ - return retval; - } - - if (!th) { - pathrelse(&path); - goto start_trans; - } - - /* - * desired position is not found or is in the direct item. We have - * to append file with holes up to 'block'-th block converting - * direct items to indirect one if necessary - */ - done = 0; - do { - if (is_statdata_le_ih(ih)) { - __le32 unp = 0; - struct cpu_key tmp_key; - - /* indirect item has to be inserted */ - make_le_item_head(&tmp_ih, &key, version, 1, - TYPE_INDIRECT, UNFM_P_SIZE, - 0 /* free_space */ ); - - /* - * we are going to add 'block'-th block to the file. 
- * Use allocated block for that - */ - if (cpu_key_k_offset(&key) == 1) { - unp = cpu_to_le32(allocated_block_nr); - set_block_dev_mapped(bh_result, - allocated_block_nr, inode); - set_buffer_new(bh_result); - done = 1; - } - tmp_key = key; /* ;) */ - set_cpu_key_k_offset(&tmp_key, 1); - PATH_LAST_POSITION(&path)++; - - retval = - reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih, - inode, (char *)&unp); - if (retval) { - reiserfs_free_block(th, inode, - allocated_block_nr, 1); - /* - * retval == -ENOSPC, -EDQUOT or -EIO - * or -EEXIST - */ - goto failure; - } - } else if (is_direct_le_ih(ih)) { - /* direct item has to be converted */ - loff_t tail_offset; - - tail_offset = - ((le_ih_k_offset(ih) - - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; - - /* - * direct item we just found fits into block we have - * to map. Convert it into unformatted node: use - * bh_result for the conversion - */ - if (tail_offset == cpu_key_k_offset(&key)) { - set_block_dev_mapped(bh_result, - allocated_block_nr, inode); - unbh = bh_result; - done = 1; - } else { - /* - * we have to pad file tail stored in direct - * item(s) up to block size and convert it - * to unformatted node. FIXME: this should - * also get into page cache - */ - - pathrelse(&path); - /* - * ugly, but we can only end the transaction if - * we aren't nested - */ - BUG_ON(!th->t_refcount); - if (th->t_refcount == 1) { - retval = - reiserfs_end_persistent_transaction - (th); - th = NULL; - if (retval) - goto failure; - } - - retval = - convert_tail_for_hole(inode, bh_result, - tail_offset); - if (retval) { - if (retval != -ENOSPC) - reiserfs_error(inode->i_sb, - "clm-6004", - "convert tail failed " - "inode %lu, error %d", - inode->i_ino, - retval); - if (allocated_block_nr) { - /* - * the bitmap, the super, - * and the stat data == 3 - */ - if (!th) - th = reiserfs_persistent_transaction(inode->i_sb, 3); - if (th) - reiserfs_free_block(th, - inode, - allocated_block_nr, - 1); - } - goto failure; - } - goto research; - } - retval = - direct2indirect(th, inode, &path, unbh, - tail_offset); - if (retval) { - reiserfs_unmap_buffer(unbh); - reiserfs_free_block(th, inode, - allocated_block_nr, 1); - goto failure; - } - /* - * it is important the set_buffer_uptodate is done - * after the direct2indirect. The buffer might - * contain valid data newer than the data on disk - * (read by read_folio, changed, and then sent here by - * writepage). direct2indirect needs to know if unbh - * was already up to date, so it can decide if the - * data in unbh needs to be replaced with data from - * the disk - */ - set_buffer_uptodate(unbh); - - /* - * unbh->b_page == NULL in case of DIRECT_IO request, - * this means buffer will disappear shortly, so it - * should not be added to - */ - if (unbh->b_page) { - /* - * we've converted the tail, so we must - * flush unbh before the transaction commits - */ - reiserfs_add_tail_list(inode, unbh); - - /* - * mark it dirty now to prevent commit_write - * from adding this buffer to the inode's - * dirty buffer list - */ - /* - * AKPM: changed __mark_buffer_dirty to - * mark_buffer_dirty(). 
It's still atomic, - * but it sets the page dirty too, which makes - * it eligible for writeback at any time by the - * VM (which was also the case with - * __mark_buffer_dirty()) - */ - mark_buffer_dirty(unbh); - } - } else { - /* - * append indirect item with holes if needed, when - * appending pointer to 'block'-th block use block, - * which is already allocated - */ - struct cpu_key tmp_key; - /* - * We use this in case we need to allocate - * only one block which is a fastpath - */ - unp_t unf_single = 0; - unp_t *un; - __u64 max_to_insert = - MAX_ITEM_LEN(inode->i_sb->s_blocksize) / - UNFM_P_SIZE; - __u64 blocks_needed; - - RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, - "vs-804: invalid position for append"); - /* - * indirect item has to be appended, - * set up key of that position - * (key type is unimportant) - */ - make_cpu_key(&tmp_key, inode, - le_key_k_offset(version, - &ih->ih_key) + - op_bytes_number(ih, - inode->i_sb->s_blocksize), - TYPE_INDIRECT, 3); - - RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key), - "green-805: invalid offset"); - blocks_needed = - 1 + - ((cpu_key_k_offset(&key) - - cpu_key_k_offset(&tmp_key)) >> inode->i_sb-> - s_blocksize_bits); - - if (blocks_needed == 1) { - un = &unf_single; - } else { - un = kcalloc(min(blocks_needed, max_to_insert), - UNFM_P_SIZE, GFP_NOFS); - if (!un) { - un = &unf_single; - blocks_needed = 1; - max_to_insert = 0; - } - } - if (blocks_needed <= max_to_insert) { - /* - * we are going to add target block to - * the file. Use allocated block for that - */ - un[blocks_needed - 1] = - cpu_to_le32(allocated_block_nr); - set_block_dev_mapped(bh_result, - allocated_block_nr, inode); - set_buffer_new(bh_result); - done = 1; - } else { - /* paste hole to the indirect item */ - /* - * If kcalloc failed, max_to_insert becomes - * zero and it means we only have space for - * one block - */ - blocks_needed = - max_to_insert ? max_to_insert : 1; - } - retval = - reiserfs_paste_into_item(th, &path, &tmp_key, inode, - (char *)un, - UNFM_P_SIZE * - blocks_needed); - - if (blocks_needed != 1) - kfree(un); - - if (retval) { - reiserfs_free_block(th, inode, - allocated_block_nr, 1); - goto failure; - } - if (!done) { - /* - * We need to mark new file size in case - * this function will be interrupted/aborted - * later on. And we may do this only for - * holes. - */ - inode->i_size += - inode->i_sb->s_blocksize * blocks_needed; - } - } - - if (done == 1) - break; - - /* - * this loop could log more blocks than we had originally - * asked for. So, we have to allow the transaction to end - * if it is too big or too full. Update the inode so things - * are consistent if we crash before the function returns - * release the path so that anybody waiting on the path before - * ending their transaction will be able to continue. - */ - if (journal_transaction_should_end(th, th->t_blocks_allocated)) { - retval = restart_transaction(th, inode, &path); - if (retval) - goto failure; - } - /* - * inserting indirect pointers for a hole can take a - * long time. reschedule if needed and also release the write - * lock for others. 
- */ - reiserfs_cond_resched(inode->i_sb); - - retval = search_for_position_by_key(inode->i_sb, &key, &path); - if (retval == IO_ERROR) { - retval = -EIO; - goto failure; - } - if (retval == POSITION_FOUND) { - reiserfs_warning(inode->i_sb, "vs-825", - "%K should not be found", &key); - retval = -EEXIST; - if (allocated_block_nr) - reiserfs_free_block(th, inode, - allocated_block_nr, 1); - pathrelse(&path); - goto failure; - } - bh = get_last_bh(&path); - ih = tp_item_head(&path); - item = tp_item_body(&path); - pos_in_item = path.pos_in_item; - } while (1); - - retval = 0; - -failure: - if (th && (!dangle || (retval && !th->t_trans_id))) { - int err; - if (th->t_trans_id) - reiserfs_update_sd(th, inode); - err = reiserfs_end_persistent_transaction(th); - if (err) - retval = err; - } - - reiserfs_write_unlock(inode->i_sb); - reiserfs_check_path(&path); - return retval; -} - -static void reiserfs_readahead(struct readahead_control *rac) -{ - mpage_readahead(rac, reiserfs_get_block); -} - -/* - * Compute real number of used bytes by file - * Following three functions can go away when we'll have enough space in - * stat item - */ -static int real_space_diff(struct inode *inode, int sd_size) -{ - int bytes; - loff_t blocksize = inode->i_sb->s_blocksize; - - if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) - return sd_size; - - /* - * End of file is also in full block with indirect reference, so round - * up to the next block. - * - * there is just no way to know if the tail is actually packed - * on the file, so we have to assume it isn't. When we pack the - * tail, we add 4 bytes to pretend there really is an unformatted - * node pointer - */ - bytes = - ((inode->i_size + - (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + - sd_size; - return bytes; -} - -static inline loff_t to_real_used_space(struct inode *inode, ulong blocks, - int sd_size) -{ - if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { - return inode->i_size + - (loff_t) (real_space_diff(inode, sd_size)); - } - return ((loff_t) real_space_diff(inode, sd_size)) + - (((loff_t) blocks) << 9); -} - -/* Compute number of blocks used by file in ReiserFS counting */ -static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) -{ - loff_t bytes = inode_get_bytes(inode); - loff_t real_space = real_space_diff(inode, sd_size); - - /* keeps fsck and non-quota versions of reiserfs happy */ - if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { - bytes += (loff_t) 511; - } - - /* - * files from before the quota patch might i_blocks such that - * bytes < real_space. Deal with that here to prevent it from - * going negative. - */ - if (bytes < real_space) - return 0; - return (bytes - real_space) >> 9; -} - -/* - * BAD: new directories have stat data of new type and all other items - * of old type. 
Version stored in the inode says about body items, so - * in update_stat_data we can not rely on inode, but have to check - * item version directly - */ - -/* called by read_locked_inode */ -static void init_inode(struct inode *inode, struct treepath *path) -{ - struct buffer_head *bh; - struct item_head *ih; - __u32 rdev; - - bh = PATH_PLAST_BUFFER(path); - ih = tp_item_head(path); - - copy_key(INODE_PKEY(inode), &ih->ih_key); - - INIT_LIST_HEAD(&REISERFS_I(inode)->i_prealloc_list); - REISERFS_I(inode)->i_flags = 0; - REISERFS_I(inode)->i_prealloc_block = 0; - REISERFS_I(inode)->i_prealloc_count = 0; - REISERFS_I(inode)->i_trans_id = 0; - REISERFS_I(inode)->i_jl = NULL; - reiserfs_init_xattr_rwsem(inode); - - if (stat_data_v1(ih)) { - struct stat_data_v1 *sd = - (struct stat_data_v1 *)ih_item_body(bh, ih); - unsigned long blocks; - - set_inode_item_key_version(inode, KEY_FORMAT_3_5); - set_inode_sd_version(inode, STAT_DATA_V1); - inode->i_mode = sd_v1_mode(sd); - set_nlink(inode, sd_v1_nlink(sd)); - i_uid_write(inode, sd_v1_uid(sd)); - i_gid_write(inode, sd_v1_gid(sd)); - inode->i_size = sd_v1_size(sd); - inode_set_atime(inode, sd_v1_atime(sd), 0); - inode_set_mtime(inode, sd_v1_mtime(sd), 0); - inode_set_ctime(inode, sd_v1_ctime(sd), 0); - - inode->i_blocks = sd_v1_blocks(sd); - inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); - blocks = (inode->i_size + 511) >> 9; - blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9); - - /* - * there was a bug in <=3.5.23 when i_blocks could take - * negative values. Starting from 3.5.17 this value could - * even be stored in stat data. For such files we set - * i_blocks based on file size. Just 2 notes: this can be - * wrong for sparse files. On-disk value will be only - * updated if file's inode will ever change - */ - if (inode->i_blocks > blocks) { - inode->i_blocks = blocks; - } - - rdev = sd_v1_rdev(sd); - REISERFS_I(inode)->i_first_direct_byte = - sd_v1_first_direct_byte(sd); - - /* - * an early bug in the quota code can give us an odd - * number for the block count. This is incorrect, fix it here. - */ - if (inode->i_blocks & 1) { - inode->i_blocks++; - } - inode_set_bytes(inode, - to_real_used_space(inode, inode->i_blocks, - SD_V1_SIZE)); - /* - * nopack is initially zero for v1 objects. 
For v2 objects, - * nopack is initialised from sd_attrs - */ - REISERFS_I(inode)->i_flags &= ~i_nopack_mask; - } else { - /* - * new stat data found, but object may have old items - * (directories and symlinks) - */ - struct stat_data *sd = (struct stat_data *)ih_item_body(bh, ih); - - inode->i_mode = sd_v2_mode(sd); - set_nlink(inode, sd_v2_nlink(sd)); - i_uid_write(inode, sd_v2_uid(sd)); - inode->i_size = sd_v2_size(sd); - i_gid_write(inode, sd_v2_gid(sd)); - inode_set_mtime(inode, sd_v2_mtime(sd), 0); - inode_set_atime(inode, sd_v2_atime(sd), 0); - inode_set_ctime(inode, sd_v2_ctime(sd), 0); - inode->i_blocks = sd_v2_blocks(sd); - rdev = sd_v2_rdev(sd); - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - inode->i_generation = - le32_to_cpu(INODE_PKEY(inode)->k_dir_id); - else - inode->i_generation = sd_v2_generation(sd); - - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - set_inode_item_key_version(inode, KEY_FORMAT_3_5); - else - set_inode_item_key_version(inode, KEY_FORMAT_3_6); - REISERFS_I(inode)->i_first_direct_byte = 0; - set_inode_sd_version(inode, STAT_DATA_V2); - inode_set_bytes(inode, - to_real_used_space(inode, inode->i_blocks, - SD_V2_SIZE)); - /* - * read persistent inode attributes from sd and initialise - * generic inode flags from them - */ - REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); - sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); - } - - pathrelse(path); - if (S_ISREG(inode->i_mode)) { - inode->i_op = &reiserfs_file_inode_operations; - inode->i_fop = &reiserfs_file_operations; - inode->i_mapping->a_ops = &reiserfs_address_space_operations; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &reiserfs_dir_inode_operations; - inode->i_fop = &reiserfs_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &reiserfs_symlink_inode_operations; - inode_nohighmem(inode); - inode->i_mapping->a_ops = &reiserfs_address_space_operations; - } else { - inode->i_blocks = 0; - inode->i_op = &reiserfs_special_inode_operations; - init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); - } -} - -/* update new stat data with inode fields */ -static void inode2sd(void *sd, struct inode *inode, loff_t size) -{ - struct stat_data *sd_v2 = (struct stat_data *)sd; - - set_sd_v2_mode(sd_v2, inode->i_mode); - set_sd_v2_nlink(sd_v2, inode->i_nlink); - set_sd_v2_uid(sd_v2, i_uid_read(inode)); - set_sd_v2_size(sd_v2, size); - set_sd_v2_gid(sd_v2, i_gid_read(inode)); - set_sd_v2_mtime(sd_v2, inode_get_mtime_sec(inode)); - set_sd_v2_atime(sd_v2, inode_get_atime_sec(inode)); - set_sd_v2_ctime(sd_v2, inode_get_ctime_sec(inode)); - set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE)); - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); - else - set_sd_v2_generation(sd_v2, inode->i_generation); - set_sd_v2_attrs(sd_v2, REISERFS_I(inode)->i_attrs); -} - -/* used to copy inode's fields to old stat data */ -static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) -{ - struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; - - set_sd_v1_mode(sd_v1, inode->i_mode); - set_sd_v1_uid(sd_v1, i_uid_read(inode)); - set_sd_v1_gid(sd_v1, i_gid_read(inode)); - set_sd_v1_nlink(sd_v1, inode->i_nlink); - set_sd_v1_size(sd_v1, size); - set_sd_v1_atime(sd_v1, inode_get_atime_sec(inode)); - set_sd_v1_ctime(sd_v1, inode_get_ctime_sec(inode)); - set_sd_v1_mtime(sd_v1, inode_get_mtime_sec(inode)); - - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - set_sd_v1_rdev(sd_v1, 
new_encode_dev(inode->i_rdev)); - else - set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); - - /* Sigh. i_first_direct_byte is back */ - set_sd_v1_first_direct_byte(sd_v1, - REISERFS_I(inode)->i_first_direct_byte); -} - -/* - * NOTE, you must prepare the buffer head before sending it here, - * and then log it after the call - */ -static void update_stat_data(struct treepath *path, struct inode *inode, - loff_t size) -{ - struct buffer_head *bh; - struct item_head *ih; - - bh = PATH_PLAST_BUFFER(path); - ih = tp_item_head(path); - - if (!is_statdata_le_ih(ih)) - reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h", - INODE_PKEY(inode), ih); - - /* path points to old stat data */ - if (stat_data_v1(ih)) { - inode2sd_v1(ih_item_body(bh, ih), inode, size); - } else { - inode2sd(ih_item_body(bh, ih), inode, size); - } - - return; -} - -void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, - struct inode *inode, loff_t size) -{ - struct cpu_key key; - INITIALIZE_PATH(path); - struct buffer_head *bh; - int fs_gen; - struct item_head *ih, tmp_ih; - int retval; - - BUG_ON(!th->t_trans_id); - - /* key type is unimportant */ - make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); - - for (;;) { - int pos; - /* look for the object's stat data */ - retval = search_item(inode->i_sb, &key, &path); - if (retval == IO_ERROR) { - reiserfs_error(inode->i_sb, "vs-13050", - "i/o failure occurred trying to " - "update %K stat data", &key); - return; - } - if (retval == ITEM_NOT_FOUND) { - pos = PATH_LAST_POSITION(&path); - pathrelse(&path); - if (inode->i_nlink == 0) { - /*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */ - return; - } - reiserfs_warning(inode->i_sb, "vs-13060", - "stat data of object %k (nlink == %d) " - "not found (pos %d)", - INODE_PKEY(inode), inode->i_nlink, - pos); - reiserfs_check_path(&path); - return; - } - - /* - * sigh, prepare_for_journal might schedule. When it - * schedules the FS might change. We have to detect that, - * and loop back to the search if the stat data item has moved - */ - bh = get_last_bh(&path); - ih = tp_item_head(&path); - copy_item_head(&tmp_ih, ih); - fs_gen = get_generation(inode->i_sb); - reiserfs_prepare_for_journal(inode->i_sb, bh, 1); - - /* Stat_data item has been moved after scheduling. */ - if (fs_changed(fs_gen, inode->i_sb) - && item_moved(&tmp_ih, &path)) { - reiserfs_restore_prepared_buffer(inode->i_sb, bh); - continue; - } - break; - } - update_stat_data(&path, inode, size); - journal_mark_dirty(th, bh); - pathrelse(&path); - return; -} - -/* - * reiserfs_read_locked_inode is called to read the inode off disk, and it - * does a make_bad_inode when things go wrong. But, we need to make sure - * and clear the key in the private portion of the inode, otherwise a - * corresponding iput might try to delete whatever object the inode last - * represented. 
- */ -static void reiserfs_make_bad_inode(struct inode *inode) -{ - memset(INODE_PKEY(inode), 0, KEY_SIZE); - make_bad_inode(inode); -} - -/* - * initially this function was derived from minix or ext2's analog and - * evolved as the prototype did - */ -int reiserfs_init_locked_inode(struct inode *inode, void *p) -{ - struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; - inode->i_ino = args->objectid; - INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); - return 0; -} - -/* - * looks for stat data in the tree, and fills up the fields of in-core - * inode stat data fields - */ -void reiserfs_read_locked_inode(struct inode *inode, - struct reiserfs_iget_args *args) -{ - INITIALIZE_PATH(path_to_sd); - struct cpu_key key; - unsigned long dirino; - int retval; - - dirino = args->dirid; - - /* - * set version 1, version 2 could be used too, because stat data - * key is the same in both versions - */ - _make_cpu_key(&key, KEY_FORMAT_3_5, dirino, inode->i_ino, 0, 0, 3); - - /* look for the object's stat data */ - retval = search_item(inode->i_sb, &key, &path_to_sd); - if (retval == IO_ERROR) { - reiserfs_error(inode->i_sb, "vs-13070", - "i/o failure occurred trying to find " - "stat data of %K", &key); - reiserfs_make_bad_inode(inode); - return; - } - - /* a stale NFS handle can trigger this without it being an error */ - if (retval != ITEM_FOUND) { - pathrelse(&path_to_sd); - reiserfs_make_bad_inode(inode); - clear_nlink(inode); - return; - } - - init_inode(inode, &path_to_sd); - - /* - * It is possible that knfsd is trying to access inode of a file - * that is being removed from the disk by some other thread. As we - * update sd on unlink all that is required is to check for nlink - * here. This bug was first found by Sizif when debugging - * SquidNG/Butterfly, forgotten, and found again after Philippe - * Gramoulle reproduced it. - - * More logical fix would require changes in fs/inode.c:iput() to - * remove inode from hash-table _after_ fs cleaned disk stuff up and - * in iget() to return NULL if I_FREEING inode is found in - * hash-table. - */ - - /* - * Currently there is one place where it's ok to meet inode with - * nlink==0: processing of open-unlinked and half-truncated files - * during mount (fs/reiserfs/super.c:finish_unfinished()). - */ - if ((inode->i_nlink == 0) && - !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) { - reiserfs_warning(inode->i_sb, "vs-13075", - "dead inode read from disk %K. " - "This is likely to be race with knfsd. Ignore", - &key); - reiserfs_make_bad_inode(inode); - } - - /* init inode should be relsing */ - reiserfs_check_path(&path_to_sd); - - /* - * Stat data v1 doesn't support ACLs. - */ - if (get_inode_sd_version(inode) == STAT_DATA_V1) - cache_no_acl(inode); -} - -/* - * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). - * - * @inode: inode from hash table to check - * @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args. - * - * This function is called by iget5_locked() to distinguish reiserfs inodes - * having the same inode numbers. Such inodes can only exist due to some - * error condition. One of them should be bad. Inodes with identical - * inode numbers (objectids) are distinguished by parent directory ids. 
- * - */ -int reiserfs_find_actor(struct inode *inode, void *opaque) -{ - struct reiserfs_iget_args *args; - - args = opaque; - /* args is already in CPU order */ - return (inode->i_ino == args->objectid) && - (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); -} - -struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key) -{ - struct inode *inode; - struct reiserfs_iget_args args; - int depth; - - args.objectid = key->on_disk_key.k_objectid; - args.dirid = key->on_disk_key.k_dir_id; - depth = reiserfs_write_unlock_nested(s); - inode = iget5_locked(s, key->on_disk_key.k_objectid, - reiserfs_find_actor, reiserfs_init_locked_inode, - (void *)(&args)); - reiserfs_write_lock_nested(s, depth); - if (!inode) - return ERR_PTR(-ENOMEM); - - if (inode->i_state & I_NEW) { - reiserfs_read_locked_inode(inode, &args); - unlock_new_inode(inode); - } - - if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) { - /* either due to i/o error or a stale NFS handle */ - iput(inode); - inode = NULL; - } - return inode; -} - -static struct dentry *reiserfs_get_dentry(struct super_block *sb, - u32 objectid, u32 dir_id, u32 generation) - -{ - struct cpu_key key; - struct inode *inode; - - key.on_disk_key.k_objectid = objectid; - key.on_disk_key.k_dir_id = dir_id; - reiserfs_write_lock(sb); - inode = reiserfs_iget(sb, &key); - if (inode && !IS_ERR(inode) && generation != 0 && - generation != inode->i_generation) { - iput(inode); - inode = NULL; - } - reiserfs_write_unlock(sb); - - return d_obtain_alias(inode); -} - -struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - /* - * fhtype happens to reflect the number of u32s encoded. - * due to a bug in earlier code, fhtype might indicate there - * are more u32s then actually fitted. - * so if fhtype seems to be more than len, reduce fhtype. - * Valid types are: - * 2 - objectid + dir_id - legacy support - * 3 - objectid + dir_id + generation - * 4 - objectid + dir_id + objectid and dirid of parent - legacy - * 5 - objectid + dir_id + generation + objectid and dirid of parent - * 6 - as above plus generation of directory - * 6 does not fit in NFSv2 handles - */ - if (fh_type > fh_len) { - if (fh_type != 6 || fh_len != 5) - reiserfs_warning(sb, "reiserfs-13077", - "nfsd/reiserfs, fhtype=%d, len=%d - odd", - fh_type, fh_len); - fh_type = fh_len; - } - if (fh_len < 2) - return NULL; - - return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1], - (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0); -} - -struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - if (fh_type > fh_len) - fh_type = fh_len; - if (fh_type < 4) - return NULL; - - return reiserfs_get_dentry(sb, - (fh_type >= 5) ? fid->raw[3] : fid->raw[2], - (fh_type >= 5) ? fid->raw[4] : fid->raw[3], - (fh_type == 6) ? 
fid->raw[5] : 0); -} - -int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, - struct inode *parent) -{ - int maxlen = *lenp; - - if (parent && (maxlen < 5)) { - *lenp = 5; - return FILEID_INVALID; - } else if (maxlen < 3) { - *lenp = 3; - return FILEID_INVALID; - } - - data[0] = inode->i_ino; - data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); - data[2] = inode->i_generation; - *lenp = 3; - if (parent) { - data[3] = parent->i_ino; - data[4] = le32_to_cpu(INODE_PKEY(parent)->k_dir_id); - *lenp = 5; - if (maxlen >= 6) { - data[5] = parent->i_generation; - *lenp = 6; - } - } - return *lenp; -} - -/* - * looks for stat data, then copies fields to it, marks the buffer - * containing stat data as dirty - */ -/* - * reiserfs inodes are never really dirty, since the dirty inode call - * always logs them. This call allows the VFS inode marking routines - * to properly mark inodes for datasync and such, but only actually - * does something when called for a synchronous update. - */ -int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - struct reiserfs_transaction_handle th; - int jbegin_count = 1; - - if (sb_rdonly(inode->i_sb)) - return -EROFS; - /* - * memory pressure can sometimes initiate write_inode calls with - * sync == 1, - * these cases are just when the system needs ram, not when the - * inode needs to reach disk for safety, and they can safely be - * ignored because the altered inode has already been logged. - */ - if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { - reiserfs_write_lock(inode->i_sb); - if (!journal_begin(&th, inode->i_sb, jbegin_count)) { - reiserfs_update_sd(&th, inode); - journal_end_sync(&th); - } - reiserfs_write_unlock(inode->i_sb); - } - return 0; -} - -/* - * stat data of new object is inserted already, this inserts the item - * containing "." and ".." entries - */ -static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, - struct inode *inode, - struct item_head *ih, struct treepath *path, - struct inode *dir) -{ - struct super_block *sb = th->t_super; - char empty_dir[EMPTY_DIR_SIZE]; - char *body = empty_dir; - struct cpu_key key; - int retval; - - BUG_ON(!th->t_trans_id); - - _make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id), - le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, - TYPE_DIRENTRY, 3 /*key length */ ); - - /* - * compose item head for new item. Directories consist of items of - * old type (ITEM_VERSION_1). 
Do not set key (second arg is 0), it - * is done by reiserfs_new_inode - */ - if (old_format_only(sb)) { - make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, - TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); - - make_empty_dir_item_v1(body, ih->ih_key.k_dir_id, - ih->ih_key.k_objectid, - INODE_PKEY(dir)->k_dir_id, - INODE_PKEY(dir)->k_objectid); - } else { - make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, - TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2); - - make_empty_dir_item(body, ih->ih_key.k_dir_id, - ih->ih_key.k_objectid, - INODE_PKEY(dir)->k_dir_id, - INODE_PKEY(dir)->k_objectid); - } - - /* look for place in the tree for new item */ - retval = search_item(sb, &key, path); - if (retval == IO_ERROR) { - reiserfs_error(sb, "vs-13080", - "i/o failure occurred creating new directory"); - return -EIO; - } - if (retval == ITEM_FOUND) { - pathrelse(path); - reiserfs_warning(sb, "vs-13070", - "object with this key exists (%k)", - &(ih->ih_key)); - return -EEXIST; - } - - /* insert item, that is empty directory item */ - return reiserfs_insert_item(th, path, &key, ih, inode, body); -} - -/* - * stat data of object has been inserted, this inserts the item - * containing the body of symlink - */ -static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, - struct inode *inode, - struct item_head *ih, - struct treepath *path, const char *symname, - int item_len) -{ - struct super_block *sb = th->t_super; - struct cpu_key key; - int retval; - - BUG_ON(!th->t_trans_id); - - _make_cpu_key(&key, KEY_FORMAT_3_5, - le32_to_cpu(ih->ih_key.k_dir_id), - le32_to_cpu(ih->ih_key.k_objectid), - 1, TYPE_DIRECT, 3 /*key length */ ); - - make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, - 0 /*free_space */ ); - - /* look for place in the tree for new item */ - retval = search_item(sb, &key, path); - if (retval == IO_ERROR) { - reiserfs_error(sb, "vs-13080", - "i/o failure occurred creating new symlink"); - return -EIO; - } - if (retval == ITEM_FOUND) { - pathrelse(path); - reiserfs_warning(sb, "vs-13080", - "object with this key exists (%k)", - &(ih->ih_key)); - return -EEXIST; - } - - /* insert item, that is body of symlink */ - return reiserfs_insert_item(th, path, &key, ih, inode, symname); -} - -/* - * inserts the stat data into the tree, and then calls - * reiserfs_new_directory (to insert ".", ".." item if new object is - * directory) or reiserfs_new_symlink (to insert symlink body if new - * object is symlink) or nothing (if new object is regular file) - - * NOTE! uid and gid must already be set in the inode. If we return - * non-zero due to an error, we have to drop the quota previously allocated - * for the fresh inode. This can only be done outside a transaction, so - * if we return non-zero, we also end the transaction. 
- * - * @th: active transaction handle - * @dir: parent directory for new inode - * @mode: mode of new inode - * @symname: symlink contents if inode is symlink - * @isize: 0 for regular file, EMPTY_DIR_SIZE for dirs, strlen(symname) for - * symlinks - * @inode: inode to be filled - * @security: optional security context to associate with this inode - */ -int reiserfs_new_inode(struct reiserfs_transaction_handle *th, - struct inode *dir, umode_t mode, const char *symname, - /* 0 for regular, EMTRY_DIR_SIZE for dirs, - strlen (symname) for symlinks) */ - loff_t i_size, struct dentry *dentry, - struct inode *inode, - struct reiserfs_security_handle *security) -{ - struct super_block *sb = dir->i_sb; - struct reiserfs_iget_args args; - INITIALIZE_PATH(path_to_key); - struct cpu_key key; - struct item_head ih; - struct stat_data sd; - int retval; - int err; - int depth; - - BUG_ON(!th->t_trans_id); - - depth = reiserfs_write_unlock_nested(sb); - err = dquot_alloc_inode(inode); - reiserfs_write_lock_nested(sb, depth); - if (err) - goto out_end_trans; - if (!dir->i_nlink) { - err = -EPERM; - goto out_bad_inode; - } - - /* item head of new item */ - ih.ih_key.k_dir_id = reiserfs_choose_packing(dir); - ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th)); - if (!ih.ih_key.k_objectid) { - err = -ENOMEM; - goto out_bad_inode; - } - args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid); - if (old_format_only(sb)) - make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET, - TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT); - else - make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET, - TYPE_STAT_DATA, SD_SIZE, MAX_US_INT); - memcpy(INODE_PKEY(inode), &ih.ih_key, KEY_SIZE); - args.dirid = le32_to_cpu(ih.ih_key.k_dir_id); - - depth = reiserfs_write_unlock_nested(inode->i_sb); - err = insert_inode_locked4(inode, args.objectid, - reiserfs_find_actor, &args); - reiserfs_write_lock_nested(inode->i_sb, depth); - if (err) { - err = -EINVAL; - goto out_bad_inode; - } - - if (old_format_only(sb)) - /* - * not a perfect generation count, as object ids can be reused, - * but this is as good as reiserfs can do right now. - * note that the private part of inode isn't filled in yet, - * we have to use the directory. - */ - inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid); - else -#if defined( USE_INODE_GENERATION_COUNTER ) - inode->i_generation = - le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation); -#else - inode->i_generation = ++event; -#endif - - /* fill stat data */ - set_nlink(inode, (S_ISDIR(mode) ? 2 : 1)); - - /* uid and gid must already be set by the caller for quota init */ - - simple_inode_init_ts(inode); - inode->i_size = i_size; - inode->i_blocks = 0; - inode->i_bytes = 0; - REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 
1 : - U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ; - - INIT_LIST_HEAD(&REISERFS_I(inode)->i_prealloc_list); - REISERFS_I(inode)->i_flags = 0; - REISERFS_I(inode)->i_prealloc_block = 0; - REISERFS_I(inode)->i_prealloc_count = 0; - REISERFS_I(inode)->i_trans_id = 0; - REISERFS_I(inode)->i_jl = NULL; - REISERFS_I(inode)->i_attrs = - REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; - sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); - reiserfs_init_xattr_rwsem(inode); - - /* key to search for correct place for new stat data */ - _make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id), - le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET, - TYPE_STAT_DATA, 3 /*key length */ ); - - /* find proper place for inserting of stat data */ - retval = search_item(sb, &key, &path_to_key); - if (retval == IO_ERROR) { - err = -EIO; - goto out_bad_inode; - } - if (retval == ITEM_FOUND) { - pathrelse(&path_to_key); - err = -EEXIST; - goto out_bad_inode; - } - if (old_format_only(sb)) { - /* i_uid or i_gid is too big to be stored in stat data v3.5 */ - if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) { - pathrelse(&path_to_key); - err = -EINVAL; - goto out_bad_inode; - } - inode2sd_v1(&sd, inode, inode->i_size); - } else { - inode2sd(&sd, inode, inode->i_size); - } - /* - * store in in-core inode the key of stat data and version all - * object items will have (directory items will have old offset - * format, other new objects will consist of new items) - */ - if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) - set_inode_item_key_version(inode, KEY_FORMAT_3_5); - else - set_inode_item_key_version(inode, KEY_FORMAT_3_6); - if (old_format_only(sb)) - set_inode_sd_version(inode, STAT_DATA_V1); - else - set_inode_sd_version(inode, STAT_DATA_V2); - - /* insert the stat data into the tree */ -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - if (REISERFS_I(dir)->new_packing_locality) - th->displace_new_blocks = 1; -#endif - retval = - reiserfs_insert_item(th, &path_to_key, &key, &ih, inode, - (char *)(&sd)); - if (retval) { - err = retval; - reiserfs_check_path(&path_to_key); - goto out_bad_inode; - } -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - if (!th->displace_new_blocks) - REISERFS_I(dir)->new_packing_locality = 0; -#endif - if (S_ISDIR(mode)) { - /* insert item with "." and ".." */ - retval = - reiserfs_new_directory(th, inode, &ih, &path_to_key, dir); - } - - if (S_ISLNK(mode)) { - /* insert body of symlink */ - if (!old_format_only(sb)) - i_size = ROUND_UP(i_size); - retval = - reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname, - i_size); - } - if (retval) { - err = retval; - reiserfs_check_path(&path_to_key); - journal_end(th); - goto out_inserted_sd; - } - - /* - * Mark it private if we're creating the privroot - * or something under it. 
- */ - if (IS_PRIVATE(dir) || dentry == REISERFS_SB(sb)->priv_root) - reiserfs_init_priv_inode(inode); - - if (reiserfs_posixacl(inode->i_sb)) { - reiserfs_write_unlock(inode->i_sb); - retval = reiserfs_inherit_default_acl(th, dir, dentry, inode); - reiserfs_write_lock(inode->i_sb); - if (retval) { - err = retval; - reiserfs_check_path(&path_to_key); - journal_end(th); - goto out_inserted_sd; - } - } else if (inode->i_sb->s_flags & SB_POSIXACL) { - reiserfs_warning(inode->i_sb, "jdm-13090", - "ACLs aren't enabled in the fs, " - "but vfs thinks they are!"); - } - - if (security->name) { - reiserfs_write_unlock(inode->i_sb); - retval = reiserfs_security_write(th, inode, security); - reiserfs_write_lock(inode->i_sb); - if (retval) { - err = retval; - reiserfs_check_path(&path_to_key); - retval = journal_end(th); - if (retval) - err = retval; - goto out_inserted_sd; - } - } - - reiserfs_update_sd(th, inode); - reiserfs_check_path(&path_to_key); - - return 0; - -out_bad_inode: - /* Invalidate the object, nothing was inserted yet */ - INODE_PKEY(inode)->k_objectid = 0; - - /* Quota change must be inside a transaction for journaling */ - depth = reiserfs_write_unlock_nested(inode->i_sb); - dquot_free_inode(inode); - reiserfs_write_lock_nested(inode->i_sb, depth); - -out_end_trans: - journal_end(th); - /* - * Drop can be outside and it needs more credits so it's better - * to have it outside - */ - depth = reiserfs_write_unlock_nested(inode->i_sb); - dquot_drop(inode); - reiserfs_write_lock_nested(inode->i_sb, depth); - inode->i_flags |= S_NOQUOTA; - make_bad_inode(inode); - -out_inserted_sd: - clear_nlink(inode); - th->t_trans_id = 0; /* so the caller can't use this handle later */ - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - iput(inode); - return err; -} - -/* - * finds the tail page in the page cache, - * reads the last block in. - * - * On success, page_result is set to a locked, pinned page, and bh_result - * is set to an up to date buffer for the last block in the file. returns 0. - * - * tail conversion is not done, so bh_result might not be valid for writing - * check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before - * trying to write the block. - * - * on failure, nonzero is returned, page_result and bh_result are untouched. - */ -static int grab_tail_page(struct inode *inode, - struct page **page_result, - struct buffer_head **bh_result) -{ - - /* - * we want the page with the last byte in the file, - * not the page that will hold the next byte for appending - */ - unsigned long index = (inode->i_size - 1) >> PAGE_SHIFT; - unsigned long pos = 0; - unsigned long start = 0; - unsigned long blocksize = inode->i_sb->s_blocksize; - unsigned long offset = (inode->i_size) & (PAGE_SIZE - 1); - struct buffer_head *bh; - struct buffer_head *head; - struct folio *folio; - int error; - - /* - * we know that we are only called with inode->i_size > 0. - * we also know that a file tail can never be as big as a block - * If i_size % blocksize == 0, our file is currently block aligned - * and it won't need converting or zeroing after a truncate. 
- */ - if ((offset & (blocksize - 1)) == 0) { - return -ENOENT; - } - folio = __filemap_get_folio(inode->i_mapping, index, - FGP_LOCK | FGP_ACCESSED | FGP_CREAT, - mapping_gfp_mask(inode->i_mapping)); - if (IS_ERR(folio)) - return PTR_ERR(folio); - /* start within the page of the last block in the file */ - start = (offset / blocksize) * blocksize; - - error = __block_write_begin(folio, start, offset - start, - reiserfs_get_block_create_0); - if (error) - goto unlock; - - head = folio_buffers(folio); - bh = head; - do { - if (pos >= start) { - break; - } - bh = bh->b_this_page; - pos += blocksize; - } while (bh != head); - - if (!buffer_uptodate(bh)) { - /* - * note, this should never happen, prepare_write should be - * taking care of this for us. If the buffer isn't up to - * date, I've screwed up the code to find the buffer, or the - * code to call prepare_write - */ - reiserfs_error(inode->i_sb, "clm-6000", - "error reading block %lu", bh->b_blocknr); - error = -EIO; - goto unlock; - } - *bh_result = bh; - *page_result = &folio->page; - - return error; - -unlock: - folio_unlock(folio); - folio_put(folio); - return error; -} - -/* - * vfs version of truncate file. Must NOT be called with - * a transaction already started. - * - * some code taken from block_truncate_page - */ -int reiserfs_truncate_file(struct inode *inode, int update_timestamps) -{ - struct reiserfs_transaction_handle th; - /* we want the offset for the first byte after the end of the file */ - unsigned long offset = inode->i_size & (PAGE_SIZE - 1); - unsigned blocksize = inode->i_sb->s_blocksize; - unsigned length; - struct page *page = NULL; - int error; - struct buffer_head *bh = NULL; - int err2; - - reiserfs_write_lock(inode->i_sb); - - if (inode->i_size > 0) { - error = grab_tail_page(inode, &page, &bh); - if (error) { - /* - * -ENOENT means we truncated past the end of the - * file, and get_block_create_0 could not find a - * block to read in, which is ok. - */ - if (error != -ENOENT) - reiserfs_error(inode->i_sb, "clm-6001", - "grab_tail_page failed %d", - error); - page = NULL; - bh = NULL; - } - } - - /* - * so, if page != NULL, we have a buffer head for the offset at - * the end of the file. if the bh is mapped, and bh->b_blocknr != 0, - * then we have an unformatted node. Otherwise, we have a direct item, - * and no zeroing is required on disk. We zero after the truncate, - * because the truncate might pack the item anyway - * (it will unmap bh if it packs). - * - * it is enough to reserve space in transaction for 2 balancings: - * one for "save" link adding and another for the first - * cut_from_item. 
1 is for update_sd - */ - error = journal_begin(&th, inode->i_sb, - JOURNAL_PER_BALANCE_CNT * 2 + 1); - if (error) - goto out; - reiserfs_update_inode_transaction(inode); - if (update_timestamps) - /* - * we are doing real truncate: if the system crashes - * before the last transaction of truncating gets committed - * - on reboot the file either appears truncated properly - * or not truncated at all - */ - add_save_link(&th, inode, 1); - err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps); - error = journal_end(&th); - if (error) - goto out; - - /* check reiserfs_do_truncate after ending the transaction */ - if (err2) { - error = err2; - goto out; - } - - if (update_timestamps) { - error = remove_save_link(inode, 1 /* truncate */); - if (error) - goto out; - } - - if (page) { - length = offset & (blocksize - 1); - /* if we are not on a block boundary */ - if (length) { - length = blocksize - length; - zero_user(page, offset, length); - if (buffer_mapped(bh) && bh->b_blocknr != 0) { - mark_buffer_dirty(bh); - } - } - unlock_page(page); - put_page(page); - } - - reiserfs_write_unlock(inode->i_sb); - - return 0; -out: - if (page) { - unlock_page(page); - put_page(page); - } - - reiserfs_write_unlock(inode->i_sb); - - return error; -} - -static int map_block_for_writepage(struct inode *inode, - struct buffer_head *bh_result, - unsigned long block) -{ - struct reiserfs_transaction_handle th; - int fs_gen; - struct item_head tmp_ih; - struct item_head *ih; - struct buffer_head *bh; - __le32 *item; - struct cpu_key key; - INITIALIZE_PATH(path); - int pos_in_item; - int jbegin_count = JOURNAL_PER_BALANCE_CNT; - loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1; - int retval; - int use_get_block = 0; - int bytes_copied = 0; - int copy_size; - int trans_running = 0; - - /* - * catch places below that try to log something without - * starting a trans - */ - th.t_trans_id = 0; - - if (!buffer_uptodate(bh_result)) { - return -EIO; - } - - kmap(bh_result->b_page); -start_over: - reiserfs_write_lock(inode->i_sb); - make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3); - -research: - retval = search_for_position_by_key(inode->i_sb, &key, &path); - if (retval != POSITION_FOUND) { - use_get_block = 1; - goto out; - } - - bh = get_last_bh(&path); - ih = tp_item_head(&path); - item = tp_item_body(&path); - pos_in_item = path.pos_in_item; - - /* we've found an unformatted node */ - if (indirect_item_found(retval, ih)) { - if (bytes_copied > 0) { - reiserfs_warning(inode->i_sb, "clm-6002", - "bytes_copied %d", bytes_copied); - } - if (!get_block_num(item, pos_in_item)) { - /* crap, we are writing to a hole */ - use_get_block = 1; - goto out; - } - set_block_dev_mapped(bh_result, - get_block_num(item, pos_in_item), inode); - } else if (is_direct_le_ih(ih)) { - char *p; - p = page_address(bh_result->b_page); - p += (byte_offset - 1) & (PAGE_SIZE - 1); - copy_size = ih_item_len(ih) - pos_in_item; - - fs_gen = get_generation(inode->i_sb); - copy_item_head(&tmp_ih, ih); - - if (!trans_running) { - /* vs-3050 is gone, no need to drop the path */ - retval = journal_begin(&th, inode->i_sb, jbegin_count); - if (retval) - goto out; - reiserfs_update_inode_transaction(inode); - trans_running = 1; - if (fs_changed(fs_gen, inode->i_sb) - && item_moved(&tmp_ih, &path)) { - reiserfs_restore_prepared_buffer(inode->i_sb, - bh); - goto research; - } - } - - reiserfs_prepare_for_journal(inode->i_sb, bh, 1); - - if (fs_changed(fs_gen, inode->i_sb) - && item_moved(&tmp_ih, &path)) { - 
reiserfs_restore_prepared_buffer(inode->i_sb, bh); - goto research; - } - - memcpy(ih_item_body(bh, ih) + pos_in_item, p + bytes_copied, - copy_size); - - journal_mark_dirty(&th, bh); - bytes_copied += copy_size; - set_block_dev_mapped(bh_result, 0, inode); - - /* are there still bytes left? */ - if (bytes_copied < bh_result->b_size && - (byte_offset + bytes_copied) < inode->i_size) { - set_cpu_key_k_offset(&key, - cpu_key_k_offset(&key) + - copy_size); - goto research; - } - } else { - reiserfs_warning(inode->i_sb, "clm-6003", - "bad item inode %lu", inode->i_ino); - retval = -EIO; - goto out; - } - retval = 0; - -out: - pathrelse(&path); - if (trans_running) { - int err = journal_end(&th); - if (err) - retval = err; - trans_running = 0; - } - reiserfs_write_unlock(inode->i_sb); - - /* this is where we fill in holes in the file. */ - if (use_get_block) { - retval = reiserfs_get_block(inode, block, bh_result, - GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX - | GET_BLOCK_NO_DANGLE); - if (!retval) { - if (!buffer_mapped(bh_result) - || bh_result->b_blocknr == 0) { - /* get_block failed to find a mapped unformatted node. */ - use_get_block = 0; - goto start_over; - } - } - } - kunmap(bh_result->b_page); - - if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { - /* - * we've copied data from the page into the direct item, so the - * buffer in the page is now clean, mark it to reflect that. - */ - lock_buffer(bh_result); - clear_buffer_dirty(bh_result); - unlock_buffer(bh_result); - } - return retval; -} - -/* - * mason@suse.com: updated in 2.5.54 to follow the same general io - * start/recovery path as __block_write_full_folio, along with special - * code to handle reiserfs tails. - */ -static int reiserfs_write_folio(struct folio *folio, - struct writeback_control *wbc, void *data) -{ - struct inode *inode = folio->mapping->host; - unsigned long end_index = inode->i_size >> PAGE_SHIFT; - int error = 0; - unsigned long block; - sector_t last_block; - struct buffer_head *head, *bh; - int partial = 0; - int nr = 0; - int checked = folio_test_checked(folio); - struct reiserfs_transaction_handle th; - struct super_block *s = inode->i_sb; - int bh_per_page = PAGE_SIZE / s->s_blocksize; - th.t_trans_id = 0; - - /* no logging allowed when nonblocking or from PF_MEMALLOC */ - if (checked && (current->flags & PF_MEMALLOC)) { - folio_redirty_for_writepage(wbc, folio); - folio_unlock(folio); - return 0; - } - - /* - * The folio dirty bit is cleared before writepage is called, which - * means we have to tell create_empty_buffers to make dirty buffers - * The folio really should be up to date at this point, so tossing - * in the BH_Uptodate is just a sanity check. 
- */ - head = folio_buffers(folio); - if (!head) - head = create_empty_buffers(folio, s->s_blocksize, - (1 << BH_Dirty) | (1 << BH_Uptodate)); - - /* - * last folio in the file, zero out any contents past the - * last byte in the file - */ - if (folio->index >= end_index) { - unsigned last_offset; - - last_offset = inode->i_size & (PAGE_SIZE - 1); - /* no file contents in this folio */ - if (folio->index >= end_index + 1 || !last_offset) { - folio_unlock(folio); - return 0; - } - folio_zero_segment(folio, last_offset, folio_size(folio)); - } - bh = head; - block = folio->index << (PAGE_SHIFT - s->s_blocksize_bits); - last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; - /* first map all the buffers, logging any direct items we find */ - do { - if (block > last_block) { - /* - * This can happen when the block size is less than - * the folio size. The corresponding bytes in the folio - * were zero filled above - */ - clear_buffer_dirty(bh); - set_buffer_uptodate(bh); - } else if ((checked || buffer_dirty(bh)) && - (!buffer_mapped(bh) || bh->b_blocknr == 0)) { - /* - * not mapped yet, or it points to a direct item, search - * the btree for the mapping info, and log any direct - * items found - */ - if ((error = map_block_for_writepage(inode, bh, block))) { - goto fail; - } - } - bh = bh->b_this_page; - block++; - } while (bh != head); - - /* - * we start the transaction after map_block_for_writepage, - * because it can create holes in the file (an unbounded operation). - * starting it here, we can make a reliable estimate for how many - * blocks we're going to log - */ - if (checked) { - folio_clear_checked(folio); - reiserfs_write_lock(s); - error = journal_begin(&th, s, bh_per_page + 1); - if (error) { - reiserfs_write_unlock(s); - goto fail; - } - reiserfs_update_inode_transaction(inode); - } - /* now go through and lock any dirty buffers on the folio */ - do { - get_bh(bh); - if (!buffer_mapped(bh)) - continue; - if (buffer_mapped(bh) && bh->b_blocknr == 0) - continue; - - if (checked) { - reiserfs_prepare_for_journal(s, bh, 1); - journal_mark_dirty(&th, bh); - continue; - } - /* - * from this point on, we know the buffer is mapped to a - * real block and not a direct item - */ - if (wbc->sync_mode != WB_SYNC_NONE) { - lock_buffer(bh); - } else { - if (!trylock_buffer(bh)) { - folio_redirty_for_writepage(wbc, folio); - continue; - } - } - if (test_clear_buffer_dirty(bh)) { - mark_buffer_async_write(bh); - } else { - unlock_buffer(bh); - } - } while ((bh = bh->b_this_page) != head); - - if (checked) { - error = journal_end(&th); - reiserfs_write_unlock(s); - if (error) - goto fail; - } - BUG_ON(folio_test_writeback(folio)); - folio_start_writeback(folio); - folio_unlock(folio); - - /* - * since any buffer might be the only dirty buffer on the folio, - * the first submit_bh can bring the folio out of writeback. - * be careful with the buffers. - */ - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - submit_bh(REQ_OP_WRITE, bh); - nr++; - } - put_bh(bh); - bh = next; - } while (bh != head); - - error = 0; -done: - if (nr == 0) { - /* - * if this folio only had a direct item, it is very possible for - * no io to be required without there being an error. 
Or, - * someone else could have locked them and sent them down the - * pipe without locking the folio - */ - bh = head; - do { - if (!buffer_uptodate(bh)) { - partial = 1; - break; - } - bh = bh->b_this_page; - } while (bh != head); - if (!partial) - folio_mark_uptodate(folio); - folio_end_writeback(folio); - } - return error; - -fail: - /* - * catches various errors, we need to make sure any valid dirty blocks - * get to the media. The folio is currently locked and not marked for - * writeback - */ - folio_clear_uptodate(folio); - bh = head; - do { - get_bh(bh); - if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) { - lock_buffer(bh); - mark_buffer_async_write(bh); - } else { - /* - * clear any dirty bits that might have come from - * getting attached to a dirty folio - */ - clear_buffer_dirty(bh); - } - bh = bh->b_this_page; - } while (bh != head); - BUG_ON(folio_test_writeback(folio)); - folio_start_writeback(folio); - folio_unlock(folio); - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - clear_buffer_dirty(bh); - submit_bh(REQ_OP_WRITE, bh); - nr++; - } - put_bh(bh); - bh = next; - } while (bh != head); - goto done; -} - -static int reiserfs_read_folio(struct file *f, struct folio *folio) -{ - return block_read_full_folio(folio, reiserfs_get_block); -} - -static int reiserfs_writepages(struct address_space *mapping, - struct writeback_control *wbc) -{ - reiserfs_wait_on_write_block(mapping->host->i_sb); - return write_cache_pages(mapping, wbc, reiserfs_write_folio, NULL); -} - -static void reiserfs_truncate_failed_write(struct inode *inode) -{ - truncate_inode_pages(inode->i_mapping, inode->i_size); - reiserfs_truncate_file(inode, 0); -} - -static int reiserfs_write_begin(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, - struct folio **foliop, void **fsdata) -{ - struct inode *inode; - struct folio *folio; - pgoff_t index; - int ret; - int old_ref = 0; - - inode = mapping->host; - index = pos >> PAGE_SHIFT; - folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, - mapping_gfp_mask(mapping)); - if (IS_ERR(folio)) - return PTR_ERR(folio); - *foliop = folio; - - reiserfs_wait_on_write_block(inode->i_sb); - fix_tail_page_for_writing(&folio->page); - if (reiserfs_transaction_running(inode->i_sb)) { - struct reiserfs_transaction_handle *th; - th = (struct reiserfs_transaction_handle *)current-> - journal_info; - BUG_ON(!th->t_refcount); - BUG_ON(!th->t_trans_id); - old_ref = th->t_refcount; - th->t_refcount++; - } - ret = __block_write_begin(folio, pos, len, reiserfs_get_block); - if (ret && reiserfs_transaction_running(inode->i_sb)) { - struct reiserfs_transaction_handle *th = current->journal_info; - /* - * this gets a little ugly. If reiserfs_get_block returned an - * error and left a transacstion running, we've got to close - * it, and we've got to free handle if it was a persistent - * transaction. - * - * But, if we had nested into an existing transaction, we need - * to just drop the ref count on the handle. - * - * If old_ref == 0, the transaction is from reiserfs_get_block, - * and it was a persistent trans. Otherwise, it was nested - * above. 
- */ - if (th->t_refcount > old_ref) { - if (old_ref) - th->t_refcount--; - else { - int err; - reiserfs_write_lock(inode->i_sb); - err = reiserfs_end_persistent_transaction(th); - reiserfs_write_unlock(inode->i_sb); - if (err) - ret = err; - } - } - } - if (ret) { - folio_unlock(folio); - folio_put(folio); - /* Truncate allocated blocks */ - reiserfs_truncate_failed_write(inode); - } - return ret; -} - -int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len) -{ - struct inode *inode = page->mapping->host; - int ret; - int old_ref = 0; - int depth; - - depth = reiserfs_write_unlock_nested(inode->i_sb); - reiserfs_wait_on_write_block(inode->i_sb); - reiserfs_write_lock_nested(inode->i_sb, depth); - - fix_tail_page_for_writing(page); - if (reiserfs_transaction_running(inode->i_sb)) { - struct reiserfs_transaction_handle *th; - th = (struct reiserfs_transaction_handle *)current-> - journal_info; - BUG_ON(!th->t_refcount); - BUG_ON(!th->t_trans_id); - old_ref = th->t_refcount; - th->t_refcount++; - } - - ret = __block_write_begin(page_folio(page), from, len, reiserfs_get_block); - if (ret && reiserfs_transaction_running(inode->i_sb)) { - struct reiserfs_transaction_handle *th = current->journal_info; - /* - * this gets a little ugly. If reiserfs_get_block returned an - * error and left a transacstion running, we've got to close - * it, and we've got to free handle if it was a persistent - * transaction. - * - * But, if we had nested into an existing transaction, we need - * to just drop the ref count on the handle. - * - * If old_ref == 0, the transaction is from reiserfs_get_block, - * and it was a persistent trans. Otherwise, it was nested - * above. - */ - if (th->t_refcount > old_ref) { - if (old_ref) - th->t_refcount--; - else { - int err; - reiserfs_write_lock(inode->i_sb); - err = reiserfs_end_persistent_transaction(th); - reiserfs_write_unlock(inode->i_sb); - if (err) - ret = err; - } - } - } - return ret; - -} - -static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) -{ - return generic_block_bmap(as, block, reiserfs_bmap); -} - -static int reiserfs_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct folio *folio, void *fsdata) -{ - struct inode *inode = folio->mapping->host; - int ret = 0; - int update_sd = 0; - struct reiserfs_transaction_handle *th; - unsigned start; - bool locked = false; - - reiserfs_wait_on_write_block(inode->i_sb); - if (reiserfs_transaction_running(inode->i_sb)) - th = current->journal_info; - else - th = NULL; - - start = pos & (PAGE_SIZE - 1); - if (unlikely(copied < len)) { - if (!folio_test_uptodate(folio)) - copied = 0; - - folio_zero_new_buffers(folio, start + copied, start + len); - } - flush_dcache_folio(folio); - - reiserfs_commit_page(inode, &folio->page, start, start + copied); - - /* - * generic_commit_write does this for us, but does not update the - * transaction tracking stuff when the size changes. So, we have - * to do the i_size updates here. 
- */ - if (pos + copied > inode->i_size) { - struct reiserfs_transaction_handle myth; - reiserfs_write_lock(inode->i_sb); - locked = true; - /* - * If the file have grown beyond the border where it - * can have a tail, unmark it as needing a tail - * packing - */ - if ((have_large_tails(inode->i_sb) - && inode->i_size > i_block_size(inode) * 4) - || (have_small_tails(inode->i_sb) - && inode->i_size > i_block_size(inode))) - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; - - ret = journal_begin(&myth, inode->i_sb, 1); - if (ret) - goto journal_error; - - reiserfs_update_inode_transaction(inode); - inode->i_size = pos + copied; - /* - * this will just nest into our transaction. It's important - * to use mark_inode_dirty so the inode gets pushed around on - * the dirty lists, and so that O_SYNC works as expected - */ - mark_inode_dirty(inode); - reiserfs_update_sd(&myth, inode); - update_sd = 1; - ret = journal_end(&myth); - if (ret) - goto journal_error; - } - if (th) { - if (!locked) { - reiserfs_write_lock(inode->i_sb); - locked = true; - } - if (!update_sd) - mark_inode_dirty(inode); - ret = reiserfs_end_persistent_transaction(th); - if (ret) - goto out; - } - -out: - if (locked) - reiserfs_write_unlock(inode->i_sb); - folio_unlock(folio); - folio_put(folio); - - if (pos + len > inode->i_size) - reiserfs_truncate_failed_write(inode); - - return ret == 0 ? copied : ret; - -journal_error: - reiserfs_write_unlock(inode->i_sb); - locked = false; - if (th) { - if (!update_sd) - reiserfs_update_sd(th, inode); - ret = reiserfs_end_persistent_transaction(th); - } - goto out; -} - -int reiserfs_commit_write(struct file *f, struct page *page, - unsigned from, unsigned to) -{ - struct inode *inode = page->mapping->host; - loff_t pos = ((loff_t) page->index << PAGE_SHIFT) + to; - int ret = 0; - int update_sd = 0; - struct reiserfs_transaction_handle *th = NULL; - int depth; - - depth = reiserfs_write_unlock_nested(inode->i_sb); - reiserfs_wait_on_write_block(inode->i_sb); - reiserfs_write_lock_nested(inode->i_sb, depth); - - if (reiserfs_transaction_running(inode->i_sb)) { - th = current->journal_info; - } - reiserfs_commit_page(inode, page, from, to); - - /* - * generic_commit_write does this for us, but does not update the - * transaction tracking stuff when the size changes. So, we have - * to do the i_size updates here. - */ - if (pos > inode->i_size) { - struct reiserfs_transaction_handle myth; - /* - * If the file have grown beyond the border where it - * can have a tail, unmark it as needing a tail - * packing - */ - if ((have_large_tails(inode->i_sb) - && inode->i_size > i_block_size(inode) * 4) - || (have_small_tails(inode->i_sb) - && inode->i_size > i_block_size(inode))) - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; - - ret = journal_begin(&myth, inode->i_sb, 1); - if (ret) - goto journal_error; - - reiserfs_update_inode_transaction(inode); - inode->i_size = pos; - /* - * this will just nest into our transaction. 
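The tail-packing test a few lines up decides when a growing file stops being a candidate for packing on close: with large tails enabled the cutoff is four block sizes, with small tails a single block size. A hedged sketch of that decision follows, with the mount options reduced to two booleans; the names are stand-ins, not the kernel helpers.

#include <stdbool.h>
#include <stdio.h>

static bool large_tails = true;    /* stand-in for have_large_tails() */
static bool small_tails = false;   /* stand-in for have_small_tails() */

/* mirrors the size test used before clearing the pack-on-close flag */
static bool still_tail_candidate(unsigned long long i_size,
                                 unsigned long block_size)
{
        if (large_tails && i_size > block_size * 4)
                return false;
        if (small_tails && i_size > block_size)
                return false;
        return true;
}

int main(void)
{
        unsigned long bs = 4096;
        unsigned long long sizes[] = { 1000, 4096, 16384, 16385, 100000 };

        for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                printf("size %-7llu -> %s\n", sizes[i],
                       still_tail_candidate(sizes[i], bs) ?
                       "keep pack-on-close" : "clear pack-on-close");
        return 0;
}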
It's important - * to use mark_inode_dirty so the inode gets pushed around - * on the dirty lists, and so that O_SYNC works as expected - */ - mark_inode_dirty(inode); - reiserfs_update_sd(&myth, inode); - update_sd = 1; - ret = journal_end(&myth); - if (ret) - goto journal_error; - } - if (th) { - if (!update_sd) - mark_inode_dirty(inode); - ret = reiserfs_end_persistent_transaction(th); - if (ret) - goto out; - } - -out: - return ret; - -journal_error: - if (th) { - if (!update_sd) - reiserfs_update_sd(th, inode); - ret = reiserfs_end_persistent_transaction(th); - } - - return ret; -} - -void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode) -{ - if (reiserfs_attrs(inode->i_sb)) { - if (sd_attrs & REISERFS_SYNC_FL) - inode->i_flags |= S_SYNC; - else - inode->i_flags &= ~S_SYNC; - if (sd_attrs & REISERFS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - if (sd_attrs & REISERFS_APPEND_FL) - inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - if (sd_attrs & REISERFS_NOATIME_FL) - inode->i_flags |= S_NOATIME; - else - inode->i_flags &= ~S_NOATIME; - if (sd_attrs & REISERFS_NOTAIL_FL) - REISERFS_I(inode)->i_flags |= i_nopack_mask; - else - REISERFS_I(inode)->i_flags &= ~i_nopack_mask; - } -} - -/* - * decide if this buffer needs to stay around for data logging or ordered - * write purposes - */ -static int invalidate_folio_can_drop(struct inode *inode, struct buffer_head *bh) -{ - int ret = 1; - struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); - - lock_buffer(bh); - spin_lock(&j->j_dirty_buffers_lock); - if (!buffer_mapped(bh)) { - goto free_jh; - } - /* - * the page is locked, and the only places that log a data buffer - * also lock the page. - */ - if (reiserfs_file_data_log(inode)) { - /* - * very conservative, leave the buffer pinned if - * anyone might need it. - */ - if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { - ret = 0; - } - } else if (buffer_dirty(bh)) { - struct reiserfs_journal_list *jl; - struct reiserfs_jh *jh = bh->b_private; - - /* - * why is this safe? - * reiserfs_setattr updates i_size in the on disk - * stat data before allowing vmtruncate to be called. - * - * If buffer was put onto the ordered list for this - * transaction, we know for sure either this transaction - * or an older one already has updated i_size on disk, - * and this ordered data won't be referenced in the file - * if we crash. - * - * if the buffer was put onto the ordered list for an older - * transaction, we need to leave it around - */ - if (jh && (jl = jh->jl) - && jl != SB_JOURNAL(inode->i_sb)->j_current_jl) - ret = 0; - } -free_jh: - if (ret && bh->b_private) { - reiserfs_free_jh(bh); - } - spin_unlock(&j->j_dirty_buffers_lock); - unlock_buffer(bh); - return ret; -} - -/* clm -- taken from fs/buffer.c:block_invalidate_folio */ -static void reiserfs_invalidate_folio(struct folio *folio, size_t offset, - size_t length) -{ - struct buffer_head *head, *bh, *next; - struct inode *inode = folio->mapping->host; - unsigned int curr_off = 0; - unsigned int stop = offset + length; - int partial_page = (offset || length < folio_size(folio)); - int ret = 1; - - BUG_ON(!folio_test_locked(folio)); - - if (!partial_page) - folio_clear_checked(folio); - - head = folio_buffers(folio); - if (!head) - goto out; - - bh = head; - do { - unsigned int next_off = curr_off + bh->b_size; - next = bh->b_this_page; - - if (next_off > stop) - goto out; - - /* - * is this block fully invalidated? 
- */ - if (offset <= curr_off) { - if (invalidate_folio_can_drop(inode, bh)) - reiserfs_unmap_buffer(bh); - else - ret = 0; - } - curr_off = next_off; - bh = next; - } while (bh != head); - - /* - * We release buffers only if the entire page is being invalidated. - * The get_block cached value has been unconditionally invalidated, - * so real IO is not possible anymore. - */ - if (!partial_page && ret) { - ret = filemap_release_folio(folio, 0); - /* maybe should BUG_ON(!ret); - neilb */ - } -out: - return; -} - -static bool reiserfs_dirty_folio(struct address_space *mapping, - struct folio *folio) -{ - if (reiserfs_file_data_log(mapping->host)) { - folio_set_checked(folio); - return filemap_dirty_folio(mapping, folio); - } - return block_dirty_folio(mapping, folio); -} - -/* - * Returns true if the folio's buffers were dropped. The folio is locked. - * - * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads - * in the buffers at folio_buffers(folio). - * - * even in -o notail mode, we can't be sure an old mount without -o notail - * didn't create files with tails. - */ -static bool reiserfs_release_folio(struct folio *folio, gfp_t unused_gfp_flags) -{ - struct inode *inode = folio->mapping->host; - struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); - struct buffer_head *head; - struct buffer_head *bh; - bool ret = true; - - WARN_ON(folio_test_checked(folio)); - spin_lock(&j->j_dirty_buffers_lock); - head = folio_buffers(folio); - bh = head; - do { - if (bh->b_private) { - if (!buffer_dirty(bh) && !buffer_locked(bh)) { - reiserfs_free_jh(bh); - } else { - ret = false; - break; - } - } - bh = bh->b_this_page; - } while (bh != head); - if (ret) - ret = try_to_free_buffers(folio); - spin_unlock(&j->j_dirty_buffers_lock); - return ret; -} - -/* - * We thank Mingming Cao for helping us understand in great detail what - * to do in this section of the code. - */ -static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - size_t count = iov_iter_count(iter); - ssize_t ret; - - ret = blockdev_direct_IO(iocb, inode, iter, - reiserfs_get_blocks_direct_io); - - /* - * In case of error extending write may have instantiated a few - * blocks outside i_size. Trim these off again. 
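As an aside on the invalidate_folio walk above: a buffer is dropped only when it lies entirely inside the invalidated byte range, which is why the loop tracks curr_off and stops as soon as next_off passes offset + length. A small standalone model of that walk, with made-up folio and block sizes:

#include <stdio.h>

int main(void)
{
        unsigned int folio_size = 4096, blk = 1024;  /* illustrative sizes */
        unsigned int offset = 512, length = 2048;    /* range being invalidated */
        unsigned int stop = offset + length;
        unsigned int curr_off = 0;

        for (unsigned int i = 0; curr_off < folio_size; i++) {
                unsigned int next_off = curr_off + blk;

                /* same early exit as the loop above */
                if (next_off > stop)
                        break;
                printf("block %u [%u..%u): %s\n", i, curr_off, next_off,
                       offset <= curr_off ? "drop" : "keep (partially valid)");
                curr_off = next_off;
        }
        return 0;
}

For offset 512 and length 2048 only the second 1 KiB block is fully covered, so it is the only one that would be handed to reiserfs_unmap_buffer().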
- */ - if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = iocb->ki_pos + count; - - if ((end > isize) && inode_newsize_ok(inode, isize) == 0) { - truncate_setsize(inode, isize); - reiserfs_vfs_truncate_file(inode); - } - } - - return ret; -} - -int reiserfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, - struct iattr *attr) -{ - struct inode *inode = d_inode(dentry); - unsigned int ia_valid; - int error; - - error = setattr_prepare(&nop_mnt_idmap, dentry, attr); - if (error) - return error; - - /* must be turned off for recursive notify_change calls */ - ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); - - if (is_quota_modification(&nop_mnt_idmap, inode, attr)) { - error = dquot_initialize(inode); - if (error) - return error; - } - reiserfs_write_lock(inode->i_sb); - if (attr->ia_valid & ATTR_SIZE) { - /* - * version 2 items will be caught by the s_maxbytes check - * done for us in vmtruncate - */ - if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && - attr->ia_size > MAX_NON_LFS) { - reiserfs_write_unlock(inode->i_sb); - error = -EFBIG; - goto out; - } - - inode_dio_wait(inode); - - /* fill in hole pointers in the expanding truncate case. */ - if (attr->ia_size > inode->i_size) { - loff_t pos = attr->ia_size; - - if ((pos & (inode->i_sb->s_blocksize - 1)) == 0) - pos++; - error = generic_cont_expand_simple(inode, pos); - if (REISERFS_I(inode)->i_prealloc_count > 0) { - int err; - struct reiserfs_transaction_handle th; - /* we're changing at most 2 bitmaps, inode + super */ - err = journal_begin(&th, inode->i_sb, 4); - if (!err) { - reiserfs_discard_prealloc(&th, inode); - err = journal_end(&th); - } - if (err) - error = err; - } - if (error) { - reiserfs_write_unlock(inode->i_sb); - goto out; - } - /* - * file size is changed, ctime and mtime are - * to be updated - */ - attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME); - } - } - reiserfs_write_unlock(inode->i_sb); - - if ((((attr->ia_valid & ATTR_UID) && (from_kuid(&init_user_ns, attr->ia_uid) & ~0xffff)) || - ((attr->ia_valid & ATTR_GID) && (from_kgid(&init_user_ns, attr->ia_gid) & ~0xffff))) && - (get_inode_sd_version(inode) == STAT_DATA_V1)) { - /* stat data of format v3.5 has 16 bit uid and gid */ - error = -EINVAL; - goto out; - } - - if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || - (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { - struct reiserfs_transaction_handle th; - int jbegin_count = - 2 * - (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + - REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + - 2; - - error = reiserfs_chown_xattrs(inode, attr); - - if (error) - return error; - - /* - * (user+group)*(old+new) structure - we count quota - * info and , inode write (sb, inode) - */ - reiserfs_write_lock(inode->i_sb); - error = journal_begin(&th, inode->i_sb, jbegin_count); - reiserfs_write_unlock(inode->i_sb); - if (error) - goto out; - error = dquot_transfer(&nop_mnt_idmap, inode, attr); - reiserfs_write_lock(inode->i_sb); - if (error) { - journal_end(&th); - reiserfs_write_unlock(inode->i_sb); - goto out; - } - - /* - * Update corresponding info in inode so that everything - * is in one transaction - */ - if (attr->ia_valid & ATTR_UID) - inode->i_uid = attr->ia_uid; - if (attr->ia_valid & ATTR_GID) - inode->i_gid = attr->ia_gid; - mark_inode_dirty(inode); - error = journal_end(&th); - reiserfs_write_unlock(inode->i_sb); - if (error) - goto out; - } - - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) 
{ - error = inode_newsize_ok(inode, attr->ia_size); - if (!error) { - /* - * Could race against reiserfs_file_release - * if called from NFS, so take tailpack mutex. - */ - mutex_lock(&REISERFS_I(inode)->tailpack); - truncate_setsize(inode, attr->ia_size); - reiserfs_truncate_file(inode, 1); - mutex_unlock(&REISERFS_I(inode)->tailpack); - } - } - - if (!error) { - setattr_copy(&nop_mnt_idmap, inode, attr); - mark_inode_dirty(inode); - } - - if (!error && reiserfs_posixacl(inode->i_sb)) { - if (attr->ia_valid & ATTR_MODE) - error = reiserfs_acl_chmod(dentry); - } - -out: - return error; -} - -const struct address_space_operations reiserfs_address_space_operations = { - .writepages = reiserfs_writepages, - .read_folio = reiserfs_read_folio, - .readahead = reiserfs_readahead, - .release_folio = reiserfs_release_folio, - .invalidate_folio = reiserfs_invalidate_folio, - .write_begin = reiserfs_write_begin, - .write_end = reiserfs_write_end, - .bmap = reiserfs_aop_bmap, - .direct_IO = reiserfs_direct_IO, - .dirty_folio = reiserfs_dirty_folio, - .migrate_folio = buffer_migrate_folio, -}; diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c deleted file mode 100644 index dd33f8cc6eda..000000000000 --- a/fs/reiserfs/ioctl.c +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include -#include -#include -#include "reiserfs.h" -#include -#include -#include -#include -#include - -int reiserfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) -{ - struct inode *inode = d_inode(dentry); - - if (!reiserfs_attrs(inode->i_sb)) - return -ENOTTY; - - fileattr_fill_flags(fa, REISERFS_I(inode)->i_attrs); - - return 0; -} - -int reiserfs_fileattr_set(struct mnt_idmap *idmap, - struct dentry *dentry, struct fileattr *fa) -{ - struct inode *inode = d_inode(dentry); - unsigned int flags = fa->flags; - int err; - - reiserfs_write_lock(inode->i_sb); - - err = -ENOTTY; - if (!reiserfs_attrs(inode->i_sb)) - goto unlock; - - err = -EOPNOTSUPP; - if (fileattr_has_fsx(fa)) - goto unlock; - - /* - * Is it quota file? Do not allow user to mess with it - */ - err = -EPERM; - if (IS_NOQUOTA(inode)) - goto unlock; - - if ((flags & REISERFS_NOTAIL_FL) && S_ISREG(inode->i_mode)) { - err = reiserfs_unpack(inode); - if (err) - goto unlock; - } - sd_attrs_to_i_attrs(flags, inode); - REISERFS_I(inode)->i_attrs = flags; - inode_set_ctime_current(inode); - mark_inode_dirty(inode); - err = 0; -unlock: - reiserfs_write_unlock(inode->i_sb); - - return err; -} - -/* - * reiserfs_ioctl - handler for ioctl for inode - * supported commands: - * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect - * and prevent packing file (argument arg has t - * be non-zero) - * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION - * 3) That's all for a while ... 
- */ -long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file_inode(filp); - int err = 0; - - reiserfs_write_lock(inode->i_sb); - - switch (cmd) { - case REISERFS_IOC_UNPACK: - if (S_ISREG(inode->i_mode)) { - if (arg) - err = reiserfs_unpack(inode); - } else - err = -ENOTTY; - break; - /* - * following two cases are taken from fs/ext2/ioctl.c by Remy - * Card (card@masi.ibp.fr) - */ - case REISERFS_IOC_GETVERSION: - err = put_user(inode->i_generation, (int __user *)arg); - break; - case REISERFS_IOC_SETVERSION: - if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) { - err = -EPERM; - break; - } - err = mnt_want_write_file(filp); - if (err) - break; - if (get_user(inode->i_generation, (int __user *)arg)) { - err = -EFAULT; - goto setversion_out; - } - inode_set_ctime_current(inode); - mark_inode_dirty(inode); -setversion_out: - mnt_drop_write_file(filp); - break; - default: - err = -ENOTTY; - } - - reiserfs_write_unlock(inode->i_sb); - - return err; -} - -#ifdef CONFIG_COMPAT -long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - /* - * These are just misnamed, they actually - * get/put from/to user an int - */ - switch (cmd) { - case REISERFS_IOC32_UNPACK: - cmd = REISERFS_IOC_UNPACK; - break; - case REISERFS_IOC32_GETVERSION: - cmd = REISERFS_IOC_GETVERSION; - break; - case REISERFS_IOC32_SETVERSION: - cmd = REISERFS_IOC_SETVERSION; - break; - default: - return -ENOIOCTLCMD; - } - - return reiserfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); -} -#endif - -int reiserfs_commit_write(struct file *f, struct page *page, - unsigned from, unsigned to); -/* - * reiserfs_unpack - * Function try to convert tail from direct item into indirect. - * It set up nopack attribute in the REISERFS_I(inode)->nopack - */ -int reiserfs_unpack(struct inode *inode) -{ - int retval = 0; - int index; - struct page *page; - struct address_space *mapping; - unsigned long write_from; - unsigned long blocksize = inode->i_sb->s_blocksize; - - if (inode->i_size == 0) { - REISERFS_I(inode)->i_flags |= i_nopack_mask; - return 0; - } - /* ioctl already done */ - if (REISERFS_I(inode)->i_flags & i_nopack_mask) { - return 0; - } - - /* we need to make sure nobody is changing the file size beneath us */ - { - int depth = reiserfs_write_unlock_nested(inode->i_sb); - - inode_lock(inode); - reiserfs_write_lock_nested(inode->i_sb, depth); - } - - reiserfs_write_lock(inode->i_sb); - - write_from = inode->i_size & (blocksize - 1); - /* if we are on a block boundary, we are already unpacked. */ - if (write_from == 0) { - REISERFS_I(inode)->i_flags |= i_nopack_mask; - goto out; - } - - /* - * we unpack by finding the page with the tail, and calling - * __reiserfs_write_begin on that page. This will force a - * reiserfs_get_block to unpack the tail for us. 
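The unpack path locates the tail purely from i_size: the byte offset of the tail inside its block, and the page-cache index of the page that holds the last byte. A compile-and-run sketch of that arithmetic; PAGE_SHIFT and the sizes are assumptions for the example.

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
        unsigned long long i_size = 1234567;  /* example file size */
        unsigned long blocksize = 4096;

        /* offset of the tail within its block; 0 means nothing to unpack */
        unsigned long write_from = (unsigned long)(i_size & (blocksize - 1));
        /* page cache index of the page holding the last byte */
        unsigned long index = (unsigned long)(i_size >> PAGE_SHIFT);

        if (write_from == 0)
                printf("file ends on a block boundary, already unpacked\n");
        else
                printf("tail starts %lu bytes into page %lu; a write_begin "
                       "there forces get_block to convert it\n",
                       write_from, index);
        return 0;
}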
- */ - index = inode->i_size >> PAGE_SHIFT; - mapping = inode->i_mapping; - page = grab_cache_page(mapping, index); - retval = -ENOMEM; - if (!page) { - goto out; - } - retval = __reiserfs_write_begin(page, write_from, 0); - if (retval) - goto out_unlock; - - /* conversion can change page contents, must flush */ - flush_dcache_page(page); - retval = reiserfs_commit_write(NULL, page, write_from, write_from); - REISERFS_I(inode)->i_flags |= i_nopack_mask; - -out_unlock: - unlock_page(page); - put_page(page); - -out: - inode_unlock(inode); - reiserfs_write_unlock(inode->i_sb); - return retval; -} diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c deleted file mode 100644 index 5011c10287c6..000000000000 --- a/fs/reiserfs/item_ops.c +++ /dev/null @@ -1,737 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include -#include "reiserfs.h" - -/* - * this contains item handlers for old item types: sd, direct, - * indirect, directory - */ - -/* - * and where are the comments? how about saying where we can find an - * explanation of each item handler method? -Hans - */ - -/* stat data functions */ -static int sd_bytes_number(struct item_head *ih, int block_size) -{ - return 0; -} - -static void sd_decrement_key(struct cpu_key *key) -{ - key->on_disk_key.k_objectid--; - set_cpu_key_k_type(key, TYPE_ANY); - set_cpu_key_k_offset(key, (loff_t)(~0ULL >> 1)); -} - -static int sd_is_left_mergeable(struct reiserfs_key *key, unsigned long bsize) -{ - return 0; -} - -static void sd_print_item(struct item_head *ih, char *item) -{ - printk("\tmode | size | nlinks | first direct | mtime\n"); - if (stat_data_v1(ih)) { - struct stat_data_v1 *sd = (struct stat_data_v1 *)item; - - printk("\t0%-6o | %6u | %2u | %d | %u\n", sd_v1_mode(sd), - sd_v1_size(sd), sd_v1_nlink(sd), - sd_v1_first_direct_byte(sd), - sd_v1_mtime(sd)); - } else { - struct stat_data *sd = (struct stat_data *)item; - - printk("\t0%-6o | %6llu | %2u | %d | %u\n", sd_v2_mode(sd), - (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd), - sd_v2_rdev(sd), sd_v2_mtime(sd)); - } -} - -static void sd_check_item(struct item_head *ih, char *item) -{ - /* unused */ -} - -static int sd_create_vi(struct virtual_node *vn, - struct virtual_item *vi, - int is_affected, int insert_size) -{ - vi->vi_index = TYPE_STAT_DATA; - return 0; -} - -static int sd_check_left(struct virtual_item *vi, int free, - int start_skip, int end_skip) -{ - BUG_ON(start_skip || end_skip); - return -1; -} - -static int sd_check_right(struct virtual_item *vi, int free) -{ - return -1; -} - -static int sd_part_size(struct virtual_item *vi, int first, int count) -{ - BUG_ON(count); - return 0; -} - -static int sd_unit_num(struct virtual_item *vi) -{ - return vi->vi_item_len - IH_SIZE; -} - -static void sd_print_vi(struct virtual_item *vi) -{ - reiserfs_warning(NULL, "reiserfs-16100", - "STATDATA, index %d, type 0x%x, %h", - vi->vi_index, vi->vi_type, vi->vi_ih); -} - -static struct item_operations stat_data_ops = { - .bytes_number = sd_bytes_number, - .decrement_key = sd_decrement_key, - .is_left_mergeable = sd_is_left_mergeable, - .print_item = sd_print_item, - .check_item = sd_check_item, - - .create_vi = sd_create_vi, - .check_left = sd_check_left, - .check_right = sd_check_right, - .part_size = sd_part_size, - .unit_num = sd_unit_num, - .print_vi = sd_print_vi -}; - -/* direct item functions */ -static int direct_bytes_number(struct item_head *ih, int block_size) -{ - return ih_item_len(ih); -} - -/* FIXME: this should probably switch 
to indirect as well */ -static void direct_decrement_key(struct cpu_key *key) -{ - cpu_key_k_offset_dec(key); - if (cpu_key_k_offset(key) == 0) - set_cpu_key_k_type(key, TYPE_STAT_DATA); -} - -static int direct_is_left_mergeable(struct reiserfs_key *key, - unsigned long bsize) -{ - int version = le_key_version(key); - return ((le_key_k_offset(version, key) & (bsize - 1)) != 1); -} - -static void direct_print_item(struct item_head *ih, char *item) -{ - int j = 0; - -/* return; */ - printk("\""); - while (j < ih_item_len(ih)) - printk("%c", item[j++]); - printk("\"\n"); -} - -static void direct_check_item(struct item_head *ih, char *item) -{ - /* unused */ -} - -static int direct_create_vi(struct virtual_node *vn, - struct virtual_item *vi, - int is_affected, int insert_size) -{ - vi->vi_index = TYPE_DIRECT; - return 0; -} - -static int direct_check_left(struct virtual_item *vi, int free, - int start_skip, int end_skip) -{ - int bytes; - - bytes = free - free % 8; - return bytes ? : -1; -} - -static int direct_check_right(struct virtual_item *vi, int free) -{ - return direct_check_left(vi, free, 0, 0); -} - -static int direct_part_size(struct virtual_item *vi, int first, int count) -{ - return count; -} - -static int direct_unit_num(struct virtual_item *vi) -{ - return vi->vi_item_len - IH_SIZE; -} - -static void direct_print_vi(struct virtual_item *vi) -{ - reiserfs_warning(NULL, "reiserfs-16101", - "DIRECT, index %d, type 0x%x, %h", - vi->vi_index, vi->vi_type, vi->vi_ih); -} - -static struct item_operations direct_ops = { - .bytes_number = direct_bytes_number, - .decrement_key = direct_decrement_key, - .is_left_mergeable = direct_is_left_mergeable, - .print_item = direct_print_item, - .check_item = direct_check_item, - - .create_vi = direct_create_vi, - .check_left = direct_check_left, - .check_right = direct_check_right, - .part_size = direct_part_size, - .unit_num = direct_unit_num, - .print_vi = direct_print_vi -}; - -/* indirect item functions */ -static int indirect_bytes_number(struct item_head *ih, int block_size) -{ - return ih_item_len(ih) / UNFM_P_SIZE * block_size; -} - -/* decrease offset, if it becomes 0, change type to stat data */ -static void indirect_decrement_key(struct cpu_key *key) -{ - cpu_key_k_offset_dec(key); - if (cpu_key_k_offset(key) == 0) - set_cpu_key_k_type(key, TYPE_STAT_DATA); -} - -/* if it is not first item of the body, then it is mergeable */ -static int indirect_is_left_mergeable(struct reiserfs_key *key, - unsigned long bsize) -{ - int version = le_key_version(key); - return (le_key_k_offset(version, key) != 1); -} - -/* printing of indirect item */ -static void start_new_sequence(__u32 * start, int *len, __u32 new) -{ - *start = new; - *len = 1; -} - -static int sequence_finished(__u32 start, int *len, __u32 new) -{ - if (start == INT_MAX) - return 1; - - if (start == 0 && new == 0) { - (*len)++; - return 0; - } - if (start != 0 && (start + *len) == new) { - (*len)++; - return 0; - } - return 1; -} - -static void print_sequence(__u32 start, int len) -{ - if (start == INT_MAX) - return; - - if (len == 1) - printk(" %d", start); - else - printk(" %d(%d)", start, len); -} - -static void indirect_print_item(struct item_head *ih, char *item) -{ - int j; - __le32 *unp; - __u32 prev = INT_MAX; - int num = 0; - - unp = (__le32 *) item; - - if (ih_item_len(ih) % UNFM_P_SIZE) - reiserfs_warning(NULL, "reiserfs-16102", "invalid item len"); - - printk("%d pointers\n[ ", (int)I_UNFM_NUM(ih)); - for (j = 0; j < I_UNFM_NUM(ih); j++) { - if (sequence_finished(prev, 
&num, get_block_num(unp, j))) { - print_sequence(prev, num); - start_new_sequence(&prev, &num, get_block_num(unp, j)); - } - } - print_sequence(prev, num); - printk("]\n"); -} - -static void indirect_check_item(struct item_head *ih, char *item) -{ - /* unused */ -} - -static int indirect_create_vi(struct virtual_node *vn, - struct virtual_item *vi, - int is_affected, int insert_size) -{ - vi->vi_index = TYPE_INDIRECT; - return 0; -} - -static int indirect_check_left(struct virtual_item *vi, int free, - int start_skip, int end_skip) -{ - int bytes; - - bytes = free - free % UNFM_P_SIZE; - return bytes ? : -1; -} - -static int indirect_check_right(struct virtual_item *vi, int free) -{ - return indirect_check_left(vi, free, 0, 0); -} - -/* - * return size in bytes of 'units' units. If first == 0 - calculate - * from the head (left), otherwise - from tail (right) - */ -static int indirect_part_size(struct virtual_item *vi, int first, int units) -{ - /* unit of indirect item is byte (yet) */ - return units; -} - -static int indirect_unit_num(struct virtual_item *vi) -{ - /* unit of indirect item is byte (yet) */ - return vi->vi_item_len - IH_SIZE; -} - -static void indirect_print_vi(struct virtual_item *vi) -{ - reiserfs_warning(NULL, "reiserfs-16103", - "INDIRECT, index %d, type 0x%x, %h", - vi->vi_index, vi->vi_type, vi->vi_ih); -} - -static struct item_operations indirect_ops = { - .bytes_number = indirect_bytes_number, - .decrement_key = indirect_decrement_key, - .is_left_mergeable = indirect_is_left_mergeable, - .print_item = indirect_print_item, - .check_item = indirect_check_item, - - .create_vi = indirect_create_vi, - .check_left = indirect_check_left, - .check_right = indirect_check_right, - .part_size = indirect_part_size, - .unit_num = indirect_unit_num, - .print_vi = indirect_print_vi -}; - -/* direntry functions */ -static int direntry_bytes_number(struct item_head *ih, int block_size) -{ - reiserfs_warning(NULL, "vs-16090", - "bytes number is asked for direntry"); - return 0; -} - -static void direntry_decrement_key(struct cpu_key *key) -{ - cpu_key_k_offset_dec(key); - if (cpu_key_k_offset(key) == 0) - set_cpu_key_k_type(key, TYPE_STAT_DATA); -} - -static int direntry_is_left_mergeable(struct reiserfs_key *key, - unsigned long bsize) -{ - if (le32_to_cpu(key->u.k_offset_v1.k_offset) == DOT_OFFSET) - return 0; - return 1; - -} - -static void direntry_print_item(struct item_head *ih, char *item) -{ - int i; - int namelen; - struct reiserfs_de_head *deh; - char *name; - static char namebuf[80]; - - printk("\n # %-15s%-30s%-15s%-15s%-15s\n", "Name", - "Key of pointed object", "Hash", "Gen number", "Status"); - - deh = (struct reiserfs_de_head *)item; - - for (i = 0; i < ih_entry_count(ih); i++, deh++) { - namelen = - (i ? (deh_location(deh - 1)) : ih_item_len(ih)) - - deh_location(deh); - name = item + deh_location(deh); - if (name[namelen - 1] == 0) - namelen = strlen(name); - - scnprintf(namebuf, sizeof(namebuf), "\"%.*s\"", - (int)sizeof(namebuf)-3, name); - - printk("%d: %-15s%-15d%-15d%-15lld%-15lld(%s)\n", - i, namebuf, - deh_dir_id(deh), deh_objectid(deh), - GET_HASH_VALUE(deh_offset(deh)), - GET_GENERATION_NUMBER((deh_offset(deh))), - (de_hidden(deh)) ? 
"HIDDEN" : "VISIBLE"); - } -} - -static void direntry_check_item(struct item_head *ih, char *item) -{ - int i; - struct reiserfs_de_head *deh; - - /* unused */ - deh = (struct reiserfs_de_head *)item; - for (i = 0; i < ih_entry_count(ih); i++, deh++) { - ; - } -} - -#define DIRENTRY_VI_FIRST_DIRENTRY_ITEM 1 - -/* - * function returns old entry number in directory item in real node - * using new entry number in virtual item in virtual node - */ -static inline int old_entry_num(int is_affected, int virtual_entry_num, - int pos_in_item, int mode) -{ - if (mode == M_INSERT || mode == M_DELETE) - return virtual_entry_num; - - if (!is_affected) - /* cut or paste is applied to another item */ - return virtual_entry_num; - - if (virtual_entry_num < pos_in_item) - return virtual_entry_num; - - if (mode == M_CUT) - return virtual_entry_num + 1; - - RFALSE(mode != M_PASTE || virtual_entry_num == 0, - "vs-8015: old_entry_num: mode must be M_PASTE (mode = \'%c\'", - mode); - - return virtual_entry_num - 1; -} - -/* - * Create an array of sizes of directory entries for virtual - * item. Return space used by an item. FIXME: no control over - * consuming of space used by this item handler - */ -static int direntry_create_vi(struct virtual_node *vn, - struct virtual_item *vi, - int is_affected, int insert_size) -{ - struct direntry_uarea *dir_u = vi->vi_uarea; - int i, j; - int size = sizeof(struct direntry_uarea); - struct reiserfs_de_head *deh; - - vi->vi_index = TYPE_DIRENTRY; - - BUG_ON(!(vi->vi_ih) || !vi->vi_item); - - dir_u->flags = 0; - if (le_ih_k_offset(vi->vi_ih) == DOT_OFFSET) - dir_u->flags |= DIRENTRY_VI_FIRST_DIRENTRY_ITEM; - - deh = (struct reiserfs_de_head *)(vi->vi_item); - - /* virtual directory item have this amount of entry after */ - dir_u->entry_count = ih_entry_count(vi->vi_ih) + - ((is_affected) ? ((vn->vn_mode == M_CUT) ? -1 : - (vn->vn_mode == M_PASTE ? 1 : 0)) : 0); - - for (i = 0; i < dir_u->entry_count; i++) { - j = old_entry_num(is_affected, i, vn->vn_pos_in_item, - vn->vn_mode); - dir_u->entry_sizes[i] = - (j ? deh_location(&deh[j - 1]) : ih_item_len(vi->vi_ih)) - - deh_location(&deh[j]) + DEH_SIZE; - } - - size += (dir_u->entry_count * sizeof(short)); - - /* set size of pasted entry */ - if (is_affected && vn->vn_mode == M_PASTE) - dir_u->entry_sizes[vn->vn_pos_in_item] = insert_size; - -#ifdef CONFIG_REISERFS_CHECK - /* compare total size of entries with item length */ - { - int k, l; - - l = 0; - for (k = 0; k < dir_u->entry_count; k++) - l += dir_u->entry_sizes[k]; - - if (l + IH_SIZE != vi->vi_item_len + - ((is_affected - && (vn->vn_mode == M_PASTE - || vn->vn_mode == M_CUT)) ? insert_size : 0)) { - reiserfs_panic(NULL, "vs-8025", "(mode==%c, " - "insert_size==%d), invalid length of " - "directory item", - vn->vn_mode, insert_size); - } - } -#endif - - return size; - -} - -/* - * return number of entries which may fit into specified amount of - * free space, or -1 if free space is not enough even for 1 entry - */ -static int direntry_check_left(struct virtual_item *vi, int free, - int start_skip, int end_skip) -{ - int i; - int entries = 0; - struct direntry_uarea *dir_u = vi->vi_uarea; - - for (i = start_skip; i < dir_u->entry_count - end_skip; i++) { - /* i-th entry doesn't fit into the remaining free space */ - if (dir_u->entry_sizes[i] > free) - break; - - free -= dir_u->entry_sizes[i]; - entries++; - } - - if (entries == dir_u->entry_count) { - reiserfs_panic(NULL, "item_ops-1", - "free space %d, entry_count %d", free, - dir_u->entry_count); - } - - /* "." and ".." 
can not be separated from each other */ - if (start_skip == 0 && (dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) - && entries < 2) - entries = 0; - - return entries ? : -1; -} - -static int direntry_check_right(struct virtual_item *vi, int free) -{ - int i; - int entries = 0; - struct direntry_uarea *dir_u = vi->vi_uarea; - - for (i = dir_u->entry_count - 1; i >= 0; i--) { - /* i-th entry doesn't fit into the remaining free space */ - if (dir_u->entry_sizes[i] > free) - break; - - free -= dir_u->entry_sizes[i]; - entries++; - } - BUG_ON(entries == dir_u->entry_count); - - /* "." and ".." can not be separated from each other */ - if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) - && entries > dir_u->entry_count - 2) - entries = dir_u->entry_count - 2; - - return entries ? : -1; -} - -/* sum of entry sizes between from-th and to-th entries including both edges */ -static int direntry_part_size(struct virtual_item *vi, int first, int count) -{ - int i, retval; - int from, to; - struct direntry_uarea *dir_u = vi->vi_uarea; - - retval = 0; - if (first == 0) - from = 0; - else - from = dir_u->entry_count - count; - to = from + count - 1; - - for (i = from; i <= to; i++) - retval += dir_u->entry_sizes[i]; - - return retval; -} - -static int direntry_unit_num(struct virtual_item *vi) -{ - struct direntry_uarea *dir_u = vi->vi_uarea; - - return dir_u->entry_count; -} - -static void direntry_print_vi(struct virtual_item *vi) -{ - int i; - struct direntry_uarea *dir_u = vi->vi_uarea; - - reiserfs_warning(NULL, "reiserfs-16104", - "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x", - vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags); - printk("%d entries: ", dir_u->entry_count); - for (i = 0; i < dir_u->entry_count; i++) - printk("%d ", dir_u->entry_sizes[i]); - printk("\n"); -} - -static struct item_operations direntry_ops = { - .bytes_number = direntry_bytes_number, - .decrement_key = direntry_decrement_key, - .is_left_mergeable = direntry_is_left_mergeable, - .print_item = direntry_print_item, - .check_item = direntry_check_item, - - .create_vi = direntry_create_vi, - .check_left = direntry_check_left, - .check_right = direntry_check_right, - .part_size = direntry_part_size, - .unit_num = direntry_unit_num, - .print_vi = direntry_print_vi -}; - -/* Error catching functions to catch errors caused by incorrect item types. 
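The free-space checks above count how many directory entries could move into a neighbouring node, with the extra rule that "." and ".." never travel alone. A simplified standalone version of the counting loop; the virtual-item plumbing is omitted and the entry sizes are invented.

#include <stdio.h>

/* Count how many leading entries fit into `space` bytes, -1 if none do;
 * same shape as direntry_check_left() without the virtual-item state. */
static int entries_that_fit(const int *sizes, int count, int space,
                            int first_item_holds_dot_entries)
{
        int entries = 0;

        for (int i = 0; i < count; i++) {
                if (sizes[i] > space)
                        break;
                space -= sizes[i];
                entries++;
        }
        /* "." and ".." must stay together in the first directory item */
        if (first_item_holds_dot_entries && entries < 2)
                entries = 0;

        return entries ? entries : -1;
}

int main(void)
{
        int sizes[] = { 32, 32, 48, 40, 64 };  /* illustrative entry sizes */

        printf("120 bytes free: %d entries move\n",
               entries_that_fit(sizes, 5, 120, 1));
        printf(" 40 bytes free: %d entries move\n",
               entries_that_fit(sizes, 5, 40, 1));
        return 0;
}

The second call returns -1: one entry would fit, but splitting "." from ".." is not allowed, so nothing moves.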
*/ -static int errcatch_bytes_number(struct item_head *ih, int block_size) -{ - reiserfs_warning(NULL, "green-16001", - "Invalid item type observed, run fsck ASAP"); - return 0; -} - -static void errcatch_decrement_key(struct cpu_key *key) -{ - reiserfs_warning(NULL, "green-16002", - "Invalid item type observed, run fsck ASAP"); -} - -static int errcatch_is_left_mergeable(struct reiserfs_key *key, - unsigned long bsize) -{ - reiserfs_warning(NULL, "green-16003", - "Invalid item type observed, run fsck ASAP"); - return 0; -} - -static void errcatch_print_item(struct item_head *ih, char *item) -{ - reiserfs_warning(NULL, "green-16004", - "Invalid item type observed, run fsck ASAP"); -} - -static void errcatch_check_item(struct item_head *ih, char *item) -{ - reiserfs_warning(NULL, "green-16005", - "Invalid item type observed, run fsck ASAP"); -} - -static int errcatch_create_vi(struct virtual_node *vn, - struct virtual_item *vi, - int is_affected, int insert_size) -{ - reiserfs_warning(NULL, "green-16006", - "Invalid item type observed, run fsck ASAP"); - /* - * We might return -1 here as well, but it won't help as - * create_virtual_node() from where this operation is called - * from is of return type void. - */ - return 0; -} - -static int errcatch_check_left(struct virtual_item *vi, int free, - int start_skip, int end_skip) -{ - reiserfs_warning(NULL, "green-16007", - "Invalid item type observed, run fsck ASAP"); - return -1; -} - -static int errcatch_check_right(struct virtual_item *vi, int free) -{ - reiserfs_warning(NULL, "green-16008", - "Invalid item type observed, run fsck ASAP"); - return -1; -} - -static int errcatch_part_size(struct virtual_item *vi, int first, int count) -{ - reiserfs_warning(NULL, "green-16009", - "Invalid item type observed, run fsck ASAP"); - return 0; -} - -static int errcatch_unit_num(struct virtual_item *vi) -{ - reiserfs_warning(NULL, "green-16010", - "Invalid item type observed, run fsck ASAP"); - return 0; -} - -static void errcatch_print_vi(struct virtual_item *vi) -{ - reiserfs_warning(NULL, "green-16011", - "Invalid item type observed, run fsck ASAP"); -} - -static struct item_operations errcatch_ops = { - .bytes_number = errcatch_bytes_number, - .decrement_key = errcatch_decrement_key, - .is_left_mergeable = errcatch_is_left_mergeable, - .print_item = errcatch_print_item, - .check_item = errcatch_check_item, - - .create_vi = errcatch_create_vi, - .check_left = errcatch_check_left, - .check_right = errcatch_check_right, - .part_size = errcatch_part_size, - .unit_num = errcatch_unit_num, - .print_vi = errcatch_print_vi -}; - -#if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3) -#error Item types must use disk-format assigned values. -#endif - -struct item_operations *item_ops[TYPE_ANY + 1] = { - &stat_data_ops, - &indirect_ops, - &direct_ops, - &direntry_ops, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - &errcatch_ops /* This is to catch errors with invalid type (15th entry for TYPE_ANY) */ -}; diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c deleted file mode 100644 index e477ee0ff35d..000000000000 --- a/fs/reiserfs/journal.c +++ /dev/null @@ -1,4404 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Write ahead logging implementation copyright Chris Mason 2000 - * - * The background commits make this code very interrelated, and - * overly complex. I need to rethink things a bit....The major players: - * - * journal_begin -- call with the number of blocks you expect to log. 
- * If the current transaction is too - * old, it will block until the current transaction is - * finished, and then start a new one. - * Usually, your transaction will get joined in with - * previous ones for speed. - * - * journal_join -- same as journal_begin, but won't block on the current - * transaction regardless of age. Don't ever call - * this. Ever. There are only two places it should be - * called from, and they are both inside this file. - * - * journal_mark_dirty -- adds blocks into this transaction. clears any flags - * that might make them get sent to disk - * and then marks them BH_JDirty. Puts the buffer head - * into the current transaction hash. - * - * journal_end -- if the current transaction is batchable, it does nothing - * otherwise, it could do an async/synchronous commit, or - * a full flush of all log and real blocks in the - * transaction. - * - * flush_old_commits -- if the current transaction is too old, it is ended and - * commit blocks are sent to disk. Forces commit blocks - * to disk for all backgrounded commits that have been - * around too long. - * -- Note, if you call this as an immediate flush from - * within kupdate, it will ignore the immediate flag - */ - -#include -#include -#include -#include "reiserfs.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* gets a struct reiserfs_journal_list * from a list head */ -#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ - j_list)) - -/* must be correct to keep the desc and commit structs at 4k */ -#define JOURNAL_TRANS_HALF 1018 -#define BUFNR 64 /*read ahead */ - -/* cnode stat bits. Move these into reiserfs_fs.h */ - -/* this block was freed, and can't be written. 
*/ -#define BLOCK_FREED 2 -/* this block was freed during this transaction, and can't be written */ -#define BLOCK_FREED_HOLDER 3 - -/* used in flush_journal_list */ -#define BLOCK_NEEDS_FLUSH 4 -#define BLOCK_DIRTIED 5 - -/* journal list state bits */ -#define LIST_TOUCHED 1 -#define LIST_DIRTY 2 -#define LIST_COMMIT_PENDING 4 /* someone will commit this list */ - -/* flags for do_journal_end */ -#define FLUSH_ALL 1 /* flush commit and real blocks */ -#define COMMIT_NOW 2 /* end and commit this transaction */ -#define WAIT 4 /* wait for the log blocks to hit the disk */ - -static int do_journal_end(struct reiserfs_transaction_handle *, int flags); -static int flush_journal_list(struct super_block *s, - struct reiserfs_journal_list *jl, int flushall); -static int flush_commit_list(struct super_block *s, - struct reiserfs_journal_list *jl, int flushall); -static int can_dirty(struct reiserfs_journal_cnode *cn); -static int journal_join(struct reiserfs_transaction_handle *th, - struct super_block *sb); -static void release_journal_dev(struct reiserfs_journal *journal); -static void dirty_one_transaction(struct super_block *s, - struct reiserfs_journal_list *jl); -static void flush_async_commits(struct work_struct *work); -static void queue_log_writer(struct super_block *s); - -/* values for join in do_journal_begin_r */ -enum { - JBEGIN_REG = 0, /* regular journal begin */ - /* join the running transaction if at all possible */ - JBEGIN_JOIN = 1, - /* called from cleanup code, ignores aborted flag */ - JBEGIN_ABORT = 2, -}; - -static int do_journal_begin_r(struct reiserfs_transaction_handle *th, - struct super_block *sb, - unsigned long nblocks, int join); - -static void init_journal_hash(struct super_block *sb) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - memset(journal->j_hash_table, 0, - JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); -} - -/* - * clears BH_Dirty and sticks the buffer on the clean list. Called because - * I can't allow refile_buffer to make schedule happen after I've freed a - * block. Look at remove_from_transaction and journal_mark_freed for - * more details. 
- */ -static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) -{ - if (bh) { - clear_buffer_dirty(bh); - clear_buffer_journal_test(bh); - } - return 0; -} - -static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block - *sb) -{ - struct reiserfs_bitmap_node *bn; - static int id; - - bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS); - if (!bn) { - return NULL; - } - bn->data = kzalloc(sb->s_blocksize, GFP_NOFS); - if (!bn->data) { - kfree(bn); - return NULL; - } - bn->id = id++; - INIT_LIST_HEAD(&bn->list); - return bn; -} - -static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_bitmap_node *bn = NULL; - struct list_head *entry = journal->j_bitmap_nodes.next; - - journal->j_used_bitmap_nodes++; -repeat: - - if (entry != &journal->j_bitmap_nodes) { - bn = list_entry(entry, struct reiserfs_bitmap_node, list); - list_del(entry); - memset(bn->data, 0, sb->s_blocksize); - journal->j_free_bitmap_nodes--; - return bn; - } - bn = allocate_bitmap_node(sb); - if (!bn) { - yield(); - goto repeat; - } - return bn; -} -static inline void free_bitmap_node(struct super_block *sb, - struct reiserfs_bitmap_node *bn) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - journal->j_used_bitmap_nodes--; - if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { - kfree(bn->data); - kfree(bn); - } else { - list_add(&bn->list, &journal->j_bitmap_nodes); - journal->j_free_bitmap_nodes++; - } -} - -static void allocate_bitmap_nodes(struct super_block *sb) -{ - int i; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_bitmap_node *bn = NULL; - for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) { - bn = allocate_bitmap_node(sb); - if (bn) { - list_add(&bn->list, &journal->j_bitmap_nodes); - journal->j_free_bitmap_nodes++; - } else { - /* this is ok, we'll try again when more are needed */ - break; - } - } -} - -static int set_bit_in_list_bitmap(struct super_block *sb, - b_blocknr_t block, - struct reiserfs_list_bitmap *jb) -{ - unsigned int bmap_nr = block / (sb->s_blocksize << 3); - unsigned int bit_nr = block % (sb->s_blocksize << 3); - - if (!jb->bitmaps[bmap_nr]) { - jb->bitmaps[bmap_nr] = get_bitmap_node(sb); - } - set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data); - return 0; -} - -static void cleanup_bitmap_list(struct super_block *sb, - struct reiserfs_list_bitmap *jb) -{ - int i; - if (jb->bitmaps == NULL) - return; - - for (i = 0; i < reiserfs_bmap_count(sb); i++) { - if (jb->bitmaps[i]) { - free_bitmap_node(sb, jb->bitmaps[i]); - jb->bitmaps[i] = NULL; - } - } -} - -/* - * only call this on FS unmount. 
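The list bitmaps above keep one bitmap node per on-disk bitmap block, so recording a freed block means splitting its number into a node index and a bit index, exactly as set_bit_in_list_bitmap() does. A tiny sketch of that split with an assumed 4 KiB block size:

#include <stdio.h>

int main(void)
{
        unsigned long blocksize = 4096;                /* bytes per bitmap block */
        unsigned long bits_per_node = blocksize << 3;  /* 8 bits per byte */
        unsigned long block = 100000;                  /* block being recorded */

        unsigned long bmap_nr = block / bits_per_node;
        unsigned long bit_nr  = block % bits_per_node;

        printf("block %lu -> bitmap node %lu, bit %lu (%lu bits per node)\n",
               block, bmap_nr, bit_nr, bits_per_node);
        return 0;
}

Each node is only allocated on first use, which is why set_bit_in_list_bitmap() calls get_bitmap_node() when jb->bitmaps[bmap_nr] is still NULL.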
- */ -static int free_list_bitmaps(struct super_block *sb, - struct reiserfs_list_bitmap *jb_array) -{ - int i; - struct reiserfs_list_bitmap *jb; - for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { - jb = jb_array + i; - jb->journal_list = NULL; - cleanup_bitmap_list(sb, jb); - vfree(jb->bitmaps); - jb->bitmaps = NULL; - } - return 0; -} - -static int free_bitmap_nodes(struct super_block *sb) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct list_head *next = journal->j_bitmap_nodes.next; - struct reiserfs_bitmap_node *bn; - - while (next != &journal->j_bitmap_nodes) { - bn = list_entry(next, struct reiserfs_bitmap_node, list); - list_del(next); - kfree(bn->data); - kfree(bn); - next = journal->j_bitmap_nodes.next; - journal->j_free_bitmap_nodes--; - } - - return 0; -} - -/* - * get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. - * jb_array is the array to be filled in. - */ -int reiserfs_allocate_list_bitmaps(struct super_block *sb, - struct reiserfs_list_bitmap *jb_array, - unsigned int bmap_nr) -{ - int i; - int failed = 0; - struct reiserfs_list_bitmap *jb; - int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *); - - for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { - jb = jb_array + i; - jb->journal_list = NULL; - jb->bitmaps = vzalloc(mem); - if (!jb->bitmaps) { - reiserfs_warning(sb, "clm-2000", "unable to " - "allocate bitmaps for journal lists"); - failed = 1; - break; - } - } - if (failed) { - free_list_bitmaps(sb, jb_array); - return -1; - } - return 0; -} - -/* - * find an available list bitmap. If you can't find one, flush a commit list - * and try again - */ -static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, - struct reiserfs_journal_list - *jl) -{ - int i, j; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_list_bitmap *jb = NULL; - - for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) { - i = journal->j_list_bitmap_index; - journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS; - jb = journal->j_list_bitmap + i; - if (journal->j_list_bitmap[i].journal_list) { - flush_commit_list(sb, - journal->j_list_bitmap[i]. - journal_list, 1); - if (!journal->j_list_bitmap[i].journal_list) { - break; - } - } else { - break; - } - } - /* double check to make sure if flushed correctly */ - if (jb->journal_list) - return NULL; - jb->journal_list = jl; - return jb; -} - -/* - * allocates a new chunk of X nodes, and links them all together as a list. 
- * Uses the cnode->next and cnode->prev pointers - * returns NULL on failure - */ -static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) -{ - struct reiserfs_journal_cnode *head; - int i; - if (num_cnodes <= 0) { - return NULL; - } - head = vzalloc(array_size(num_cnodes, - sizeof(struct reiserfs_journal_cnode))); - if (!head) { - return NULL; - } - head[0].prev = NULL; - head[0].next = head + 1; - for (i = 1; i < num_cnodes; i++) { - head[i].prev = head + (i - 1); - head[i].next = head + (i + 1); /* if last one, overwrite it after the if */ - } - head[num_cnodes - 1].next = NULL; - return head; -} - -/* pulls a cnode off the free list, or returns NULL on failure */ -static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) -{ - struct reiserfs_journal_cnode *cn; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - - reiserfs_check_lock_depth(sb, "get_cnode"); - - if (journal->j_cnode_free <= 0) { - return NULL; - } - journal->j_cnode_used++; - journal->j_cnode_free--; - cn = journal->j_cnode_free_list; - if (!cn) { - return cn; - } - if (cn->next) { - cn->next->prev = NULL; - } - journal->j_cnode_free_list = cn->next; - memset(cn, 0, sizeof(struct reiserfs_journal_cnode)); - return cn; -} - -/* - * returns a cnode to the free list - */ -static void free_cnode(struct super_block *sb, - struct reiserfs_journal_cnode *cn) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - - reiserfs_check_lock_depth(sb, "free_cnode"); - - journal->j_cnode_used--; - journal->j_cnode_free++; - /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ - cn->next = journal->j_cnode_free_list; - if (journal->j_cnode_free_list) { - journal->j_cnode_free_list->prev = cn; - } - cn->prev = NULL; /* not needed with the memset, but I might kill the memset, and forget to do this */ - journal->j_cnode_free_list = cn; -} - -static void clear_prepared_bits(struct buffer_head *bh) -{ - clear_buffer_journal_prepared(bh); - clear_buffer_journal_restore_dirty(bh); -} - -/* - * return a cnode with same dev, block number and size in table, - * or null if not found - */ -static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct - super_block - *sb, - struct - reiserfs_journal_cnode - **table, - long bl) -{ - struct reiserfs_journal_cnode *cn; - cn = journal_hash(table, sb, bl); - while (cn) { - if (cn->blocknr == bl && cn->sb == sb) - return cn; - cn = cn->hnext; - } - return (struct reiserfs_journal_cnode *)0; -} - -/* - * this actually means 'can this block be reallocated yet?'. If you set - * search_all, a block can only be allocated if it is not in the current - * transaction, was not freed by the current transaction, and has no chance - * of ever being overwritten by a replay after crashing. - * - * If you don't set search_all, a block can only be allocated if it is not - * in the current transaction. Since deleting a block removes it from the - * current transaction, this case should never happen. If you don't set - * search_all, make sure you never write the block without logging it. - * - * next_zero_bit is a suggestion about the next block to try for find_forward. - * when bl is rejected because it is set in a journal list bitmap, we search - * for the next zero bit in the bitmap that rejected bl. Then, we return - * that through next_zero_bit for find_forward to try. 
- * - * Just because we return something in next_zero_bit does not mean we won't - * reject it on the next call to reiserfs_in_journal - */ -int reiserfs_in_journal(struct super_block *sb, - unsigned int bmap_nr, int bit_nr, int search_all, - b_blocknr_t * next_zero_bit) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_list_bitmap *jb; - int i; - unsigned long bl; - - *next_zero_bit = 0; /* always start this at zero. */ - - PROC_INFO_INC(sb, journal.in_journal); - /* - * If we aren't doing a search_all, this is a metablock, and it - * will be logged before use. if we crash before the transaction - * that freed it commits, this transaction won't have committed - * either, and the block will never be written - */ - if (search_all) { - for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { - PROC_INFO_INC(sb, journal.in_journal_bitmap); - jb = journal->j_list_bitmap + i; - if (jb->journal_list && jb->bitmaps[bmap_nr] && - test_bit(bit_nr, - (unsigned long *)jb->bitmaps[bmap_nr]-> - data)) { - *next_zero_bit = - find_next_zero_bit((unsigned long *) - (jb->bitmaps[bmap_nr]-> - data), - sb->s_blocksize << 3, - bit_nr + 1); - return 1; - } - } - } - - bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr; - /* is it in any old transactions? */ - if (search_all - && (get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) { - return 1; - } - - /* is it in the current transaction. This should never happen */ - if ((get_journal_hash_dev(sb, journal->j_hash_table, bl))) { - BUG(); - return 1; - } - - PROC_INFO_INC(sb, journal.in_journal_reusable); - /* safe for reuse */ - return 0; -} - -/* insert cn into table */ -static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, - struct reiserfs_journal_cnode *cn) -{ - struct reiserfs_journal_cnode *cn_orig; - - cn_orig = journal_hash(table, cn->sb, cn->blocknr); - cn->hnext = cn_orig; - cn->hprev = NULL; - if (cn_orig) { - cn_orig->hprev = cn; - } - journal_hash(table, cn->sb, cn->blocknr) = cn; -} - -/* lock the current transaction */ -static inline void lock_journal(struct super_block *sb) -{ - PROC_INFO_INC(sb, journal.lock_journal); - - reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb); -} - -/* unlock the current transaction */ -static inline void unlock_journal(struct super_block *sb) -{ - mutex_unlock(&SB_JOURNAL(sb)->j_mutex); -} - -static inline void get_journal_list(struct reiserfs_journal_list *jl) -{ - jl->j_refcount++; -} - -static inline void put_journal_list(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - if (jl->j_refcount < 1) { - reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d", - jl->j_trans_id, jl->j_refcount); - } - if (--jl->j_refcount == 0) - kfree(jl); -} - -/* - * this used to be much more involved, and I'm keeping it just in case - * things get ugly again. it gets called by flush_commit_list, and - * cleans up any data stored about blocks freed during a transaction. 
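The journal tracks logged and freed blocks in open-chained hash tables: insert_journal_hash() above pushes a cnode onto the head of its bucket and get_journal_hash_dev() walks the hnext chain looking for a block number. A self-contained userspace model of that chaining; the struct and table size are simplified stand-ins.

#include <stdio.h>
#include <stdlib.h>

struct cnode {
        unsigned long blocknr;
        struct cnode *hnext, *hprev;
};

#define HASH_SIZE 8
#define HASH(bl) ((bl) % HASH_SIZE)

static struct cnode *table[HASH_SIZE];

/* head insertion, as in insert_journal_hash() */
static void hash_insert(struct cnode *cn)
{
        struct cnode *old = table[HASH(cn->blocknr)];

        cn->hnext = old;
        cn->hprev = NULL;
        if (old)
                old->hprev = cn;
        table[HASH(cn->blocknr)] = cn;
}

/* chain walk, as in get_journal_hash_dev() */
static struct cnode *hash_find(unsigned long blocknr)
{
        for (struct cnode *cn = table[HASH(blocknr)]; cn; cn = cn->hnext)
                if (cn->blocknr == blocknr)
                        return cn;
        return NULL;
}

int main(void)
{
        unsigned long blocks[] = { 3, 11, 19, 4 };  /* 3, 11 and 19 collide */

        for (unsigned int i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
                struct cnode *cn = calloc(1, sizeof(*cn));

                if (!cn)
                        return 1;
                cn->blocknr = blocks[i];
                hash_insert(cn);
        }
        printf("lookup 19: %s\n", hash_find(19) ? "found" : "missing");
        printf("lookup  5: %s\n", hash_find(5)  ? "found" : "missing");
        return 0;
}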
- */ -static void cleanup_freed_for_journal_list(struct super_block *sb, - struct reiserfs_journal_list *jl) -{ - - struct reiserfs_list_bitmap *jb = jl->j_list_bitmap; - if (jb) { - cleanup_bitmap_list(sb, jb); - } - jl->j_list_bitmap->journal_list = NULL; - jl->j_list_bitmap = NULL; -} - -static int journal_list_still_alive(struct super_block *s, - unsigned int trans_id) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - struct list_head *entry = &journal->j_journal_list; - struct reiserfs_journal_list *jl; - - if (!list_empty(entry)) { - jl = JOURNAL_LIST_ENTRY(entry->next); - if (jl->j_trans_id <= trans_id) { - return 1; - } - } - return 0; -} - -/* - * If page->mapping was null, we failed to truncate this page for - * some reason. Most likely because it was truncated after being - * logged via data=journal. - * - * This does a check to see if the buffer belongs to one of these - * lost pages before doing the final put_bh. If page->mapping was - * null, it tries to free buffers on the page, which should make the - * final put_page drop the page from the lru. - */ -static void release_buffer_page(struct buffer_head *bh) -{ - struct folio *folio = bh->b_folio; - if (!folio->mapping && folio_trylock(folio)) { - folio_get(folio); - put_bh(bh); - if (!folio->mapping) - try_to_free_buffers(folio); - folio_unlock(folio); - folio_put(folio); - } else { - put_bh(bh); - } -} - -static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) -{ - if (buffer_journaled(bh)) { - reiserfs_warning(NULL, "clm-2084", - "pinned buffer %lu:%pg sent to disk", - bh->b_blocknr, bh->b_bdev); - } - if (uptodate) - set_buffer_uptodate(bh); - else - clear_buffer_uptodate(bh); - - unlock_buffer(bh); - release_buffer_page(bh); -} - -static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) -{ - if (uptodate) - set_buffer_uptodate(bh); - else - clear_buffer_uptodate(bh); - unlock_buffer(bh); - put_bh(bh); -} - -static void submit_logged_buffer(struct buffer_head *bh) -{ - get_bh(bh); - bh->b_end_io = reiserfs_end_buffer_io_sync; - clear_buffer_journal_new(bh); - clear_buffer_dirty(bh); - if (!test_clear_buffer_journal_test(bh)) - BUG(); - if (!buffer_uptodate(bh)) - BUG(); - submit_bh(REQ_OP_WRITE, bh); -} - -static void submit_ordered_buffer(struct buffer_head *bh) -{ - get_bh(bh); - bh->b_end_io = reiserfs_end_ordered_io; - clear_buffer_dirty(bh); - if (!buffer_uptodate(bh)) - BUG(); - submit_bh(REQ_OP_WRITE, bh); -} - -#define CHUNK_SIZE 32 -struct buffer_chunk { - struct buffer_head *bh[CHUNK_SIZE]; - int nr; -}; - -static void write_chunk(struct buffer_chunk *chunk) -{ - int i; - for (i = 0; i < chunk->nr; i++) { - submit_logged_buffer(chunk->bh[i]); - } - chunk->nr = 0; -} - -static void write_ordered_chunk(struct buffer_chunk *chunk) -{ - int i; - for (i = 0; i < chunk->nr; i++) { - submit_ordered_buffer(chunk->bh[i]); - } - chunk->nr = 0; -} - -static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, - spinlock_t * lock, void (fn) (struct buffer_chunk *)) -{ - int ret = 0; - BUG_ON(chunk->nr >= CHUNK_SIZE); - chunk->bh[chunk->nr++] = bh; - if (chunk->nr >= CHUNK_SIZE) { - ret = 1; - if (lock) { - spin_unlock(lock); - fn(chunk); - spin_lock(lock); - } else { - fn(chunk); - } - } - return ret; -} - -static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0); -static struct reiserfs_jh *alloc_jh(void) -{ - struct reiserfs_jh *jh; - while (1) { - jh = kmalloc(sizeof(*jh), GFP_NOFS); - if (jh) { - atomic_inc(&nr_reiserfs_jh); - return jh; - } - yield(); - } -} - -/* - 
* we want to free the jh when the buffer has been written - * and waited on - */ -void reiserfs_free_jh(struct buffer_head *bh) -{ - struct reiserfs_jh *jh; - - jh = bh->b_private; - if (jh) { - bh->b_private = NULL; - jh->bh = NULL; - list_del_init(&jh->list); - kfree(jh); - if (atomic_read(&nr_reiserfs_jh) <= 0) - BUG(); - atomic_dec(&nr_reiserfs_jh); - put_bh(bh); - } -} - -static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, - int tail) -{ - struct reiserfs_jh *jh; - - if (bh->b_private) { - spin_lock(&j->j_dirty_buffers_lock); - if (!bh->b_private) { - spin_unlock(&j->j_dirty_buffers_lock); - goto no_jh; - } - jh = bh->b_private; - list_del_init(&jh->list); - } else { -no_jh: - get_bh(bh); - jh = alloc_jh(); - spin_lock(&j->j_dirty_buffers_lock); - /* - * buffer must be locked for __add_jh, should be able to have - * two adds at the same time - */ - BUG_ON(bh->b_private); - jh->bh = bh; - bh->b_private = jh; - } - jh->jl = j->j_current_jl; - if (tail) - list_add_tail(&jh->list, &jh->jl->j_tail_bh_list); - else { - list_add_tail(&jh->list, &jh->jl->j_bh_list); - } - spin_unlock(&j->j_dirty_buffers_lock); - return 0; -} - -int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) -{ - return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1); -} -int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) -{ - return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0); -} - -#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list) -static int write_ordered_buffers(spinlock_t * lock, - struct reiserfs_journal *j, - struct reiserfs_journal_list *jl, - struct list_head *list) -{ - struct buffer_head *bh; - struct reiserfs_jh *jh; - int ret = j->j_errno; - struct buffer_chunk chunk; - struct list_head tmp; - INIT_LIST_HEAD(&tmp); - - chunk.nr = 0; - spin_lock(lock); - while (!list_empty(list)) { - jh = JH_ENTRY(list->next); - bh = jh->bh; - get_bh(bh); - if (!trylock_buffer(bh)) { - if (!buffer_dirty(bh)) { - list_move(&jh->list, &tmp); - goto loop_next; - } - spin_unlock(lock); - if (chunk.nr) - write_ordered_chunk(&chunk); - wait_on_buffer(bh); - cond_resched(); - spin_lock(lock); - goto loop_next; - } - /* - * in theory, dirty non-uptodate buffers should never get here, - * but the upper layer io error paths still have a few quirks. - * Handle them here as gracefully as we can - */ - if (!buffer_uptodate(bh) && buffer_dirty(bh)) { - clear_buffer_dirty(bh); - ret = -EIO; - } - if (buffer_dirty(bh)) { - list_move(&jh->list, &tmp); - add_to_chunk(&chunk, bh, lock, write_ordered_chunk); - } else { - reiserfs_free_jh(bh); - unlock_buffer(bh); - } -loop_next: - put_bh(bh); - cond_resched_lock(lock); - } - if (chunk.nr) { - spin_unlock(lock); - write_ordered_chunk(&chunk); - spin_lock(lock); - } - while (!list_empty(&tmp)) { - jh = JH_ENTRY(tmp.prev); - bh = jh->bh; - get_bh(bh); - reiserfs_free_jh(bh); - - if (buffer_locked(bh)) { - spin_unlock(lock); - wait_on_buffer(bh); - spin_lock(lock); - } - if (!buffer_uptodate(bh)) { - ret = -EIO; - } - /* - * ugly interaction with invalidate_folio here. - * reiserfs_invalidate_folio will pin any buffer that has a - * valid journal head from an older transaction. If someone - * else sets our buffer dirty after we write it in the first - * loop, and then someone truncates the page away, nobody - * will ever write the buffer. We're safe if we write the - * page one last time after freeing the journal header. 
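The chunk helpers removed above (CHUNK_SIZE, add_to_chunk(), write_chunk(), write_ordered_chunk()) batch up to 32 buffer heads and submit each batch in one go, dropping the spinlock around the actual I/O; write_ordered_buffers() drains ordered data through the same machinery. A minimal userspace sketch of that batching pattern follows. It is not the kernel code: submit_item() is a placeholder for the submit callback and locking is omitted.

#include <stdio.h>

#define CHUNK_SIZE 32

/* Collects up to CHUNK_SIZE items and flushes them as one batch. */
struct chunk {
    int items[CHUNK_SIZE];
    int nr;
};

/* Stand-in for submit_logged_buffer()/submit_ordered_buffer(). */
static void submit_item(int item)
{
    printf("submitting item %d\n", item);
}

static void flush_chunk(struct chunk *c)
{
    for (int i = 0; i < c->nr; i++)
        submit_item(c->items[i]);
    c->nr = 0;
}

/* Returns 1 when adding the item triggered a flush, mirroring add_to_chunk(). */
static int add_to_chunk(struct chunk *c, int item)
{
    c->items[c->nr++] = item;
    if (c->nr >= CHUNK_SIZE) {
        flush_chunk(c);
        return 1;
    }
    return 0;
}

int main(void)
{
    struct chunk c = { .nr = 0 };

    for (int i = 0; i < 70; i++)
        add_to_chunk(&c, i);
    flush_chunk(&c);    /* drain the partial tail, like the final write_chunk() call */
    return 0;
}

In the removed code the same shape appears twice; only the submit callback differs between logged and ordered buffers.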
- */ - if (buffer_dirty(bh) && unlikely(bh->b_folio->mapping == NULL)) { - spin_unlock(lock); - write_dirty_buffer(bh, 0); - spin_lock(lock); - } - put_bh(bh); - cond_resched_lock(lock); - } - spin_unlock(lock); - return ret; -} - -static int flush_older_commits(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - struct reiserfs_journal_list *other_jl; - struct reiserfs_journal_list *first_jl; - struct list_head *entry; - unsigned int trans_id = jl->j_trans_id; - unsigned int other_trans_id; - -find_first: - /* - * first we walk backwards to find the oldest uncommitted transation - */ - first_jl = jl; - entry = jl->j_list.prev; - while (1) { - other_jl = JOURNAL_LIST_ENTRY(entry); - if (entry == &journal->j_journal_list || - atomic_read(&other_jl->j_older_commits_done)) - break; - - first_jl = other_jl; - entry = other_jl->j_list.prev; - } - - /* if we didn't find any older uncommitted transactions, return now */ - if (first_jl == jl) { - return 0; - } - - entry = &first_jl->j_list; - while (1) { - other_jl = JOURNAL_LIST_ENTRY(entry); - other_trans_id = other_jl->j_trans_id; - - if (other_trans_id < trans_id) { - if (atomic_read(&other_jl->j_commit_left) != 0) { - flush_commit_list(s, other_jl, 0); - - /* list we were called with is gone, return */ - if (!journal_list_still_alive(s, trans_id)) - return 1; - - /* - * the one we just flushed is gone, this means - * all older lists are also gone, so first_jl - * is no longer valid either. Go back to the - * beginning. - */ - if (!journal_list_still_alive - (s, other_trans_id)) { - goto find_first; - } - } - entry = entry->next; - if (entry == &journal->j_journal_list) - return 0; - } else { - return 0; - } - } - return 0; -} - -static int reiserfs_async_progress_wait(struct super_block *s) -{ - struct reiserfs_journal *j = SB_JOURNAL(s); - - if (atomic_read(&j->j_async_throttle)) { - int depth; - - depth = reiserfs_write_unlock_nested(s); - wait_var_event_timeout(&j->j_async_throttle, - atomic_read(&j->j_async_throttle) == 0, - HZ / 10); - reiserfs_write_lock_nested(s, depth); - } - - return 0; -} - -/* - * if this journal list still has commit blocks unflushed, send them to disk. - * - * log areas must be flushed in order (transaction 2 can't commit before - * transaction 1) Before the commit block can by written, every other log - * block must be safely on disk - */ -static int flush_commit_list(struct super_block *s, - struct reiserfs_journal_list *jl, int flushall) -{ - int i; - b_blocknr_t bn; - struct buffer_head *tbh = NULL; - unsigned int trans_id = jl->j_trans_id; - struct reiserfs_journal *journal = SB_JOURNAL(s); - int retval = 0; - int write_len; - int depth; - - reiserfs_check_lock_depth(s, "flush_commit_list"); - - if (atomic_read(&jl->j_older_commits_done)) { - return 0; - } - - /* - * before we can put our commit blocks on disk, we have to make - * sure everyone older than us is on disk too - */ - BUG_ON(jl->j_len <= 0); - BUG_ON(trans_id == journal->j_trans_id); - - get_journal_list(jl); - if (flushall) { - if (flush_older_commits(s, jl) == 1) { - /* - * list disappeared during flush_older_commits. 
- * return - */ - goto put_jl; - } - } - - /* make sure nobody is trying to flush this one at the same time */ - reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s); - - if (!journal_list_still_alive(s, trans_id)) { - mutex_unlock(&jl->j_commit_mutex); - goto put_jl; - } - BUG_ON(jl->j_trans_id == 0); - - /* this commit is done, exit */ - if (atomic_read(&jl->j_commit_left) <= 0) { - if (flushall) { - atomic_set(&jl->j_older_commits_done, 1); - } - mutex_unlock(&jl->j_commit_mutex); - goto put_jl; - } - - if (!list_empty(&jl->j_bh_list)) { - int ret; - - /* - * We might sleep in numerous places inside - * write_ordered_buffers. Relax the write lock. - */ - depth = reiserfs_write_unlock_nested(s); - ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, - journal, jl, &jl->j_bh_list); - if (ret < 0 && retval == 0) - retval = ret; - reiserfs_write_lock_nested(s, depth); - } - BUG_ON(!list_empty(&jl->j_bh_list)); - /* - * for the description block and all the log blocks, submit any buffers - * that haven't already reached the disk. Try to write at least 256 - * log blocks. later on, we will only wait on blocks that correspond - * to this transaction, but while we're unplugging we might as well - * get a chunk of data on there. - */ - atomic_inc(&journal->j_async_throttle); - write_len = jl->j_len + 1; - if (write_len < 256) - write_len = 256; - for (i = 0 ; i < write_len ; i++) { - bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) % - SB_ONDISK_JOURNAL_SIZE(s); - tbh = journal_find_get_block(s, bn); - if (tbh) { - if (buffer_dirty(tbh)) { - depth = reiserfs_write_unlock_nested(s); - write_dirty_buffer(tbh, 0); - reiserfs_write_lock_nested(s, depth); - } - put_bh(tbh) ; - } - } - if (atomic_dec_and_test(&journal->j_async_throttle)) - wake_up_var(&journal->j_async_throttle); - - for (i = 0; i < (jl->j_len + 1); i++) { - bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + - (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); - tbh = journal_find_get_block(s, bn); - - depth = reiserfs_write_unlock_nested(s); - __wait_on_buffer(tbh); - reiserfs_write_lock_nested(s, depth); - /* - * since we're using ll_rw_blk above, it might have skipped - * over a locked buffer. Double check here - */ - /* redundant, sync_dirty_buffer() checks */ - if (buffer_dirty(tbh)) { - depth = reiserfs_write_unlock_nested(s); - sync_dirty_buffer(tbh); - reiserfs_write_lock_nested(s, depth); - } - if (unlikely(!buffer_uptodate(tbh))) { -#ifdef CONFIG_REISERFS_CHECK - reiserfs_warning(s, "journal-601", - "buffer write failed"); -#endif - retval = -EIO; - } - /* once for journal_find_get_block */ - put_bh(tbh); - /* once due to original getblk in do_journal_end */ - put_bh(tbh); - atomic_dec(&jl->j_commit_left); - } - - BUG_ON(atomic_read(&jl->j_commit_left) != 1); - - /* - * If there was a write error in the journal - we can't commit - * this transaction - it will be invalid and, if successful, - * will just end up propagating the write error out to - * the file system. 
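flush_commit_list() above addresses the log as a circular area: block i of a transaction that starts at offset j_start lives at SB_ONDISK_JOURNAL_1st_BLOCK + (j_start + i) % SB_ONDISK_JOURNAL_SIZE, with one description block in front of the j_len logged blocks and one commit block behind them, which is why later code advances by j_len + 2. A small standalone sketch of that mapping; the geometry numbers are made up:

#include <stdio.h>

/* Hypothetical geometry: a circular journal of journal_size blocks
 * starting at journal_first_block on the device. */
static unsigned long journal_first_block = 18;
static unsigned long journal_size = 8192;

/* Device block holding slot (start + i) of the circular log. */
static unsigned long log_block(unsigned long start, unsigned long i)
{
    return journal_first_block + (start + i) % journal_size;
}

int main(void)
{
    unsigned long j_start = 8190;   /* transaction begins near the end of the area */
    unsigned long j_len = 5;        /* five logged data blocks */

    /* slot 0: description block, slots 1..j_len: data, slot j_len+1: commit block */
    for (unsigned long i = 0; i < j_len + 2; i++)
        printf("slot %lu -> device block %lu\n", i, log_block(j_start, i));

    /* the next transaction starts right after the commit block, wrapping if needed */
    printf("next transaction starts at offset %lu\n",
           (j_start + j_len + 2) % journal_size);
    return 0;
}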
- */ - if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { - if (buffer_dirty(jl->j_commit_bh)) - BUG(); - mark_buffer_dirty(jl->j_commit_bh) ; - depth = reiserfs_write_unlock_nested(s); - if (reiserfs_barrier_flush(s)) - __sync_dirty_buffer(jl->j_commit_bh, - REQ_SYNC | REQ_PREFLUSH | REQ_FUA); - else - sync_dirty_buffer(jl->j_commit_bh); - reiserfs_write_lock_nested(s, depth); - } - - /* - * If there was a write error in the journal - we can't commit this - * transaction - it will be invalid and, if successful, will just end - * up propagating the write error out to the filesystem. - */ - if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { -#ifdef CONFIG_REISERFS_CHECK - reiserfs_warning(s, "journal-615", "buffer write failed"); -#endif - retval = -EIO; - } - bforget(jl->j_commit_bh); - if (journal->j_last_commit_id != 0 && - (jl->j_trans_id - journal->j_last_commit_id) != 1) { - reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu", - journal->j_last_commit_id, jl->j_trans_id); - } - journal->j_last_commit_id = jl->j_trans_id; - - /* - * now, every commit block is on the disk. It is safe to allow - * blocks freed during this transaction to be reallocated - */ - cleanup_freed_for_journal_list(s, jl); - - retval = retval ? retval : journal->j_errno; - - /* mark the metadata dirty */ - if (!retval) - dirty_one_transaction(s, jl); - atomic_dec(&jl->j_commit_left); - - if (flushall) { - atomic_set(&jl->j_older_commits_done, 1); - } - mutex_unlock(&jl->j_commit_mutex); -put_jl: - put_journal_list(s, jl); - - if (retval) - reiserfs_abort(s, retval, "Journal write error in %s", - __func__); - return retval; -} - -/* - * flush_journal_list frequently needs to find a newer transaction for a - * given block. This does that, or returns NULL if it can't find anything - */ -static struct reiserfs_journal_list *find_newer_jl_for_cn(struct - reiserfs_journal_cnode - *cn) -{ - struct super_block *sb = cn->sb; - b_blocknr_t blocknr = cn->blocknr; - - cn = cn->hprev; - while (cn) { - if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) { - return cn->jlist; - } - cn = cn->hprev; - } - return NULL; -} - -static void remove_journal_hash(struct super_block *, - struct reiserfs_journal_cnode **, - struct reiserfs_journal_list *, unsigned long, - int); - -/* - * once all the real blocks have been flushed, it is safe to remove them - * from the journal list for this transaction. Aside from freeing the - * cnode, this also allows the block to be reallocated for data blocks - * if it had been deleted. - */ -static void remove_all_from_journal_list(struct super_block *sb, - struct reiserfs_journal_list *jl, - int debug) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_journal_cnode *cn, *last; - cn = jl->j_realblock; - - /* - * which is better, to lock once around the whole loop, or - * to lock for each call to remove_journal_hash? - */ - while (cn) { - if (cn->blocknr != 0) { - if (debug) { - reiserfs_warning(sb, "reiserfs-2201", - "block %u, bh is %d, state %ld", - cn->blocknr, cn->bh ? 1 : 0, - cn->state); - } - cn->state = 0; - remove_journal_hash(sb, journal->j_list_hash_table, - jl, cn->blocknr, 1); - } - last = cn; - cn = cn->next; - free_cnode(sb, last); - } - jl->j_realblock = NULL; -} - -/* - * if this timestamp is greater than the timestamp we wrote last to the - * header block, write it to the header block. 
once this is done, I can - * safely say the log area for this transaction won't ever be replayed, - * and I can start releasing blocks in this transaction for reuse as data - * blocks. called by flush_journal_list, before it calls - * remove_all_from_journal_list - */ -static int _update_journal_header_block(struct super_block *sb, - unsigned long offset, - unsigned int trans_id) -{ - struct reiserfs_journal_header *jh; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - int depth; - - if (reiserfs_is_journal_aborted(journal)) - return -EIO; - - if (trans_id >= journal->j_last_flush_trans_id) { - if (buffer_locked((journal->j_header_bh))) { - depth = reiserfs_write_unlock_nested(sb); - __wait_on_buffer(journal->j_header_bh); - reiserfs_write_lock_nested(sb, depth); - if (unlikely(!buffer_uptodate(journal->j_header_bh))) { -#ifdef CONFIG_REISERFS_CHECK - reiserfs_warning(sb, "journal-699", - "buffer write failed"); -#endif - return -EIO; - } - } - journal->j_last_flush_trans_id = trans_id; - journal->j_first_unflushed_offset = offset; - jh = (struct reiserfs_journal_header *)(journal->j_header_bh-> - b_data); - jh->j_last_flush_trans_id = cpu_to_le32(trans_id); - jh->j_first_unflushed_offset = cpu_to_le32(offset); - jh->j_mount_id = cpu_to_le32(journal->j_mount_id); - - set_buffer_dirty(journal->j_header_bh); - depth = reiserfs_write_unlock_nested(sb); - - if (reiserfs_barrier_flush(sb)) - __sync_dirty_buffer(journal->j_header_bh, - REQ_SYNC | REQ_PREFLUSH | REQ_FUA); - else - sync_dirty_buffer(journal->j_header_bh); - - reiserfs_write_lock_nested(sb, depth); - if (!buffer_uptodate(journal->j_header_bh)) { - reiserfs_warning(sb, "journal-837", - "IO error during journal replay"); - return -EIO; - } - } - return 0; -} - -static int update_journal_header_block(struct super_block *sb, - unsigned long offset, - unsigned int trans_id) -{ - return _update_journal_header_block(sb, offset, trans_id); -} - -/* -** flush any and all journal lists older than you are -** can only be called from flush_journal_list -*/ -static int flush_older_journal_lists(struct super_block *sb, - struct reiserfs_journal_list *jl) -{ - struct list_head *entry; - struct reiserfs_journal_list *other_jl; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - unsigned int trans_id = jl->j_trans_id; - - /* - * we know we are the only ones flushing things, no extra race - * protection is required. - */ -restart: - entry = journal->j_journal_list.next; - /* Did we wrap? */ - if (entry == &journal->j_journal_list) - return 0; - other_jl = JOURNAL_LIST_ENTRY(entry); - if (other_jl->j_trans_id < trans_id) { - BUG_ON(other_jl->j_refcount <= 0); - /* do not flush all */ - flush_journal_list(sb, other_jl, 0); - - /* other_jl is now deleted from the list */ - goto restart; - } - return 0; -} - -static void del_from_work_list(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - if (!list_empty(&jl->j_working_list)) { - list_del_init(&jl->j_working_list); - journal->j_num_work_lists--; - } -} - -/* - * flush a journal list, both commit and real blocks - * - * always set flushall to 1, unless you are calling from inside - * flush_journal_list - * - * IMPORTANT. This can only be called while there are no journal writers, - * and the journal is locked. 
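_update_journal_header_block() above records the last flushed transaction id, the first unflushed offset and the mount id in the header block with cpu_to_le32() before syncing it, optionally behind a preflush/FUA barrier. A userspace sketch of just the serialization step; the byte offsets here are illustrative placeholders, not the real struct reiserfs_journal_header layout:

#include <stdint.h>
#include <stdio.h>

/* Portable host-to-little-endian store, the userspace analogue of cpu_to_le32(). */
static void put_le32(unsigned char *p, uint32_t v)
{
    p[0] = v & 0xff;
    p[1] = (v >> 8) & 0xff;
    p[2] = (v >> 16) & 0xff;
    p[3] = (v >> 24) & 0xff;
}

/* Only the three fields the removed update path touches are modeled;
 * these offsets are invented for the sketch. */
enum {
    OFF_LAST_FLUSH_TRANS_ID = 0,
    OFF_FIRST_UNFLUSHED_OFFSET = 4,
    OFF_MOUNT_ID = 8,
};

static void update_header_block(unsigned char *block, uint32_t trans_id,
                                uint32_t first_unflushed, uint32_t mount_id)
{
    put_le32(block + OFF_LAST_FLUSH_TRANS_ID, trans_id);
    put_le32(block + OFF_FIRST_UNFLUSHED_OFFSET, first_unflushed);
    put_le32(block + OFF_MOUNT_ID, mount_id);
    /* the removed code then marks the buffer dirty and syncs it to disk */
}

int main(void)
{
    unsigned char block[4096] = { 0 };

    update_header_block(block, 1234, 517, 10);
    printf("first bytes: %02x %02x %02x %02x\n",
           block[0], block[1], block[2], block[3]);
    return 0;
}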
That means it can only be called from - * do_journal_end, or by journal_release - */ -static int flush_journal_list(struct super_block *s, - struct reiserfs_journal_list *jl, int flushall) -{ - struct reiserfs_journal_list *pjl; - struct reiserfs_journal_cnode *cn; - int count; - int was_jwait = 0; - int was_dirty = 0; - struct buffer_head *saved_bh; - unsigned long j_len_saved = jl->j_len; - struct reiserfs_journal *journal = SB_JOURNAL(s); - int err = 0; - int depth; - - BUG_ON(j_len_saved <= 0); - - if (atomic_read(&journal->j_wcount) != 0) { - reiserfs_warning(s, "clm-2048", "called with wcount %d", - atomic_read(&journal->j_wcount)); - } - - /* if flushall == 0, the lock is already held */ - if (flushall) { - reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s); - } else if (mutex_trylock(&journal->j_flush_mutex)) { - BUG(); - } - - count = 0; - if (j_len_saved > journal->j_trans_max) { - reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu", - j_len_saved, jl->j_trans_id); - return 0; - } - - /* if all the work is already done, get out of here */ - if (atomic_read(&jl->j_nonzerolen) <= 0 && - atomic_read(&jl->j_commit_left) <= 0) { - goto flush_older_and_return; - } - - /* - * start by putting the commit list on disk. This will also flush - * the commit lists of any olders transactions - */ - flush_commit_list(s, jl, 1); - - if (!(jl->j_state & LIST_DIRTY) - && !reiserfs_is_journal_aborted(journal)) - BUG(); - - /* are we done now? */ - if (atomic_read(&jl->j_nonzerolen) <= 0 && - atomic_read(&jl->j_commit_left) <= 0) { - goto flush_older_and_return; - } - - /* - * loop through each cnode, see if we need to write it, - * or wait on a more recent transaction, or just ignore it - */ - if (atomic_read(&journal->j_wcount) != 0) { - reiserfs_panic(s, "journal-844", "journal list is flushing, " - "wcount is not 0"); - } - cn = jl->j_realblock; - while (cn) { - was_jwait = 0; - was_dirty = 0; - saved_bh = NULL; - /* blocknr of 0 is no longer in the hash, ignore it */ - if (cn->blocknr == 0) { - goto free_cnode; - } - - /* - * This transaction failed commit. - * Don't write out to the disk - */ - if (!(jl->j_state & LIST_DIRTY)) - goto free_cnode; - - pjl = find_newer_jl_for_cn(cn); - /* - * the order is important here. We check pjl to make sure we - * don't clear BH_JDirty_wait if we aren't the one writing this - * block to disk - */ - if (!pjl && cn->bh) { - saved_bh = cn->bh; - - /* - * we do this to make sure nobody releases the - * buffer while we are working with it - */ - get_bh(saved_bh); - - if (buffer_journal_dirty(saved_bh)) { - BUG_ON(!can_dirty(cn)); - was_jwait = 1; - was_dirty = 1; - } else if (can_dirty(cn)) { - /* - * everything with !pjl && jwait - * should be writable - */ - BUG(); - } - } - - /* - * if someone has this block in a newer transaction, just make - * sure they are committed, and don't try writing it to disk - */ - if (pjl) { - if (atomic_read(&pjl->j_commit_left)) - flush_commit_list(s, pjl, 1); - goto free_cnode; - } - - /* - * bh == NULL when the block got to disk on its own, OR, - * the block got freed in a future transaction - */ - if (saved_bh == NULL) { - goto free_cnode; - } - - /* - * this should never happen. kupdate_one_transaction has - * this list locked while it works, so we should never see a - * buffer here that is not marked JDirty_wait - */ - if ((!was_jwait) && !buffer_locked(saved_bh)) { - reiserfs_warning(s, "journal-813", - "BAD! 
buffer %llu %cdirty %cjwait, " - "not in a newer transaction", - (unsigned long long)saved_bh-> - b_blocknr, was_dirty ? ' ' : '!', - was_jwait ? ' ' : '!'); - } - if (was_dirty) { - /* - * we inc again because saved_bh gets decremented - * at free_cnode - */ - get_bh(saved_bh); - set_bit(BLOCK_NEEDS_FLUSH, &cn->state); - lock_buffer(saved_bh); - BUG_ON(cn->blocknr != saved_bh->b_blocknr); - if (buffer_dirty(saved_bh)) - submit_logged_buffer(saved_bh); - else - unlock_buffer(saved_bh); - count++; - } else { - reiserfs_warning(s, "clm-2082", - "Unable to flush buffer %llu in %s", - (unsigned long long)saved_bh-> - b_blocknr, __func__); - } -free_cnode: - cn = cn->next; - if (saved_bh) { - /* - * we incremented this to keep others from - * taking the buffer head away - */ - put_bh(saved_bh); - if (atomic_read(&saved_bh->b_count) < 0) { - reiserfs_warning(s, "journal-945", - "saved_bh->b_count < 0"); - } - } - } - if (count > 0) { - cn = jl->j_realblock; - while (cn) { - if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { - if (!cn->bh) { - reiserfs_panic(s, "journal-1011", - "cn->bh is NULL"); - } - - depth = reiserfs_write_unlock_nested(s); - __wait_on_buffer(cn->bh); - reiserfs_write_lock_nested(s, depth); - - if (!cn->bh) { - reiserfs_panic(s, "journal-1012", - "cn->bh is NULL"); - } - if (unlikely(!buffer_uptodate(cn->bh))) { -#ifdef CONFIG_REISERFS_CHECK - reiserfs_warning(s, "journal-949", - "buffer write failed"); -#endif - err = -EIO; - } - /* - * note, we must clear the JDirty_wait bit - * after the up to date check, otherwise we - * race against our flushpage routine - */ - BUG_ON(!test_clear_buffer_journal_dirty - (cn->bh)); - - /* drop one ref for us */ - put_bh(cn->bh); - /* drop one ref for journal_mark_dirty */ - release_buffer_page(cn->bh); - } - cn = cn->next; - } - } - - if (err) - reiserfs_abort(s, -EIO, - "Write error while pushing transaction to disk in %s", - __func__); -flush_older_and_return: - - /* - * before we can update the journal header block, we _must_ flush all - * real blocks from all older transactions to disk. This is because - * once the header block is updated, this transaction will not be - * replayed after a crash - */ - if (flushall) { - flush_older_journal_lists(s, jl); - } - - err = journal->j_errno; - /* - * before we can remove everything from the hash tables for this - * transaction, we must make sure it can never be replayed - * - * since we are only called from do_journal_end, we know for sure there - * are no allocations going on while we are flushing journal lists. 
So, - * we only need to update the journal header block for the last list - * being flushed - */ - if (!err && flushall) { - err = - update_journal_header_block(s, - (jl->j_start + jl->j_len + - 2) % SB_ONDISK_JOURNAL_SIZE(s), - jl->j_trans_id); - if (err) - reiserfs_abort(s, -EIO, - "Write error while updating journal header in %s", - __func__); - } - remove_all_from_journal_list(s, jl, 0); - list_del_init(&jl->j_list); - journal->j_num_lists--; - del_from_work_list(s, jl); - - if (journal->j_last_flush_id != 0 && - (jl->j_trans_id - journal->j_last_flush_id) != 1) { - reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu", - journal->j_last_flush_id, jl->j_trans_id); - } - journal->j_last_flush_id = jl->j_trans_id; - - /* - * not strictly required since we are freeing the list, but it should - * help find code using dead lists later on - */ - jl->j_len = 0; - atomic_set(&jl->j_nonzerolen, 0); - jl->j_start = 0; - jl->j_realblock = NULL; - jl->j_commit_bh = NULL; - jl->j_trans_id = 0; - jl->j_state = 0; - put_journal_list(s, jl); - if (flushall) - mutex_unlock(&journal->j_flush_mutex); - return err; -} - -static int write_one_transaction(struct super_block *s, - struct reiserfs_journal_list *jl, - struct buffer_chunk *chunk) -{ - struct reiserfs_journal_cnode *cn; - int ret = 0; - - jl->j_state |= LIST_TOUCHED; - del_from_work_list(s, jl); - if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) { - return 0; - } - - cn = jl->j_realblock; - while (cn) { - /* - * if the blocknr == 0, this has been cleared from the hash, - * skip it - */ - if (cn->blocknr == 0) { - goto next; - } - if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { - struct buffer_head *tmp_bh; - /* - * we can race against journal_mark_freed when we try - * to lock_buffer(cn->bh), so we have to inc the buffer - * count, and recheck things after locking - */ - tmp_bh = cn->bh; - get_bh(tmp_bh); - lock_buffer(tmp_bh); - if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) { - if (!buffer_journal_dirty(tmp_bh) || - buffer_journal_prepared(tmp_bh)) - BUG(); - add_to_chunk(chunk, tmp_bh, NULL, write_chunk); - ret++; - } else { - /* note, cn->bh might be null now */ - unlock_buffer(tmp_bh); - } - put_bh(tmp_bh); - } -next: - cn = cn->next; - cond_resched(); - } - return ret; -} - -/* used by flush_commit_list */ -static void dirty_one_transaction(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - struct reiserfs_journal_cnode *cn; - struct reiserfs_journal_list *pjl; - - jl->j_state |= LIST_DIRTY; - cn = jl->j_realblock; - while (cn) { - /* - * look for a more recent transaction that logged this - * buffer. Only the most recent transaction with a buffer in - * it is allowed to send that buffer to disk - */ - pjl = find_newer_jl_for_cn(cn); - if (!pjl && cn->blocknr && cn->bh - && buffer_journal_dirty(cn->bh)) { - BUG_ON(!can_dirty(cn)); - /* - * if the buffer is prepared, it will either be logged - * or restored. 
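dirty_one_transaction() and find_newer_jl_for_cn() above enforce the rule that only the most recent transaction which logged a block may write that block back; older lists must simply wait for the newer commit. A toy illustration of that ownership rule, using arrays instead of the cnode hash chains:

#include <stdbool.h>
#include <stdio.h>

#define MAX_BLOCKS 4

/* A toy transaction: its id and the block numbers it logged. */
struct txn {
    unsigned int trans_id;
    unsigned long blocks[MAX_BLOCKS];
    int nr_blocks;
};

static bool txn_logs_block(const struct txn *t, unsigned long block)
{
    for (int i = 0; i < t->nr_blocks; i++)
        if (t->blocks[i] == block)
            return true;
    return false;
}

/* Index of the newest transaction (array ordered oldest to newest) that
 * logged @block, or -1; only that transaction may write the block back. */
static int newest_owner(const struct txn *txns, int n, unsigned long block)
{
    for (int i = n - 1; i >= 0; i--)
        if (txn_logs_block(&txns[i], block))
            return i;
    return -1;
}

int main(void)
{
    struct txn txns[] = {
        { .trans_id = 10, .blocks = { 100, 101 }, .nr_blocks = 2 },
        { .trans_id = 11, .blocks = { 101, 102 }, .nr_blocks = 2 },
    };

    /* block 101 appears in both transactions; only id 11 writes it back */
    int owner = newest_owner(txns, 2, 101);
    if (owner >= 0)
        printf("block 101 is written back by transaction %u\n",
               txns[owner].trans_id);
    return 0;
}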
If restored, we need to make sure - * it actually gets marked dirty - */ - clear_buffer_journal_new(cn->bh); - if (buffer_journal_prepared(cn->bh)) { - set_buffer_journal_restore_dirty(cn->bh); - } else { - set_buffer_journal_test(cn->bh); - mark_buffer_dirty(cn->bh); - } - } - cn = cn->next; - } -} - -static int kupdate_transactions(struct super_block *s, - struct reiserfs_journal_list *jl, - struct reiserfs_journal_list **next_jl, - unsigned int *next_trans_id, - int num_blocks, int num_trans) -{ - int ret = 0; - int written = 0; - int transactions_flushed = 0; - unsigned int orig_trans_id = jl->j_trans_id; - struct buffer_chunk chunk; - struct list_head *entry; - struct reiserfs_journal *journal = SB_JOURNAL(s); - chunk.nr = 0; - - reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s); - if (!journal_list_still_alive(s, orig_trans_id)) { - goto done; - } - - /* - * we've got j_flush_mutex held, nobody is going to delete any - * of these lists out from underneath us - */ - while ((num_trans && transactions_flushed < num_trans) || - (!num_trans && written < num_blocks)) { - - if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) || - atomic_read(&jl->j_commit_left) - || !(jl->j_state & LIST_DIRTY)) { - del_from_work_list(s, jl); - break; - } - ret = write_one_transaction(s, jl, &chunk); - - if (ret < 0) - goto done; - transactions_flushed++; - written += ret; - entry = jl->j_list.next; - - /* did we wrap? */ - if (entry == &journal->j_journal_list) { - break; - } - jl = JOURNAL_LIST_ENTRY(entry); - - /* don't bother with older transactions */ - if (jl->j_trans_id <= orig_trans_id) - break; - } - if (chunk.nr) { - write_chunk(&chunk); - } - -done: - mutex_unlock(&journal->j_flush_mutex); - return ret; -} - -/* - * for o_sync and fsync heavy applications, they tend to use - * all the journa list slots with tiny transactions. These - * trigger lots and lots of calls to update the header block, which - * adds seeks and slows things down. 
- * - * This function tries to clear out a large chunk of the journal lists - * at once, which makes everything faster since only the newest journal - * list updates the header block - */ -static int flush_used_journal_lists(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - unsigned long len = 0; - unsigned long cur_len; - int i; - int limit = 256; - struct reiserfs_journal_list *tjl; - struct reiserfs_journal_list *flush_jl; - unsigned int trans_id; - struct reiserfs_journal *journal = SB_JOURNAL(s); - - flush_jl = tjl = jl; - - /* in data logging mode, try harder to flush a lot of blocks */ - if (reiserfs_data_log(s)) - limit = 1024; - /* flush for 256 transactions or limit blocks, whichever comes first */ - for (i = 0; i < 256 && len < limit; i++) { - if (atomic_read(&tjl->j_commit_left) || - tjl->j_trans_id < jl->j_trans_id) { - break; - } - cur_len = atomic_read(&tjl->j_nonzerolen); - if (cur_len > 0) { - tjl->j_state &= ~LIST_TOUCHED; - } - len += cur_len; - flush_jl = tjl; - if (tjl->j_list.next == &journal->j_journal_list) - break; - tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); - } - get_journal_list(jl); - get_journal_list(flush_jl); - - /* - * try to find a group of blocks we can flush across all the - * transactions, but only bother if we've actually spanned - * across multiple lists - */ - if (flush_jl != jl) - kupdate_transactions(s, jl, &tjl, &trans_id, len, i); - - flush_journal_list(s, flush_jl, 1); - put_journal_list(s, flush_jl); - put_journal_list(s, jl); - return 0; -} - -/* - * removes any nodes in table with name block and dev as bh. - * only touchs the hnext and hprev pointers. - */ -static void remove_journal_hash(struct super_block *sb, - struct reiserfs_journal_cnode **table, - struct reiserfs_journal_list *jl, - unsigned long block, int remove_freed) -{ - struct reiserfs_journal_cnode *cur; - struct reiserfs_journal_cnode **head; - - head = &(journal_hash(table, sb, block)); - if (!head) { - return; - } - cur = *head; - while (cur) { - if (cur->blocknr == block && cur->sb == sb - && (jl == NULL || jl == cur->jlist) - && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { - if (cur->hnext) { - cur->hnext->hprev = cur->hprev; - } - if (cur->hprev) { - cur->hprev->hnext = cur->hnext; - } else { - *head = cur->hnext; - } - cur->blocknr = 0; - cur->sb = NULL; - cur->state = 0; - /* - * anybody who clears the cur->bh will also - * dec the nonzerolen - */ - if (cur->bh && cur->jlist) - atomic_dec(&cur->jlist->j_nonzerolen); - cur->bh = NULL; - cur->jlist = NULL; - } - cur = cur->hnext; - } -} - -static void free_journal_ram(struct super_block *sb) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - kfree(journal->j_current_jl); - journal->j_num_lists--; - - vfree(journal->j_cnode_free_orig); - free_list_bitmaps(sb, journal->j_list_bitmap); - free_bitmap_nodes(sb); /* must be after free_list_bitmaps */ - if (journal->j_header_bh) { - brelse(journal->j_header_bh); - } - /* - * j_header_bh is on the journal dev, make sure - * not to release the journal dev until we brelse j_header_bh - */ - release_journal_dev(journal); - vfree(journal); -} - -/* - * call on unmount. Only set error to 1 if you haven't made your way out - * of read_super() yet. Any other caller must keep error at 0. 
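remove_journal_hash() above unlinks matching cnodes from a doubly linked hash chain through their hnext/hprev pointers. A generic sketch of the same unlink-by-key step; unlike the removed code, which only clears the cnode fields and leaves recycling to free_cnode(), this sketch frees the nodes directly:

#include <stdio.h>
#include <stdlib.h>

/* Minimal doubly linked hash-chain node, like the hnext/hprev pair above. */
struct node {
    unsigned long key;
    struct node *next, *prev;
};

/* Unlink (and free) every node with @key from the chain rooted at *head. */
static void remove_key(struct node **head, unsigned long key)
{
    struct node *cur = *head;

    while (cur) {
        struct node *next = cur->next;

        if (cur->key == key) {
            if (cur->next)
                cur->next->prev = cur->prev;
            if (cur->prev)
                cur->prev->next = cur->next;
            else
                *head = cur->next;
            free(cur);
        }
        cur = next;
    }
}

int main(void)
{
    struct node *head = NULL;

    for (unsigned long k = 1; k <= 5; k++) {
        struct node *n = calloc(1, sizeof(*n));
        n->key = (k == 3) ? 42 : k;
        n->next = head;
        if (head)
            head->prev = n;
        head = n;
    }
    remove_key(&head, 42);
    for (struct node *n = head; n; n = n->next)
        printf("%lu ", n->key);
    printf("\n");
    return 0;
}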
- */ -static int do_journal_release(struct reiserfs_transaction_handle *th, - struct super_block *sb, int error) -{ - struct reiserfs_transaction_handle myth; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - - /* - * we only want to flush out transactions if we were - * called with error == 0 - */ - if (!error && !sb_rdonly(sb)) { - /* end the current trans */ - BUG_ON(!th->t_trans_id); - do_journal_end(th, FLUSH_ALL); - - /* - * make sure something gets logged to force - * our way into the flush code - */ - if (!journal_join(&myth, sb)) { - reiserfs_prepare_for_journal(sb, - SB_BUFFER_WITH_SB(sb), - 1); - journal_mark_dirty(&myth, SB_BUFFER_WITH_SB(sb)); - do_journal_end(&myth, FLUSH_ALL); - } - } - - /* this also catches errors during the do_journal_end above */ - if (!error && reiserfs_is_journal_aborted(journal)) { - memset(&myth, 0, sizeof(myth)); - if (!journal_join_abort(&myth, sb)) { - reiserfs_prepare_for_journal(sb, - SB_BUFFER_WITH_SB(sb), - 1); - journal_mark_dirty(&myth, SB_BUFFER_WITH_SB(sb)); - do_journal_end(&myth, FLUSH_ALL); - } - } - - - /* - * We must release the write lock here because - * the workqueue job (flush_async_commit) needs this lock - */ - reiserfs_write_unlock(sb); - - /* - * Cancel flushing of old commits. Note that neither of these works - * will be requeued because superblock is being shutdown and doesn't - * have SB_ACTIVE set. - */ - reiserfs_cancel_old_flush(sb); - /* wait for all commits to finish */ - cancel_delayed_work_sync(&SB_JOURNAL(sb)->j_work); - - free_journal_ram(sb); - - reiserfs_write_lock(sb); - - return 0; -} - -/* * call on unmount. flush all journal trans, release all alloc'd ram */ -int journal_release(struct reiserfs_transaction_handle *th, - struct super_block *sb) -{ - return do_journal_release(th, sb, 0); -} - -/* only call from an error condition inside reiserfs_read_super! */ -int journal_release_error(struct reiserfs_transaction_handle *th, - struct super_block *sb) -{ - return do_journal_release(th, sb, 1); -} - -/* - * compares description block with commit block. 
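journal_compare_desc_commit(), removed just below, decides whether a description/commit block pair describes an intact transaction: the transaction ids and lengths must match and the length must be positive and within j_trans_max. A standalone sketch of those rules, using plain structs in place of the on-disk accessors:

#include <stdbool.h>
#include <stdio.h>

struct desc_block {
    unsigned int trans_id;
    unsigned int trans_len;    /* number of logged blocks */
};

struct commit_block {
    unsigned int trans_id;
    unsigned int trans_len;
};

/* Returns true when the pair is inconsistent, i.e. the transaction is torn. */
static bool desc_commit_mismatch(const struct desc_block *d,
                                 const struct commit_block *c,
                                 unsigned int trans_max)
{
    return c->trans_id != d->trans_id ||
           c->trans_len != d->trans_len ||
           c->trans_len > trans_max ||
           c->trans_len == 0;    /* length must be positive */
}

int main(void)
{
    struct desc_block d = { .trans_id = 42, .trans_len = 17 };
    struct commit_block good = { .trans_id = 42, .trans_len = 17 };
    struct commit_block torn = { .trans_id = 41, .trans_len = 17 };

    printf("matching pair: %s\n", desc_commit_mismatch(&d, &good, 1024) ? "corrupt" : "ok");
    printf("stale commit:  %s\n", desc_commit_mismatch(&d, &torn, 1024) ? "corrupt" : "ok");
    return 0;
}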
- * returns 1 if they differ, 0 if they are the same - */ -static int journal_compare_desc_commit(struct super_block *sb, - struct reiserfs_journal_desc *desc, - struct reiserfs_journal_commit *commit) -{ - if (get_commit_trans_id(commit) != get_desc_trans_id(desc) || - get_commit_trans_len(commit) != get_desc_trans_len(desc) || - get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max || - get_commit_trans_len(commit) <= 0) { - return 1; - } - return 0; -} - -/* - * returns 0 if it did not find a description block - * returns -1 if it found a corrupt commit block - * returns 1 if both desc and commit were valid - * NOTE: only called during fs mount - */ -static int journal_transaction_is_valid(struct super_block *sb, - struct buffer_head *d_bh, - unsigned int *oldest_invalid_trans_id, - unsigned long *newest_mount_id) -{ - struct reiserfs_journal_desc *desc; - struct reiserfs_journal_commit *commit; - struct buffer_head *c_bh; - unsigned long offset; - - if (!d_bh) - return 0; - - desc = (struct reiserfs_journal_desc *)d_bh->b_data; - if (get_desc_trans_len(desc) > 0 - && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) { - if (oldest_invalid_trans_id && *oldest_invalid_trans_id - && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-986: transaction " - "is valid returning because trans_id %d is greater than " - "oldest_invalid %lu", - get_desc_trans_id(desc), - *oldest_invalid_trans_id); - return 0; - } - if (newest_mount_id - && *newest_mount_id > get_desc_mount_id(desc)) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1087: transaction " - "is valid returning because mount_id %d is less than " - "newest_mount_id %lu", - get_desc_mount_id(desc), - *newest_mount_id); - return -1; - } - if (get_desc_trans_len(desc) > SB_JOURNAL(sb)->j_trans_max) { - reiserfs_warning(sb, "journal-2018", - "Bad transaction length %d " - "encountered, ignoring transaction", - get_desc_trans_len(desc)); - return -1; - } - offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); - - /* - * ok, we have a journal description block, - * let's see if the transaction was valid - */ - c_bh = - journal_bread(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - ((offset + get_desc_trans_len(desc) + - 1) % SB_ONDISK_JOURNAL_SIZE(sb))); - if (!c_bh) - return 0; - commit = (struct reiserfs_journal_commit *)c_bh->b_data; - if (journal_compare_desc_commit(sb, desc, commit)) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal_transaction_is_valid, commit offset %ld had bad " - "time %d or length %d", - c_bh->b_blocknr - - SB_ONDISK_JOURNAL_1st_BLOCK(sb), - get_commit_trans_id(commit), - get_commit_trans_len(commit)); - brelse(c_bh); - if (oldest_invalid_trans_id) { - *oldest_invalid_trans_id = - get_desc_trans_id(desc); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1004: " - "transaction_is_valid setting oldest invalid trans_id " - "to %d", - get_desc_trans_id(desc)); - } - return -1; - } - brelse(c_bh); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1006: found valid " - "transaction start offset %llu, len %d id %d", - d_bh->b_blocknr - - SB_ONDISK_JOURNAL_1st_BLOCK(sb), - get_desc_trans_len(desc), - get_desc_trans_id(desc)); - return 1; - } else { - return 0; - } -} - -static void brelse_array(struct buffer_head **heads, int num) -{ - int i; - for (i = 0; i < num; i++) { - brelse(heads[i]); - } -} - -/* - * given the start, and values for the oldest acceptable transactions, - * this either reads in a replays a transaction, or returns 
because the - * transaction is invalid, or too old. - * NOTE: only called during fs mount - */ -static int journal_read_transaction(struct super_block *sb, - unsigned long cur_dblock, - unsigned long oldest_start, - unsigned int oldest_trans_id, - unsigned long newest_mount_id) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_journal_desc *desc; - struct reiserfs_journal_commit *commit; - unsigned int trans_id = 0; - struct buffer_head *c_bh; - struct buffer_head *d_bh; - struct buffer_head **log_blocks = NULL; - struct buffer_head **real_blocks = NULL; - unsigned int trans_offset; - int i; - int trans_half; - - d_bh = journal_bread(sb, cur_dblock); - if (!d_bh) - return 1; - desc = (struct reiserfs_journal_desc *)d_bh->b_data; - trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: " - "journal_read_transaction, offset %llu, len %d mount_id %d", - d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb), - get_desc_trans_len(desc), get_desc_mount_id(desc)); - if (get_desc_trans_id(desc) < oldest_trans_id) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: " - "journal_read_trans skipping because %lu is too old", - cur_dblock - - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); - brelse(d_bh); - return 1; - } - if (get_desc_mount_id(desc) != newest_mount_id) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: " - "journal_read_trans skipping because %d is != " - "newest_mount_id %lu", get_desc_mount_id(desc), - newest_mount_id); - brelse(d_bh); - return 1; - } - c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - ((trans_offset + get_desc_trans_len(desc) + 1) % - SB_ONDISK_JOURNAL_SIZE(sb))); - if (!c_bh) { - brelse(d_bh); - return 1; - } - commit = (struct reiserfs_journal_commit *)c_bh->b_data; - if (journal_compare_desc_commit(sb, desc, commit)) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal_read_transaction, " - "commit offset %llu had bad time %d or length %d", - c_bh->b_blocknr - - SB_ONDISK_JOURNAL_1st_BLOCK(sb), - get_commit_trans_id(commit), - get_commit_trans_len(commit)); - brelse(c_bh); - brelse(d_bh); - return 1; - } - - if (bdev_read_only(sb->s_bdev)) { - reiserfs_warning(sb, "clm-2076", - "device is readonly, unable to replay log"); - brelse(c_bh); - brelse(d_bh); - return -EROFS; - } - - trans_id = get_desc_trans_id(desc); - /* - * now we know we've got a good transaction, and it was - * inside the valid time ranges - */ - log_blocks = kmalloc_array(get_desc_trans_len(desc), - sizeof(struct buffer_head *), - GFP_NOFS); - real_blocks = kmalloc_array(get_desc_trans_len(desc), - sizeof(struct buffer_head *), - GFP_NOFS); - if (!log_blocks || !real_blocks) { - brelse(c_bh); - brelse(d_bh); - kfree(log_blocks); - kfree(real_blocks); - reiserfs_warning(sb, "journal-1169", - "kmalloc failed, unable to mount FS"); - return -1; - } - /* get all the buffer heads */ - trans_half = journal_trans_half(sb->s_blocksize); - for (i = 0; i < get_desc_trans_len(desc); i++) { - log_blocks[i] = - journal_getblk(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - (trans_offset + 1 + - i) % SB_ONDISK_JOURNAL_SIZE(sb)); - if (i < trans_half) { - real_blocks[i] = - sb_getblk(sb, - le32_to_cpu(desc->j_realblock[i])); - } else { - real_blocks[i] = - sb_getblk(sb, - le32_to_cpu(commit-> - j_realblock[i - trans_half])); - } - if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(sb)) { - reiserfs_warning(sb, "journal-1207", - "REPLAY FAILURE fsck required! 
" - "Block to replay is outside of " - "filesystem"); - goto abort_replay; - } - /* make sure we don't try to replay onto log or reserved area */ - if (is_block_in_log_or_reserved_area - (sb, real_blocks[i]->b_blocknr)) { - reiserfs_warning(sb, "journal-1204", - "REPLAY FAILURE fsck required! " - "Trying to replay onto a log block"); -abort_replay: - brelse_array(log_blocks, i); - brelse_array(real_blocks, i); - brelse(c_bh); - brelse(d_bh); - kfree(log_blocks); - kfree(real_blocks); - return -1; - } - } - /* read in the log blocks, memcpy to the corresponding real block */ - bh_read_batch(get_desc_trans_len(desc), log_blocks); - for (i = 0; i < get_desc_trans_len(desc); i++) { - - wait_on_buffer(log_blocks[i]); - if (!buffer_uptodate(log_blocks[i])) { - reiserfs_warning(sb, "journal-1212", - "REPLAY FAILURE fsck required! " - "buffer write failed"); - brelse_array(log_blocks + i, - get_desc_trans_len(desc) - i); - brelse_array(real_blocks, get_desc_trans_len(desc)); - brelse(c_bh); - brelse(d_bh); - kfree(log_blocks); - kfree(real_blocks); - return -1; - } - memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, - real_blocks[i]->b_size); - set_buffer_uptodate(real_blocks[i]); - brelse(log_blocks[i]); - } - /* flush out the real blocks */ - for (i = 0; i < get_desc_trans_len(desc); i++) { - set_buffer_dirty(real_blocks[i]); - write_dirty_buffer(real_blocks[i], 0); - } - for (i = 0; i < get_desc_trans_len(desc); i++) { - wait_on_buffer(real_blocks[i]); - if (!buffer_uptodate(real_blocks[i])) { - reiserfs_warning(sb, "journal-1226", - "REPLAY FAILURE, fsck required! " - "buffer write failed"); - brelse_array(real_blocks + i, - get_desc_trans_len(desc) - i); - brelse(c_bh); - brelse(d_bh); - kfree(log_blocks); - kfree(real_blocks); - return -1; - } - brelse(real_blocks[i]); - } - cur_dblock = - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - ((trans_offset + get_desc_trans_len(desc) + - 2) % SB_ONDISK_JOURNAL_SIZE(sb)); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1095: setting journal " "start to offset %ld", - cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); - - /* - * init starting values for the first transaction, in case - * this is the last transaction to be replayed. - */ - journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb); - journal->j_last_flush_trans_id = trans_id; - journal->j_trans_id = trans_id + 1; - /* check for trans_id overflow */ - if (journal->j_trans_id == 0) - journal->j_trans_id = 10; - brelse(c_bh); - brelse(d_bh); - kfree(log_blocks); - kfree(real_blocks); - return 0; -} - -/* - * This function reads blocks starting from block and to max_block of bufsize - * size (but no more than BUFNR blocks at a time). This proved to improve - * mounting speed on self-rebuilding raid5 arrays at least. - * Right now it is only used from journal code. But later we might use it - * from other places. - * Note: Do not use journal_getblk/sb_getblk functions here! 
- */ -static struct buffer_head *reiserfs_breada(struct block_device *dev, - b_blocknr_t block, int bufsize, - b_blocknr_t max_block) -{ - struct buffer_head *bhlist[BUFNR]; - unsigned int blocks = BUFNR; - struct buffer_head *bh; - int i, j; - - bh = __getblk(dev, block, bufsize); - if (!bh || buffer_uptodate(bh)) - return (bh); - - if (block + BUFNR > max_block) { - blocks = max_block - block; - } - bhlist[0] = bh; - j = 1; - for (i = 1; i < blocks; i++) { - bh = __getblk(dev, block + i, bufsize); - if (!bh) - break; - if (buffer_uptodate(bh)) { - brelse(bh); - break; - } else - bhlist[j++] = bh; - } - bh = bhlist[0]; - bh_read_nowait(bh, 0); - bh_readahead_batch(j - 1, &bhlist[1], 0); - for (i = 1; i < j; i++) - brelse(bhlist[i]); - wait_on_buffer(bh); - if (buffer_uptodate(bh)) - return bh; - brelse(bh); - return NULL; -} - -/* - * read and replay the log - * on a clean unmount, the journal header's next unflushed pointer will be - * to an invalid transaction. This tests that before finding all the - * transactions in the log, which makes normal mount times fast. - * - * After a crash, this starts with the next unflushed transaction, and - * replays until it finds one too old, or invalid. - * - * On exit, it sets things up so the first transaction will work correctly. - * NOTE: only called during fs mount - */ -static int journal_read(struct super_block *sb) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_journal_desc *desc; - unsigned int oldest_trans_id = 0; - unsigned int oldest_invalid_trans_id = 0; - time64_t start; - unsigned long oldest_start = 0; - unsigned long cur_dblock = 0; - unsigned long newest_mount_id = 9; - struct buffer_head *d_bh; - struct reiserfs_journal_header *jh; - int valid_journal_header = 0; - int replay_count = 0; - int continue_replay = 1; - int ret; - - cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb); - reiserfs_info(sb, "checking transaction log (%pg)\n", - file_bdev(journal->j_bdev_file)); - start = ktime_get_seconds(); - - /* - * step 1, read in the journal header block. Check the transaction - * it says is the first unflushed, and if that transaction is not - * valid, replay is done - */ - journal->j_header_bh = journal_bread(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) - + SB_ONDISK_JOURNAL_SIZE(sb)); - if (!journal->j_header_bh) { - return 1; - } - jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data); - if (le32_to_cpu(jh->j_first_unflushed_offset) < - SB_ONDISK_JOURNAL_SIZE(sb) - && le32_to_cpu(jh->j_last_flush_trans_id) > 0) { - oldest_start = - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - le32_to_cpu(jh->j_first_unflushed_offset); - oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; - newest_mount_id = le32_to_cpu(jh->j_mount_id); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1153: found in " - "header: first_unflushed_offset %d, last_flushed_trans_id " - "%lu", le32_to_cpu(jh->j_first_unflushed_offset), - le32_to_cpu(jh->j_last_flush_trans_id)); - valid_journal_header = 1; - - /* - * now, we try to read the first unflushed offset. If it - * is not valid, there is nothing more we can do, and it - * makes no sense to read through the whole log. - */ - d_bh = - journal_bread(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - le32_to_cpu(jh->j_first_unflushed_offset)); - ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL); - if (!ret) { - continue_replay = 0; - } - brelse(d_bh); - goto start_log_replay; - } - - /* - * ok, there are transactions that need to be replayed. 
start - * with the first log block, find all the valid transactions, and - * pick out the oldest. - */ - while (continue_replay - && cur_dblock < - (SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - SB_ONDISK_JOURNAL_SIZE(sb))) { - /* - * Note that it is required for blocksize of primary fs - * device and journal device to be the same - */ - d_bh = - reiserfs_breada(file_bdev(journal->j_bdev_file), cur_dblock, - sb->s_blocksize, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - SB_ONDISK_JOURNAL_SIZE(sb)); - ret = - journal_transaction_is_valid(sb, d_bh, - &oldest_invalid_trans_id, - &newest_mount_id); - if (ret == 1) { - desc = (struct reiserfs_journal_desc *)d_bh->b_data; - if (oldest_start == 0) { /* init all oldest_ values */ - oldest_trans_id = get_desc_trans_id(desc); - oldest_start = d_bh->b_blocknr; - newest_mount_id = get_desc_mount_id(desc); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1179: Setting " - "oldest_start to offset %llu, trans_id %lu", - oldest_start - - SB_ONDISK_JOURNAL_1st_BLOCK - (sb), oldest_trans_id); - } else if (oldest_trans_id > get_desc_trans_id(desc)) { - /* one we just read was older */ - oldest_trans_id = get_desc_trans_id(desc); - oldest_start = d_bh->b_blocknr; - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1180: Resetting " - "oldest_start to offset %lu, trans_id %lu", - oldest_start - - SB_ONDISK_JOURNAL_1st_BLOCK - (sb), oldest_trans_id); - } - if (newest_mount_id < get_desc_mount_id(desc)) { - newest_mount_id = get_desc_mount_id(desc); - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1299: Setting " - "newest_mount_id to %d", - get_desc_mount_id(desc)); - } - cur_dblock += get_desc_trans_len(desc) + 2; - } else { - cur_dblock++; - } - brelse(d_bh); - } - -start_log_replay: - cur_dblock = oldest_start; - if (oldest_trans_id) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1206: Starting replay " - "from offset %llu, trans_id %lu", - cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb), - oldest_trans_id); - - } - replay_count = 0; - while (continue_replay && oldest_trans_id > 0) { - ret = - journal_read_transaction(sb, cur_dblock, oldest_start, - oldest_trans_id, newest_mount_id); - if (ret < 0) { - return ret; - } else if (ret != 0) { - break; - } - cur_dblock = - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start; - replay_count++; - if (cur_dblock == oldest_start) - break; - } - - if (oldest_trans_id == 0) { - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "journal-1225: No valid " "transactions found"); - } - /* - * j_start does not get set correctly if we don't replay any - * transactions. 
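The scan loop above walks the on-disk journal once: a slot holding a valid description block advances the cursor by trans_len + 2 (description + data + commit), anything else advances it by one, and the oldest transaction id seen becomes the replay starting point. A compact model of that walk over an in-memory array, with invented slot contents:

#include <stdio.h>

#define JOURNAL_SIZE 32

/* Toy journal slot: len > 0 marks a valid description block in this sketch. */
struct slot {
    int len;               /* number of logged blocks, 0 if not a desc block */
    unsigned int trans_id;
};

int main(void)
{
    struct slot log[JOURNAL_SIZE] = { 0 };
    unsigned int oldest_trans_id = 0;
    unsigned long oldest_start = 0;

    /* two fake transactions: ids 8 and 7, with three and two data blocks */
    log[4] = (struct slot){ .len = 3, .trans_id = 8 };
    log[9] = (struct slot){ .len = 2, .trans_id = 7 };

    for (unsigned long cur = 0; cur < JOURNAL_SIZE; ) {
        if (log[cur].len > 0) {
            if (oldest_trans_id == 0 || log[cur].trans_id < oldest_trans_id) {
                oldest_trans_id = log[cur].trans_id;
                oldest_start = cur;
            }
            /* description block + len data blocks + commit block */
            cur += log[cur].len + 2;
        } else {
            cur++;
        }
    }
    printf("replay would start at offset %lu, trans_id %u\n",
           oldest_start, oldest_trans_id);
    return 0;
}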
if we had a valid journal_header, set j_start - * to the first unflushed transaction value, copy the trans_id - * from the header - */ - if (valid_journal_header && replay_count == 0) { - journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); - journal->j_trans_id = - le32_to_cpu(jh->j_last_flush_trans_id) + 1; - /* check for trans_id overflow */ - if (journal->j_trans_id == 0) - journal->j_trans_id = 10; - journal->j_last_flush_trans_id = - le32_to_cpu(jh->j_last_flush_trans_id); - journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; - } else { - journal->j_mount_id = newest_mount_id + 1; - } - reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " - "newest_mount_id to %lu", journal->j_mount_id); - journal->j_first_unflushed_offset = journal->j_start; - if (replay_count > 0) { - reiserfs_info(sb, - "replayed %d transactions in %lu seconds\n", - replay_count, ktime_get_seconds() - start); - } - /* needed to satisfy the locking in _update_journal_header_block */ - reiserfs_write_lock(sb); - if (!bdev_read_only(sb->s_bdev) && - _update_journal_header_block(sb, journal->j_start, - journal->j_last_flush_trans_id)) { - reiserfs_write_unlock(sb); - /* - * replay failed, caller must call free_journal_ram and abort - * the mount - */ - return -1; - } - reiserfs_write_unlock(sb); - return 0; -} - -static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) -{ - struct reiserfs_journal_list *jl; - jl = kzalloc(sizeof(struct reiserfs_journal_list), - GFP_NOFS | __GFP_NOFAIL); - INIT_LIST_HEAD(&jl->j_list); - INIT_LIST_HEAD(&jl->j_working_list); - INIT_LIST_HEAD(&jl->j_tail_bh_list); - INIT_LIST_HEAD(&jl->j_bh_list); - mutex_init(&jl->j_commit_mutex); - SB_JOURNAL(s)->j_num_lists++; - get_journal_list(jl); - return jl; -} - -static void journal_list_init(struct super_block *sb) -{ - SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb); -} - -static void release_journal_dev(struct reiserfs_journal *journal) -{ - if (journal->j_bdev_file) { - bdev_fput(journal->j_bdev_file); - journal->j_bdev_file = NULL; - } -} - -static int journal_init_dev(struct super_block *super, - struct reiserfs_journal *journal, - const char *jdev_name) -{ - blk_mode_t blkdev_mode = BLK_OPEN_READ; - void *holder = journal; - int result; - dev_t jdev; - - result = 0; - - journal->j_bdev_file = NULL; - jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? 
- new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; - - if (!bdev_read_only(super->s_bdev)) - blkdev_mode |= BLK_OPEN_WRITE; - - /* there is no "jdev" option and journal is on separate device */ - if ((!jdev_name || !jdev_name[0])) { - if (jdev == super->s_dev) - holder = NULL; - journal->j_bdev_file = bdev_file_open_by_dev(jdev, blkdev_mode, - holder, NULL); - if (IS_ERR(journal->j_bdev_file)) { - result = PTR_ERR(journal->j_bdev_file); - journal->j_bdev_file = NULL; - reiserfs_warning(super, "sh-458", - "cannot init journal device unknown-block(%u,%u): %i", - MAJOR(jdev), MINOR(jdev), result); - return result; - } else if (jdev != super->s_dev) - set_blocksize(journal->j_bdev_file, super->s_blocksize); - - return 0; - } - - journal->j_bdev_file = bdev_file_open_by_path(jdev_name, blkdev_mode, - holder, NULL); - if (IS_ERR(journal->j_bdev_file)) { - result = PTR_ERR(journal->j_bdev_file); - journal->j_bdev_file = NULL; - reiserfs_warning(super, "sh-457", - "journal_init_dev: Cannot open '%s': %i", - jdev_name, result); - return result; - } - - set_blocksize(journal->j_bdev_file, super->s_blocksize); - reiserfs_info(super, - "journal_init_dev: journal device: %pg\n", - file_bdev(journal->j_bdev_file)); - return 0; -} - -/* - * When creating/tuning a file system user can assign some - * journal params within boundaries which depend on the ratio - * blocksize/standard_blocksize. - * - * For blocks >= standard_blocksize transaction size should - * be not less then JOURNAL_TRANS_MIN_DEFAULT, and not more - * then JOURNAL_TRANS_MAX_DEFAULT. - * - * For blocks < standard_blocksize these boundaries should be - * decreased proportionally. - */ -#define REISERFS_STANDARD_BLKSIZE (4096) - -static int check_advise_trans_params(struct super_block *sb, - struct reiserfs_journal *journal) -{ - if (journal->j_trans_max) { - /* Non-default journal params. Do sanity check for them. */ - int ratio = 1; - if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) - ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize; - - if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio || - journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio || - SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max < - JOURNAL_MIN_RATIO) { - reiserfs_warning(sb, "sh-462", - "bad transaction max size (%u). " - "FSCK?", journal->j_trans_max); - return 1; - } - if (journal->j_max_batch != (journal->j_trans_max) * - JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) { - reiserfs_warning(sb, "sh-463", - "bad transaction max batch (%u). " - "FSCK?", journal->j_max_batch); - return 1; - } - } else { - /* - * Default journal params. - * The file system was created by old version - * of mkreiserfs, so some fields contain zeros, - * and we need to advise proper values for them - */ - if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { - reiserfs_warning(sb, "sh-464", "bad blocksize (%u)", - sb->s_blocksize); - return 1; - } - journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; - journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT; - journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE; - } - return 0; -} - -/* must be called once on fs mount. 
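check_advise_trans_params() above rejects user-supplied journal parameters that fall outside limits scaled by blocksize (for blocks smaller than 4096 the boundaries shrink proportionally) and insists that the maximum batch size track the maximum transaction size. A sketch of the same checks; the default constants below are placeholders, since the real values live in reiserfs.h and are not part of this hunk:

#include <stdio.h>

/* Placeholder limits; the real defaults are not shown in this diff. */
#define TRANS_MAX_DEFAULT  1024
#define TRANS_MIN_DEFAULT   256
#define MAX_BATCH_DEFAULT   900
#define MIN_RATIO             2
#define STANDARD_BLKSIZE   4096

/* Returns 0 when the journal params look sane, 1 otherwise.
 * Assumes trans_max != 0, as in the removed code where this branch
 * only runs for non-default parameters. */
static int check_trans_params(unsigned int blocksize, unsigned int journal_size,
                              unsigned int trans_max, unsigned int max_batch)
{
    unsigned int ratio = 1;

    if (blocksize < STANDARD_BLKSIZE)
        ratio = STANDARD_BLKSIZE / blocksize;

    if (trans_max > TRANS_MAX_DEFAULT / ratio ||
        trans_max < TRANS_MIN_DEFAULT / ratio ||
        journal_size / trans_max < MIN_RATIO)
        return 1;

    /* the maximum batch size is required to scale with trans_max */
    if (max_batch != trans_max * MAX_BATCH_DEFAULT / TRANS_MAX_DEFAULT)
        return 1;

    return 0;
}

int main(void)
{
    printf("4k fs, 8192-block journal, trans_max 1024, batch 900 -> %s\n",
           check_trans_params(4096, 8192, 1024, 900) ? "bad" : "ok");
    printf("1k fs, 8192-block journal, trans_max 1024, batch 900 -> %s\n",
           check_trans_params(1024, 8192, 1024, 900) ? "bad" : "ok");
    return 0;
}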
calls journal_read for you */ -int journal_init(struct super_block *sb, const char *j_dev_name, - int old_format, unsigned int commit_max_age) -{ - int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2; - struct buffer_head *bhjh; - struct reiserfs_super_block *rs; - struct reiserfs_journal_header *jh; - struct reiserfs_journal *journal; - struct reiserfs_journal_list *jl; - int ret; - - journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal)); - if (!journal) { - reiserfs_warning(sb, "journal-1256", - "unable to get memory for journal structure"); - return 1; - } - INIT_LIST_HEAD(&journal->j_bitmap_nodes); - INIT_LIST_HEAD(&journal->j_prealloc_list); - INIT_LIST_HEAD(&journal->j_working_list); - INIT_LIST_HEAD(&journal->j_journal_list); - journal->j_persistent_trans = 0; - if (reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap, - reiserfs_bmap_count(sb))) - goto free_and_return; - - allocate_bitmap_nodes(sb); - - /* reserved for journal area support */ - SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ? - REISERFS_OLD_DISK_OFFSET_IN_BYTES - / sb->s_blocksize + - reiserfs_bmap_count(sb) + - 1 : - REISERFS_DISK_OFFSET_IN_BYTES / - sb->s_blocksize + 2); - - /* - * Sanity check to see is the standard journal fitting - * within first bitmap (actual for small blocksizes) - */ - if (!SB_ONDISK_JOURNAL_DEVICE(sb) && - (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + - SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) { - reiserfs_warning(sb, "journal-1393", - "journal does not fit for area addressed " - "by first of bitmap blocks. It starts at " - "%u and its size is %u. Block size %ld", - SB_JOURNAL_1st_RESERVED_BLOCK(sb), - SB_ONDISK_JOURNAL_SIZE(sb), - sb->s_blocksize); - goto free_and_return; - } - - /* - * Sanity check to see if journal first block is correct. - * If journal first block is invalid it can cause - * zeroing important superblock members. 
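The sanity check above ensures that a journal embedded in the main device still fits inside the range addressed by the first bitmap block: one bitmap block of blocksize bytes covers blocksize * 8 blocks, so the reserved start plus the journal size must stay below that boundary. A short illustration of the arithmetic:

#include <stdio.h>

/* One bitmap block of @blocksize bytes addresses blocksize * 8 blocks. */
static int journal_fits_first_bitmap(unsigned long blocksize,
                                     unsigned long reserved_start,
                                     unsigned long journal_size)
{
    return reserved_start + journal_size <= blocksize * 8;
}

int main(void)
{
    /* a 4 KiB bitmap block covers 32768 blocks */
    printf("start 18, size 8193:  %s\n",
           journal_fits_first_bitmap(4096, 18, 8193) ? "fits" : "too big");
    printf("start 18, size 40000: %s\n",
           journal_fits_first_bitmap(4096, 18, 40000) ? "fits" : "too big");
    return 0;
}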
- */ - if (!SB_ONDISK_JOURNAL_DEVICE(sb) && - SB_ONDISK_JOURNAL_1st_BLOCK(sb) < SB_JOURNAL_1st_RESERVED_BLOCK(sb)) { - reiserfs_warning(sb, "journal-1393", - "journal 1st super block is invalid: 1st reserved block %d, but actual 1st block is %d", - SB_JOURNAL_1st_RESERVED_BLOCK(sb), - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); - goto free_and_return; - } - - if (journal_init_dev(sb, journal, j_dev_name) != 0) { - reiserfs_warning(sb, "sh-462", - "unable to initialize journal device"); - goto free_and_return; - } - - rs = SB_DISK_SUPER_BLOCK(sb); - - /* read journal header */ - bhjh = journal_bread(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - SB_ONDISK_JOURNAL_SIZE(sb)); - if (!bhjh) { - reiserfs_warning(sb, "sh-459", - "unable to read journal header"); - goto free_and_return; - } - jh = (struct reiserfs_journal_header *)(bhjh->b_data); - - /* make sure that journal matches to the super block */ - if (is_reiserfs_jr(rs) - && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != - sb_jp_journal_magic(rs))) { - reiserfs_warning(sb, "sh-460", - "journal header magic %x (device %pg) does " - "not match to magic found in super block %x", - jh->jh_journal.jp_journal_magic, - file_bdev(journal->j_bdev_file), - sb_jp_journal_magic(rs)); - brelse(bhjh); - goto free_and_return; - } - - journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max); - journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch); - journal->j_max_commit_age = - le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age); - journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; - - if (check_advise_trans_params(sb, journal) != 0) - goto free_and_return; - journal->j_default_max_commit_age = journal->j_max_commit_age; - - if (commit_max_age != 0) { - journal->j_max_commit_age = commit_max_age; - journal->j_max_trans_age = commit_max_age; - } - - reiserfs_info(sb, "journal params: device %pg, size %u, " - "journal first block %u, max trans len %u, max batch %u, " - "max commit age %u, max trans age %u\n", - file_bdev(journal->j_bdev_file), - SB_ONDISK_JOURNAL_SIZE(sb), - SB_ONDISK_JOURNAL_1st_BLOCK(sb), - journal->j_trans_max, - journal->j_max_batch, - journal->j_max_commit_age, journal->j_max_trans_age); - - brelse(bhjh); - - journal->j_list_bitmap_index = 0; - journal_list_init(sb); - - memset(journal->j_list_hash_table, 0, - JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); - - INIT_LIST_HEAD(&journal->j_dirty_buffers); - spin_lock_init(&journal->j_dirty_buffers_lock); - - journal->j_start = 0; - journal->j_len = 0; - journal->j_len_alloc = 0; - atomic_set(&journal->j_wcount, 0); - atomic_set(&journal->j_async_throttle, 0); - journal->j_bcount = 0; - journal->j_trans_start_time = 0; - journal->j_last = NULL; - journal->j_first = NULL; - init_waitqueue_head(&journal->j_join_wait); - mutex_init(&journal->j_mutex); - mutex_init(&journal->j_flush_mutex); - - journal->j_trans_id = 10; - journal->j_mount_id = 10; - journal->j_state = 0; - atomic_set(&journal->j_jlock, 0); - journal->j_cnode_free_list = allocate_cnodes(num_cnodes); - journal->j_cnode_free_orig = journal->j_cnode_free_list; - journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; - journal->j_cnode_used = 0; - journal->j_must_wait = 0; - - if (journal->j_cnode_free == 0) { - reiserfs_warning(sb, "journal-2004", "Journal cnode memory " - "allocation failed (%ld bytes). Journal is " - "too large for available memory. 
Usually " - "this is due to a journal that is too large.", - sizeof (struct reiserfs_journal_cnode) * num_cnodes); - goto free_and_return; - } - - init_journal_hash(sb); - jl = journal->j_current_jl; - - /* - * get_list_bitmap() may call flush_commit_list() which - * requires the lock. Calling flush_commit_list() shouldn't happen - * this early but I like to be paranoid. - */ - reiserfs_write_lock(sb); - jl->j_list_bitmap = get_list_bitmap(sb, jl); - reiserfs_write_unlock(sb); - if (!jl->j_list_bitmap) { - reiserfs_warning(sb, "journal-2005", - "get_list_bitmap failed for journal list 0"); - goto free_and_return; - } - - ret = journal_read(sb); - if (ret < 0) { - reiserfs_warning(sb, "reiserfs-2006", - "Replay Failure, unable to mount"); - goto free_and_return; - } - - INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); - journal->j_work_sb = sb; - return 0; -free_and_return: - free_journal_ram(sb); - return 1; -} - -/* - * test for a polite end of the current transaction. Used by file_write, - * and should be used by delete to make sure they don't write more than - * can fit inside a single transaction - */ -int journal_transaction_should_end(struct reiserfs_transaction_handle *th, - int new_alloc) -{ - struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); - time64_t now = ktime_get_seconds(); - /* cannot restart while nested */ - BUG_ON(!th->t_trans_id); - if (th->t_refcount > 1) - return 0; - if (journal->j_must_wait > 0 || - (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || - atomic_read(&journal->j_jlock) || - (now - journal->j_trans_start_time) > journal->j_max_trans_age || - journal->j_cnode_free < (journal->j_trans_max * 3)) { - return 1; - } - - journal->j_len_alloc += new_alloc; - th->t_blocks_allocated += new_alloc ; - return 0; -} - -/* this must be called inside a transaction */ -void reiserfs_block_writes(struct reiserfs_transaction_handle *th) -{ - struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); - BUG_ON(!th->t_trans_id); - journal->j_must_wait = 1; - set_bit(J_WRITERS_BLOCKED, &journal->j_state); - return; -} - -/* this must be called without a transaction started */ -void reiserfs_allow_writes(struct super_block *s) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - clear_bit(J_WRITERS_BLOCKED, &journal->j_state); - wake_up(&journal->j_join_wait); -} - -/* this must be called without a transaction started */ -void reiserfs_wait_on_write_block(struct super_block *s) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - wait_event(journal->j_join_wait, - !test_bit(J_WRITERS_BLOCKED, &journal->j_state)); -} - -static void queue_log_writer(struct super_block *s) -{ - wait_queue_entry_t wait; - struct reiserfs_journal *journal = SB_JOURNAL(s); - set_bit(J_WRITERS_QUEUED, &journal->j_state); - - /* - * we don't want to use wait_event here because - * we only want to wait once. 
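The thresholds consulted by journal_transaction_should_end() are easier to see in isolation. A simplified userspace sketch, with a hypothetical flattened struct standing in for the reiserfs_journal members used above:

#include <stdbool.h>
#include <time.h>

/* Hypothetical, flattened view of the journal fields consulted above. */
struct jrnl_state {
        int           must_wait;
        unsigned long len_alloc;
        unsigned long max_batch;
        bool          jlock;
        time_t        trans_start_time;
        time_t        max_trans_age;
        unsigned long cnode_free;
        unsigned long trans_max;
};

/*
 * A writer is asked to politely end its transaction when the journal is
 * blocked, the batch is already full, the transaction has grown too old,
 * or free cnodes are running low.
 */
static bool should_end(const struct jrnl_state *j, unsigned long new_alloc,
                       time_t now)
{
        return j->must_wait > 0 ||
               j->len_alloc + new_alloc >= j->max_batch ||
               j->jlock ||
               now - j->trans_start_time > j->max_trans_age ||
               j->cnode_free < j->trans_max * 3;
}

int main(void)
{
        struct jrnl_state j = { .max_batch = 900, .max_trans_age = 30,
                                .cnode_free = 5000, .trans_max = 1024,
                                .trans_start_time = time(NULL) };

        return should_end(&j, 10, time(NULL)) ? 1 : 0;  /* 0: keep going */
}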
- */ - init_waitqueue_entry(&wait, current); - add_wait_queue(&journal->j_join_wait, &wait); - set_current_state(TASK_UNINTERRUPTIBLE); - if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) { - int depth = reiserfs_write_unlock_nested(s); - schedule(); - reiserfs_write_lock_nested(s, depth); - } - __set_current_state(TASK_RUNNING); - remove_wait_queue(&journal->j_join_wait, &wait); -} - -static void wake_queued_writers(struct super_block *s) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state)) - wake_up(&journal->j_join_wait); -} - -static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - unsigned long bcount = journal->j_bcount; - while (1) { - int depth; - - depth = reiserfs_write_unlock_nested(sb); - schedule_timeout_uninterruptible(1); - reiserfs_write_lock_nested(sb, depth); - - journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; - while ((atomic_read(&journal->j_wcount) > 0 || - atomic_read(&journal->j_jlock)) && - journal->j_trans_id == trans_id) { - queue_log_writer(sb); - } - if (journal->j_trans_id != trans_id) - break; - if (bcount == journal->j_bcount) - break; - bcount = journal->j_bcount; - } -} - -/* - * join == true if you must join an existing transaction. - * join == false if you can deal with waiting for others to finish - * - * this will block until the transaction is joinable. send the number of - * blocks you expect to use in nblocks. -*/ -static int do_journal_begin_r(struct reiserfs_transaction_handle *th, - struct super_block *sb, unsigned long nblocks, - int join) -{ - time64_t now = ktime_get_seconds(); - unsigned int old_trans_id; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_transaction_handle myth; - int retval; - int depth; - - reiserfs_check_lock_depth(sb, "journal_begin"); - BUG_ON(nblocks > journal->j_trans_max); - - PROC_INFO_INC(sb, journal.journal_being); - /* set here for journal_join */ - th->t_refcount = 1; - th->t_super = sb; - -relock: - lock_journal(sb); - if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { - unlock_journal(sb); - retval = journal->j_errno; - goto out_fail; - } - journal->j_bcount++; - - if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { - unlock_journal(sb); - depth = reiserfs_write_unlock_nested(sb); - reiserfs_wait_on_write_block(sb); - reiserfs_write_lock_nested(sb, depth); - PROC_INFO_INC(sb, journal.journal_relock_writers); - goto relock; - } - now = ktime_get_seconds(); - - /* - * if there is no room in the journal OR - * if this transaction is too old, and we weren't called joinable, - * wait for it to finish before beginning we don't sleep if there - * aren't other writers - */ - - if ((!join && journal->j_must_wait > 0) || - (!join - && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch) - || (!join && atomic_read(&journal->j_wcount) > 0 - && journal->j_trans_start_time > 0 - && (now - journal->j_trans_start_time) > - journal->j_max_trans_age) || (!join - && atomic_read(&journal->j_jlock)) - || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { - - old_trans_id = journal->j_trans_id; - /* allow others to finish this transaction */ - unlock_journal(sb); - - if (!join && (journal->j_len_alloc + nblocks + 2) >= - journal->j_max_batch && - ((journal->j_len + nblocks + 2) * 100) < - (journal->j_len_alloc * 75)) { - if (atomic_read(&journal->j_wcount) > 10) { - queue_log_writer(sb); - goto relock; - } - } - /* - * 
don't mess with joining the transaction if all we - * have to do is wait for someone else to do a commit - */ - if (atomic_read(&journal->j_jlock)) { - while (journal->j_trans_id == old_trans_id && - atomic_read(&journal->j_jlock)) { - queue_log_writer(sb); - } - goto relock; - } - retval = journal_join(&myth, sb); - if (retval) - goto out_fail; - - /* someone might have ended the transaction while we joined */ - if (old_trans_id != journal->j_trans_id) { - retval = do_journal_end(&myth, 0); - } else { - retval = do_journal_end(&myth, COMMIT_NOW); - } - - if (retval) - goto out_fail; - - PROC_INFO_INC(sb, journal.journal_relock_wcount); - goto relock; - } - /* we are the first writer, set trans_id */ - if (journal->j_trans_start_time == 0) { - journal->j_trans_start_time = ktime_get_seconds(); - } - atomic_inc(&journal->j_wcount); - journal->j_len_alloc += nblocks; - th->t_blocks_logged = 0; - th->t_blocks_allocated = nblocks; - th->t_trans_id = journal->j_trans_id; - unlock_journal(sb); - INIT_LIST_HEAD(&th->t_list); - return 0; - -out_fail: - memset(th, 0, sizeof(*th)); - /* - * Re-set th->t_super, so we can properly keep track of how many - * persistent transactions there are. We need to do this so if this - * call is part of a failed restart_transaction, we can free it later - */ - th->t_super = sb; - return retval; -} - -struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct - super_block - *s, - int nblocks) -{ - int ret; - struct reiserfs_transaction_handle *th; - - /* - * if we're nesting into an existing transaction. It will be - * persistent on its own - */ - if (reiserfs_transaction_running(s)) { - th = current->journal_info; - th->t_refcount++; - BUG_ON(th->t_refcount < 2); - - return th; - } - th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); - if (!th) - return NULL; - ret = journal_begin(th, s, nblocks); - if (ret) { - kfree(th); - return NULL; - } - - SB_JOURNAL(s)->j_persistent_trans++; - return th; -} - -int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) -{ - struct super_block *s = th->t_super; - int ret = 0; - if (th->t_trans_id) - ret = journal_end(th); - else - ret = -EIO; - if (th->t_refcount == 0) { - SB_JOURNAL(s)->j_persistent_trans--; - kfree(th); - } - return ret; -} - -static int journal_join(struct reiserfs_transaction_handle *th, - struct super_block *sb) -{ - struct reiserfs_transaction_handle *cur_th = current->journal_info; - - /* - * this keeps do_journal_end from NULLing out the - * current->journal_info pointer - */ - th->t_handle_save = cur_th; - BUG_ON(cur_th && cur_th->t_refcount > 1); - return do_journal_begin_r(th, sb, 1, JBEGIN_JOIN); -} - -int journal_join_abort(struct reiserfs_transaction_handle *th, - struct super_block *sb) -{ - struct reiserfs_transaction_handle *cur_th = current->journal_info; - - /* - * this keeps do_journal_end from NULLing out the - * current->journal_info pointer - */ - th->t_handle_save = cur_th; - BUG_ON(cur_th && cur_th->t_refcount > 1); - return do_journal_begin_r(th, sb, 1, JBEGIN_ABORT); -} - -int journal_begin(struct reiserfs_transaction_handle *th, - struct super_block *sb, unsigned long nblocks) -{ - struct reiserfs_transaction_handle *cur_th = current->journal_info; - int ret; - - th->t_handle_save = NULL; - if (cur_th) { - /* we are nesting into the current transaction */ - if (cur_th->t_super == sb) { - BUG_ON(!cur_th->t_refcount); - cur_th->t_refcount++; - memcpy(th, cur_th, sizeof(*th)); - if (th->t_refcount <= 1) - reiserfs_warning(sb, 
"reiserfs-2005", - "BAD: refcount <= 1, but " - "journal_info != 0"); - return 0; - } else { - /* - * we've ended up with a handle from a different - * filesystem. save it and restore on journal_end. - * This should never really happen... - */ - reiserfs_warning(sb, "clm-2100", - "nesting info a different FS"); - th->t_handle_save = current->journal_info; - current->journal_info = th; - } - } else { - current->journal_info = th; - } - ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG); - BUG_ON(current->journal_info != th); - - /* - * I guess this boils down to being the reciprocal of clm-2100 above. - * If do_journal_begin_r fails, we need to put it back, since - * journal_end won't be called to do it. */ - if (ret) - current->journal_info = th->t_handle_save; - else - BUG_ON(!th->t_refcount); - - return ret; -} - -/* - * puts bh into the current transaction. If it was already there, reorders - * removes the old pointers from the hash, and puts new ones in (to make - * sure replay happen in the right order). - * - * if it was dirty, cleans and files onto the clean list. I can't let it - * be dirty again until the transaction is committed. - * - * if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. - */ -int journal_mark_dirty(struct reiserfs_transaction_handle *th, - struct buffer_head *bh) -{ - struct super_block *sb = th->t_super; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_journal_cnode *cn = NULL; - int count_already_incd = 0; - int prepared = 0; - BUG_ON(!th->t_trans_id); - - PROC_INFO_INC(sb, journal.mark_dirty); - if (th->t_trans_id != journal->j_trans_id) { - reiserfs_panic(th->t_super, "journal-1577", - "handle trans id %ld != current trans id %ld", - th->t_trans_id, journal->j_trans_id); - } - - prepared = test_clear_buffer_journal_prepared(bh); - clear_buffer_journal_restore_dirty(bh); - /* already in this transaction, we are done */ - if (buffer_journaled(bh)) { - PROC_INFO_INC(sb, journal.mark_dirty_already); - return 0; - } - - /* - * this must be turned into a panic instead of a warning. We can't - * allow a dirty or journal_dirty or locked buffer to be logged, as - * some changes could get to disk too early. NOT GOOD. - */ - if (!prepared || buffer_dirty(bh)) { - reiserfs_warning(sb, "journal-1777", - "buffer %llu bad state " - "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", - (unsigned long long)bh->b_blocknr, - prepared ? ' ' : '!', - buffer_locked(bh) ? ' ' : '!', - buffer_dirty(bh) ? ' ' : '!', - buffer_journal_dirty(bh) ? ' ' : '!'); - } - - if (atomic_read(&journal->j_wcount) <= 0) { - reiserfs_warning(sb, "journal-1409", - "returning because j_wcount was %d", - atomic_read(&journal->j_wcount)); - return 1; - } - /* - * this error means I've screwed up, and we've overflowed - * the transaction. Nothing can be done here, except make the - * FS readonly or panic. 
- */ - if (journal->j_len >= journal->j_trans_max) { - reiserfs_panic(th->t_super, "journal-1413", - "j_len (%lu) is too big", - journal->j_len); - } - - if (buffer_journal_dirty(bh)) { - count_already_incd = 1; - PROC_INFO_INC(sb, journal.mark_dirty_notjournal); - clear_buffer_journal_dirty(bh); - } - - if (journal->j_len > journal->j_len_alloc) { - journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT; - } - - set_buffer_journaled(bh); - - /* now put this guy on the end */ - if (!cn) { - cn = get_cnode(sb); - if (!cn) { - reiserfs_panic(sb, "journal-4", "get_cnode failed!"); - } - - if (th->t_blocks_logged == th->t_blocks_allocated) { - th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT; - journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT; - } - th->t_blocks_logged++; - journal->j_len++; - - cn->bh = bh; - cn->blocknr = bh->b_blocknr; - cn->sb = sb; - cn->jlist = NULL; - insert_journal_hash(journal->j_hash_table, cn); - if (!count_already_incd) { - get_bh(bh); - } - } - cn->next = NULL; - cn->prev = journal->j_last; - cn->bh = bh; - if (journal->j_last) { - journal->j_last->next = cn; - journal->j_last = cn; - } else { - journal->j_first = cn; - journal->j_last = cn; - } - reiserfs_schedule_old_flush(sb); - return 0; -} - -int journal_end(struct reiserfs_transaction_handle *th) -{ - struct super_block *sb = th->t_super; - if (!current->journal_info && th->t_refcount > 1) - reiserfs_warning(sb, "REISER-NESTING", - "th NULL, refcount %d", th->t_refcount); - - if (!th->t_trans_id) { - WARN_ON(1); - return -EIO; - } - - th->t_refcount--; - if (th->t_refcount > 0) { - struct reiserfs_transaction_handle *cur_th = - current->journal_info; - - /* - * we aren't allowed to close a nested transaction on a - * different filesystem from the one in the task struct - */ - BUG_ON(cur_th->t_super != th->t_super); - - if (th != cur_th) { - memcpy(current->journal_info, th, sizeof(*th)); - th->t_trans_id = 0; - } - return 0; - } else { - return do_journal_end(th, 0); - } -} - -/* - * removes from the current transaction, relsing and descrementing any counters. - * also files the removed buffer directly onto the clean list - * - * called by journal_mark_freed when a block has been deleted - * - * returns 1 if it cleaned and relsed the buffer. 0 otherwise - */ -static int remove_from_transaction(struct super_block *sb, - b_blocknr_t blocknr, int already_cleaned) -{ - struct buffer_head *bh; - struct reiserfs_journal_cnode *cn; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - int ret = 0; - - cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr); - if (!cn || !cn->bh) { - return ret; - } - bh = cn->bh; - if (cn->prev) { - cn->prev->next = cn->next; - } - if (cn->next) { - cn->next->prev = cn->prev; - } - if (cn == journal->j_first) { - journal->j_first = cn->next; - } - if (cn == journal->j_last) { - journal->j_last = cn->prev; - } - remove_journal_hash(sb, journal->j_hash_table, NULL, - bh->b_blocknr, 0); - clear_buffer_journaled(bh); /* don't log this one */ - - if (!already_cleaned) { - clear_buffer_journal_dirty(bh); - clear_buffer_dirty(bh); - clear_buffer_journal_test(bh); - put_bh(bh); - if (atomic_read(&bh->b_count) < 0) { - reiserfs_warning(sb, "journal-1752", - "b_count < 0"); - } - ret = 1; - } - journal->j_len--; - journal->j_len_alloc--; - free_cnode(sb, cn); - return ret; -} - -/* - * for any cnode in a journal list, it can only be dirtied of all the - * transactions that include it are committed to disk. 
- * this checks through each transaction, and returns 1 if you are allowed - * to dirty, and 0 if you aren't - * - * it is called by dirty_journal_list, which is called after - * flush_commit_list has gotten all the log blocks for a given - * transaction on disk - * - */ -static int can_dirty(struct reiserfs_journal_cnode *cn) -{ - struct super_block *sb = cn->sb; - b_blocknr_t blocknr = cn->blocknr; - struct reiserfs_journal_cnode *cur = cn->hprev; - int can_dirty = 1; - - /* - * first test hprev. These are all newer than cn, so any node here - * with the same block number and dev means this node can't be sent - * to disk right now. - */ - while (cur && can_dirty) { - if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && - cur->blocknr == blocknr) { - can_dirty = 0; - } - cur = cur->hprev; - } - /* - * then test hnext. These are all older than cn. As long as they - * are committed to the log, it is safe to write cn to disk - */ - cur = cn->hnext; - while (cur && can_dirty) { - if (cur->jlist && cur->jlist->j_len > 0 && - atomic_read(&cur->jlist->j_commit_left) > 0 && cur->bh && - cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { - can_dirty = 0; - } - cur = cur->hnext; - } - return can_dirty; -} - -/* - * syncs the commit blocks, but does not force the real buffers to disk - * will wait until the current transaction is done/committed before returning - */ -int journal_end_sync(struct reiserfs_transaction_handle *th) -{ - struct super_block *sb = th->t_super; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - - BUG_ON(!th->t_trans_id); - /* you can sync while nested, very, very bad */ - BUG_ON(th->t_refcount > 1); - if (journal->j_len == 0) { - reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), - 1); - journal_mark_dirty(th, SB_BUFFER_WITH_SB(sb)); - } - return do_journal_end(th, COMMIT_NOW | WAIT); -} - -/* writeback the pending async commits to disk */ -static void flush_async_commits(struct work_struct *work) -{ - struct reiserfs_journal *journal = - container_of(work, struct reiserfs_journal, j_work.work); - struct super_block *sb = journal->j_work_sb; - struct reiserfs_journal_list *jl; - struct list_head *entry; - - reiserfs_write_lock(sb); - if (!list_empty(&journal->j_journal_list)) { - /* last entry is the youngest, commit it and you get everything */ - entry = journal->j_journal_list.prev; - jl = JOURNAL_LIST_ENTRY(entry); - flush_commit_list(sb, jl, 1); - } - reiserfs_write_unlock(sb); -} - -/* - * flushes any old transactions to disk - * ends the current transaction if it is too old - */ -void reiserfs_flush_old_commits(struct super_block *sb) -{ - time64_t now; - struct reiserfs_transaction_handle th; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - - now = ktime_get_seconds(); - /* - * safety check so we don't flush while we are replaying the log during - * mount - */ - if (list_empty(&journal->j_journal_list)) - return; - - /* - * check the current transaction. 
If there are no writers, and it is - * too old, finish it, and force the commit blocks to disk - */ - if (atomic_read(&journal->j_wcount) <= 0 && - journal->j_trans_start_time > 0 && - journal->j_len > 0 && - (now - journal->j_trans_start_time) > journal->j_max_trans_age) { - if (!journal_join(&th, sb)) { - reiserfs_prepare_for_journal(sb, - SB_BUFFER_WITH_SB(sb), - 1); - journal_mark_dirty(&th, SB_BUFFER_WITH_SB(sb)); - - /* - * we're only being called from kreiserfsd, it makes - * no sense to do an async commit so that kreiserfsd - * can do it later - */ - do_journal_end(&th, COMMIT_NOW | WAIT); - } - } -} - -/* - * returns 0 if do_journal_end should return right away, returns 1 if - * do_journal_end should finish the commit - * - * if the current transaction is too old, but still has writers, this will - * wait on j_join_wait until all the writers are done. By the time it - * wakes up, the transaction it was called has already ended, so it just - * flushes the commit list and returns 0. - * - * Won't batch when flush or commit_now is set. Also won't batch when - * others are waiting on j_join_wait. - * - * Note, we can't allow the journal_end to proceed while there are still - * writers in the log. - */ -static int check_journal_end(struct reiserfs_transaction_handle *th, int flags) -{ - - time64_t now; - int flush = flags & FLUSH_ALL; - int commit_now = flags & COMMIT_NOW; - int wait_on_commit = flags & WAIT; - struct reiserfs_journal_list *jl; - struct super_block *sb = th->t_super; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - - BUG_ON(!th->t_trans_id); - - if (th->t_trans_id != journal->j_trans_id) { - reiserfs_panic(th->t_super, "journal-1577", - "handle trans id %ld != current trans id %ld", - th->t_trans_id, journal->j_trans_id); - } - - journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged); - /* <= 0 is allowed. unmounting might not call begin */ - if (atomic_read(&journal->j_wcount) > 0) - atomic_dec(&journal->j_wcount); - - /* - * BUG, deal with case where j_len is 0, but people previously - * freed blocks need to be released will be dealt with by next - * transaction that actually writes something, but should be taken - * care of in this trans - */ - BUG_ON(journal->j_len == 0); - - /* - * if wcount > 0, and we are called to with flush or commit_now, - * we wait on j_join_wait. We will wake up when the last writer has - * finished the transaction, and started it on its way to the disk. - * Then, we flush the commit or journal list, and just return 0 - * because the rest of journal end was already done for this - * transaction. 
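The condition the background flusher applies before force-ending a transaction can be read off in isolation; a small sketch with simplified parameters in place of the journal fields:

#include <stdbool.h>
#include <stddef.h>
#include <time.h>

/*
 * Only an idle (no writers), non-empty and sufficiently old transaction is
 * force-committed by the background flusher; everything else is left for
 * the next real writer to end.
 */
static bool should_force_commit(int writers, time_t start_time,
                                size_t trans_len, time_t max_age, time_t now)
{
        return writers <= 0 && start_time > 0 && trans_len > 0 &&
               now - start_time > max_age;
}

int main(void)
{
        time_t now = time(NULL);

        /* an empty transaction is never force-committed */
        return should_force_commit(0, now - 60, 0, 30, now) ? 1 : 0;
}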
- */ - if (atomic_read(&journal->j_wcount) > 0) { - if (flush || commit_now) { - unsigned trans_id; - - jl = journal->j_current_jl; - trans_id = jl->j_trans_id; - if (wait_on_commit) - jl->j_state |= LIST_COMMIT_PENDING; - atomic_set(&journal->j_jlock, 1); - if (flush) { - journal->j_next_full_flush = 1; - } - unlock_journal(sb); - - /* - * sleep while the current transaction is - * still j_jlocked - */ - while (journal->j_trans_id == trans_id) { - if (atomic_read(&journal->j_jlock)) { - queue_log_writer(sb); - } else { - lock_journal(sb); - if (journal->j_trans_id == trans_id) { - atomic_set(&journal->j_jlock, - 1); - } - unlock_journal(sb); - } - } - BUG_ON(journal->j_trans_id == trans_id); - - if (commit_now - && journal_list_still_alive(sb, trans_id) - && wait_on_commit) { - flush_commit_list(sb, jl, 1); - } - return 0; - } - unlock_journal(sb); - return 0; - } - - /* deal with old transactions where we are the last writers */ - now = ktime_get_seconds(); - if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) { - commit_now = 1; - journal->j_next_async_flush = 1; - } - /* don't batch when someone is waiting on j_join_wait */ - /* don't batch when syncing the commit or flushing the whole trans */ - if (!(journal->j_must_wait > 0) && !(atomic_read(&journal->j_jlock)) - && !flush && !commit_now && (journal->j_len < journal->j_max_batch) - && journal->j_len_alloc < journal->j_max_batch - && journal->j_cnode_free > (journal->j_trans_max * 3)) { - journal->j_bcount++; - unlock_journal(sb); - return 0; - } - - if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) { - reiserfs_panic(sb, "journal-003", - "j_start (%ld) is too high", - journal->j_start); - } - return 1; -} - -/* - * Does all the work that makes deleting blocks safe. - * when deleting a block mark BH_JNew, just remove it from the current - * transaction, clean it's buffer_head and move on. - * - * otherwise: - * set a bit for the block in the journal bitmap. That will prevent it from - * being allocated for unformatted nodes before this transaction has finished. - * - * mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. - * That will prevent any old transactions with this block from trying to flush - * to the real location. Since we aren't removing the cnode from the - * journal_list_hash, *the block can't be reallocated yet. - * - * Then remove it from the current transaction, decrementing any counters and - * filing it on the clean list. - */ -int journal_mark_freed(struct reiserfs_transaction_handle *th, - struct super_block *sb, b_blocknr_t blocknr) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_journal_cnode *cn = NULL; - struct buffer_head *bh = NULL; - struct reiserfs_list_bitmap *jb = NULL; - int cleaned = 0; - BUG_ON(!th->t_trans_id); - - cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr); - if (cn && cn->bh) { - bh = cn->bh; - get_bh(bh); - } - /* if it is journal new, we just remove it from this transaction */ - if (bh && buffer_journal_new(bh)) { - clear_buffer_journal_new(bh); - clear_prepared_bits(bh); - reiserfs_clean_and_file_buffer(bh); - cleaned = remove_from_transaction(sb, blocknr, cleaned); - } else { - /* - * set the bit for this block in the journal bitmap - * for this transaction - */ - jb = journal->j_current_jl->j_list_bitmap; - if (!jb) { - reiserfs_panic(sb, "journal-1702", - "journal_list_bitmap is NULL"); - } - set_bit_in_list_bitmap(sb, blocknr, jb); - - /* Note, the entire while loop is not allowed to schedule. 
*/ - - if (bh) { - clear_prepared_bits(bh); - reiserfs_clean_and_file_buffer(bh); - } - cleaned = remove_from_transaction(sb, blocknr, cleaned); - - /* - * find all older transactions with this block, - * make sure they don't try to write it out - */ - cn = get_journal_hash_dev(sb, journal->j_list_hash_table, - blocknr); - while (cn) { - if (sb == cn->sb && blocknr == cn->blocknr) { - set_bit(BLOCK_FREED, &cn->state); - if (cn->bh) { - /* - * remove_from_transaction will brelse - * the buffer if it was in the current - * trans - */ - if (!cleaned) { - clear_buffer_journal_dirty(cn-> - bh); - clear_buffer_dirty(cn->bh); - clear_buffer_journal_test(cn-> - bh); - cleaned = 1; - put_bh(cn->bh); - if (atomic_read - (&cn->bh->b_count) < 0) { - reiserfs_warning(sb, - "journal-2138", - "cn->bh->b_count < 0"); - } - } - /* - * since we are clearing the bh, - * we MUST dec nonzerolen - */ - if (cn->jlist) { - atomic_dec(&cn->jlist-> - j_nonzerolen); - } - cn->bh = NULL; - } - } - cn = cn->hnext; - } - } - - if (bh) - release_buffer_page(bh); /* get_hash grabs the buffer */ - return 0; -} - -void reiserfs_update_inode_transaction(struct inode *inode) -{ - struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb); - REISERFS_I(inode)->i_jl = journal->j_current_jl; - REISERFS_I(inode)->i_trans_id = journal->j_trans_id; -} - -/* - * returns -1 on error, 0 if no commits/barriers were done and 1 - * if a transaction was actually committed and the barrier was done - */ -static int __commit_trans_jl(struct inode *inode, unsigned long id, - struct reiserfs_journal_list *jl) -{ - struct reiserfs_transaction_handle th; - struct super_block *sb = inode->i_sb; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - int ret = 0; - - /* - * is it from the current transaction, - * or from an unknown transaction? - */ - if (id == journal->j_trans_id) { - jl = journal->j_current_jl; - /* - * try to let other writers come in and - * grow this transaction - */ - let_transaction_grow(sb, id); - if (journal->j_trans_id != id) { - goto flush_commit_only; - } - - ret = journal_begin(&th, sb, 1); - if (ret) - return ret; - - /* someone might have ended this transaction while we joined */ - if (journal->j_trans_id != id) { - reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), - 1); - journal_mark_dirty(&th, SB_BUFFER_WITH_SB(sb)); - ret = journal_end(&th); - goto flush_commit_only; - } - - ret = journal_end_sync(&th); - if (!ret) - ret = 1; - - } else { - /* - * this gets tricky, we have to make sure the journal list in - * the inode still exists. We know the list is still around - * if we've got a larger transaction id than the oldest list - */ -flush_commit_only: - if (journal_list_still_alive(inode->i_sb, id)) { - /* - * we only set ret to 1 when we know for sure - * the barrier hasn't been started yet on the commit - * block. - */ - if (atomic_read(&jl->j_commit_left) > 1) - ret = 1; - flush_commit_list(sb, jl, 1); - if (journal->j_errno) - ret = journal->j_errno; - } - } - /* otherwise the list is gone, and long since committed */ - return ret; -} - -int reiserfs_commit_for_inode(struct inode *inode) -{ - unsigned int id = REISERFS_I(inode)->i_trans_id; - struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; - - /* - * for the whole inode, assume unset id means it was - * changed in the current transaction. 
More conservative - */ - if (!id || !jl) { - reiserfs_update_inode_transaction(inode); - id = REISERFS_I(inode)->i_trans_id; - /* jl will be updated in __commit_trans_jl */ - } - - return __commit_trans_jl(inode, id, jl); -} - -void reiserfs_restore_prepared_buffer(struct super_block *sb, - struct buffer_head *bh) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - PROC_INFO_INC(sb, journal.restore_prepared); - if (!bh) { - return; - } - if (test_clear_buffer_journal_restore_dirty(bh) && - buffer_journal_dirty(bh)) { - struct reiserfs_journal_cnode *cn; - reiserfs_write_lock(sb); - cn = get_journal_hash_dev(sb, - journal->j_list_hash_table, - bh->b_blocknr); - if (cn && can_dirty(cn)) { - set_buffer_journal_test(bh); - mark_buffer_dirty(bh); - } - reiserfs_write_unlock(sb); - } - clear_buffer_journal_prepared(bh); -} - -extern struct tree_balance *cur_tb; -/* - * before we can change a metadata block, we have to make sure it won't - * be written to disk while we are altering it. So, we must: - * clean it - * wait on it. - */ -int reiserfs_prepare_for_journal(struct super_block *sb, - struct buffer_head *bh, int wait) -{ - PROC_INFO_INC(sb, journal.prepare); - - if (!trylock_buffer(bh)) { - if (!wait) - return 0; - lock_buffer(bh); - } - set_buffer_journal_prepared(bh); - if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) { - clear_buffer_journal_test(bh); - set_buffer_journal_restore_dirty(bh); - } - unlock_buffer(bh); - return 1; -} - -/* - * long and ugly. If flush, will not return until all commit - * blocks and all real buffers in the trans are on disk. - * If no_async, won't return until all commit blocks are on disk. - * - * keep reading, there are comments as you go along - * - * If the journal is aborted, we just clean up. Things like flushing - * journal lists, etc just won't happen. - */ -static int do_journal_end(struct reiserfs_transaction_handle *th, int flags) -{ - struct super_block *sb = th->t_super; - struct reiserfs_journal *journal = SB_JOURNAL(sb); - struct reiserfs_journal_cnode *cn, *next, *jl_cn; - struct reiserfs_journal_cnode *last_cn = NULL; - struct reiserfs_journal_desc *desc; - struct reiserfs_journal_commit *commit; - struct buffer_head *c_bh; /* commit bh */ - struct buffer_head *d_bh; /* desc bh */ - int cur_write_start = 0; /* start index of current log write */ - int i; - int flush; - int wait_on_commit; - struct reiserfs_journal_list *jl, *temp_jl; - struct list_head *entry, *safe; - unsigned long jindex; - unsigned int commit_trans_id; - int trans_half; - int depth; - - BUG_ON(th->t_refcount > 1); - BUG_ON(!th->t_trans_id); - BUG_ON(!th->t_super); - - /* - * protect flush_older_commits from doing mistakes if the - * transaction ID counter gets overflowed. 
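Transaction ids are plain unsigned counters, so wraparound has to be handled explicitly: the overflow case below forces a full synchronous flush, and the increment at the end of do_journal_end() skips the reserved value 0 and restarts at a small id. A one-line sketch of that increment:

#include <stdio.h>

/*
 * Ids are unsigned, id 0 means "no transaction", and after the final id
 * (~0U) the counter restarts at a small non-zero value (10 in the code
 * above).
 */
static unsigned int next_trans_id(unsigned int id)
{
        if (++id == 0)
                id = 10;        /* skip 0, which means "no transaction" */
        return id;
}

int main(void)
{
        printf("%u -> %u\n", 4294967295u, next_trans_id(4294967295u));
        return 0;
}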
- */ - if (th->t_trans_id == ~0U) - flags |= FLUSH_ALL | COMMIT_NOW | WAIT; - flush = flags & FLUSH_ALL; - wait_on_commit = flags & WAIT; - - current->journal_info = th->t_handle_save; - reiserfs_check_lock_depth(sb, "journal end"); - if (journal->j_len == 0) { - reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), - 1); - journal_mark_dirty(th, SB_BUFFER_WITH_SB(sb)); - } - - lock_journal(sb); - if (journal->j_next_full_flush) { - flags |= FLUSH_ALL; - flush = 1; - } - if (journal->j_next_async_flush) { - flags |= COMMIT_NOW | WAIT; - wait_on_commit = 1; - } - - /* - * check_journal_end locks the journal, and unlocks if it does - * not return 1 it tells us if we should continue with the - * journal_end, or just return - */ - if (!check_journal_end(th, flags)) { - reiserfs_schedule_old_flush(sb); - wake_queued_writers(sb); - reiserfs_async_progress_wait(sb); - goto out; - } - - /* check_journal_end might set these, check again */ - if (journal->j_next_full_flush) { - flush = 1; - } - - /* - * j must wait means we have to flush the log blocks, and the - * real blocks for this transaction - */ - if (journal->j_must_wait > 0) { - flush = 1; - } -#ifdef REISERFS_PREALLOCATE - /* - * quota ops might need to nest, setup the journal_info pointer - * for them and raise the refcount so that it is > 0. - */ - current->journal_info = th; - th->t_refcount++; - - /* it should not involve new blocks into the transaction */ - reiserfs_discard_all_prealloc(th); - - th->t_refcount--; - current->journal_info = th->t_handle_save; -#endif - - /* setup description block */ - d_bh = - journal_getblk(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - journal->j_start); - set_buffer_uptodate(d_bh); - desc = (struct reiserfs_journal_desc *)(d_bh)->b_data; - memset(d_bh->b_data, 0, d_bh->b_size); - memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); - set_desc_trans_id(desc, journal->j_trans_id); - - /* - * setup commit block. Don't write (keep it clean too) this one - * until after everyone else is written - */ - c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - ((journal->j_start + journal->j_len + - 1) % SB_ONDISK_JOURNAL_SIZE(sb))); - commit = (struct reiserfs_journal_commit *)c_bh->b_data; - memset(c_bh->b_data, 0, c_bh->b_size); - set_commit_trans_id(commit, journal->j_trans_id); - set_buffer_uptodate(c_bh); - - /* init this journal list */ - jl = journal->j_current_jl; - - /* - * we lock the commit before doing anything because - * we want to make sure nobody tries to run flush_commit_list until - * the new transaction is fully setup, and we've already flushed the - * ordered bh list - */ - reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb); - - /* save the transaction id in case we need to commit it later */ - commit_trans_id = jl->j_trans_id; - - atomic_set(&jl->j_older_commits_done, 0); - jl->j_trans_id = journal->j_trans_id; - jl->j_timestamp = journal->j_trans_start_time; - jl->j_commit_bh = c_bh; - jl->j_start = journal->j_start; - jl->j_len = journal->j_len; - atomic_set(&jl->j_nonzerolen, journal->j_len); - atomic_set(&jl->j_commit_left, journal->j_len + 2); - jl->j_realblock = NULL; - - /* - * The ENTIRE FOR LOOP MUST not cause schedule to occur. 
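The block numbers of a transaction are recorded partly in the description block and, once that is full, in the commit block; trans_half is simply how many 32-bit block numbers the description block can hold after its header. A sketch of that split, taking trans_half as a parameter rather than deriving it from the on-disk layout:

#include <stdint.h>
#include <stdio.h>

/* Split a transaction's block-number list across two fixed-size arrays,
 * mirroring the desc/commit split in the loop below. */
static void record_blocknrs(const uint32_t *blocknr, int count, int trans_half,
                            uint32_t *desc_area, uint32_t *commit_area)
{
        for (int i = 0; i < count; i++) {
                if (i < trans_half)
                        desc_area[i] = blocknr[i];
                else
                        commit_area[i - trans_half] = blocknr[i];
        }
}

int main(void)
{
        uint32_t blocks[5] = { 100, 101, 102, 103, 104 };
        uint32_t desc[3] = { 0 }, commit[2] = { 0 };

        record_blocknrs(blocks, 5, 3, desc, commit);
        printf("commit[0] = %u\n", (unsigned)commit[0]);        /* 103 */
        return 0;
}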
- * for each real block, add it to the journal list hash, - * copy into real block index array in the commit or desc block - */ - trans_half = journal_trans_half(sb->s_blocksize); - for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { - if (buffer_journaled(cn->bh)) { - jl_cn = get_cnode(sb); - if (!jl_cn) { - reiserfs_panic(sb, "journal-1676", - "get_cnode returned NULL"); - } - if (i == 0) { - jl->j_realblock = jl_cn; - } - jl_cn->prev = last_cn; - jl_cn->next = NULL; - if (last_cn) { - last_cn->next = jl_cn; - } - last_cn = jl_cn; - /* - * make sure the block we are trying to log - * is not a block of journal or reserved area - */ - if (is_block_in_log_or_reserved_area - (sb, cn->bh->b_blocknr)) { - reiserfs_panic(sb, "journal-2332", - "Trying to log block %lu, " - "which is a log block", - cn->bh->b_blocknr); - } - jl_cn->blocknr = cn->bh->b_blocknr; - jl_cn->state = 0; - jl_cn->sb = sb; - jl_cn->bh = cn->bh; - jl_cn->jlist = jl; - insert_journal_hash(journal->j_list_hash_table, jl_cn); - if (i < trans_half) { - desc->j_realblock[i] = - cpu_to_le32(cn->bh->b_blocknr); - } else { - commit->j_realblock[i - trans_half] = - cpu_to_le32(cn->bh->b_blocknr); - } - } else { - i--; - } - } - set_desc_trans_len(desc, journal->j_len); - set_desc_mount_id(desc, journal->j_mount_id); - set_desc_trans_id(desc, journal->j_trans_id); - set_commit_trans_len(commit, journal->j_len); - - /* - * special check in case all buffers in the journal - * were marked for not logging - */ - BUG_ON(journal->j_len == 0); - - /* - * we're about to dirty all the log blocks, mark the description block - * dirty now too. Don't mark the commit block dirty until all the - * others are on disk - */ - mark_buffer_dirty(d_bh); - - /* - * first data block is j_start + 1, so add one to - * cur_write_start wherever you use it - */ - cur_write_start = journal->j_start; - cn = journal->j_first; - jindex = 1; /* start at one so we don't get the desc again */ - while (cn) { - clear_buffer_journal_new(cn->bh); - /* copy all the real blocks into log area. dirty log blocks */ - if (buffer_journaled(cn->bh)) { - struct buffer_head *tmp_bh; - char *addr; - struct page *page; - tmp_bh = - journal_getblk(sb, - SB_ONDISK_JOURNAL_1st_BLOCK(sb) + - ((cur_write_start + - jindex) % - SB_ONDISK_JOURNAL_SIZE(sb))); - set_buffer_uptodate(tmp_bh); - page = cn->bh->b_page; - addr = kmap(page); - memcpy(tmp_bh->b_data, - addr + offset_in_page(cn->bh->b_data), - cn->bh->b_size); - kunmap(page); - mark_buffer_dirty(tmp_bh); - jindex++; - set_buffer_journal_dirty(cn->bh); - clear_buffer_journaled(cn->bh); - } else { - /* - * JDirty cleared sometime during transaction. - * don't log this one - */ - reiserfs_warning(sb, "journal-2048", - "BAD, buffer in journal hash, " - "but not JDirty!"); - brelse(cn->bh); - } - next = cn->next; - free_cnode(sb, cn); - cn = next; - reiserfs_cond_resched(sb); - } - - /* - * we are done with both the c_bh and d_bh, but - * c_bh must be written after all other commit blocks, - * so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. 
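The log area is used as a ring: each real block is copied to the next log block past the description block, wrapping modulo the on-disk journal size. A compact sketch of that address calculation (the sizes in main() are only examples):

#include <stdio.h>

/*
 * The journal occupies journal_size blocks starting at journal_1st_block;
 * log writes wrap with a simple modulo, exactly as in the copy loop above.
 */
static unsigned long log_block(unsigned long journal_1st_block,
                               unsigned long journal_size,
                               unsigned long start, unsigned long index)
{
        return journal_1st_block + (start + index) % journal_size;
}

int main(void)
{
        /* start near the end of an 8192-block journal: index 5 wraps around */
        printf("%lu\n", log_block(18, 8192, 8190, 5));  /* -> 18 + 3 = 21 */
        return 0;
}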
- */ - - journal->j_current_jl = alloc_journal_list(sb); - - /* now it is safe to insert this transaction on the main list */ - list_add_tail(&jl->j_list, &journal->j_journal_list); - list_add_tail(&jl->j_working_list, &journal->j_working_list); - journal->j_num_work_lists++; - - /* reset journal values for the next transaction */ - journal->j_start = - (journal->j_start + journal->j_len + - 2) % SB_ONDISK_JOURNAL_SIZE(sb); - atomic_set(&journal->j_wcount, 0); - journal->j_bcount = 0; - journal->j_last = NULL; - journal->j_first = NULL; - journal->j_len = 0; - journal->j_trans_start_time = 0; - /* check for trans_id overflow */ - if (++journal->j_trans_id == 0) - journal->j_trans_id = 10; - journal->j_current_jl->j_trans_id = journal->j_trans_id; - journal->j_must_wait = 0; - journal->j_len_alloc = 0; - journal->j_next_full_flush = 0; - journal->j_next_async_flush = 0; - init_journal_hash(sb); - - /* - * make sure reiserfs_add_jh sees the new current_jl before we - * write out the tails - */ - smp_mb(); - - /* - * tail conversion targets have to hit the disk before we end the - * transaction. Otherwise a later transaction might repack the tail - * before this transaction commits, leaving the data block unflushed - * and clean, if we crash before the later transaction commits, the - * data block is lost. - */ - if (!list_empty(&jl->j_tail_bh_list)) { - depth = reiserfs_write_unlock_nested(sb); - write_ordered_buffers(&journal->j_dirty_buffers_lock, - journal, jl, &jl->j_tail_bh_list); - reiserfs_write_lock_nested(sb, depth); - } - BUG_ON(!list_empty(&jl->j_tail_bh_list)); - mutex_unlock(&jl->j_commit_mutex); - - /* - * honor the flush wishes from the caller, simple commits can - * be done outside the journal lock, they are done below - * - * if we don't flush the commit list right now, we put it into - * the work queue so the people waiting on the async progress work - * queue don't wait for this proc to flush journal lists and such. - */ - if (flush) { - flush_commit_list(sb, jl, 1); - flush_journal_list(sb, jl, 1); - } else if (!(jl->j_state & LIST_COMMIT_PENDING)) { - /* - * Avoid queueing work when sb is being shut down. Transaction - * will be flushed on journal shutdown. - */ - if (sb->s_flags & SB_ACTIVE) - queue_delayed_work(REISERFS_SB(sb)->commit_wq, - &journal->j_work, HZ / 10); - } - - /* - * if the next transaction has any chance of wrapping, flush - * transactions that might get overwritten. If any journal lists - * are very old flush them as well. - */ -first_jl: - list_for_each_safe(entry, safe, &journal->j_journal_list) { - temp_jl = JOURNAL_LIST_ENTRY(entry); - if (journal->j_start <= temp_jl->j_start) { - if ((journal->j_start + journal->j_trans_max + 1) >= - temp_jl->j_start) { - flush_used_journal_lists(sb, temp_jl); - goto first_jl; - } else if ((journal->j_start + - journal->j_trans_max + 1) < - SB_ONDISK_JOURNAL_SIZE(sb)) { - /* - * if we don't cross into the next - * transaction and we don't wrap, there is - * no way we can overlap any later transactions - * break now - */ - break; - } - } else if ((journal->j_start + - journal->j_trans_max + 1) > - SB_ONDISK_JOURNAL_SIZE(sb)) { - if (((journal->j_start + journal->j_trans_max + 1) % - SB_ONDISK_JOURNAL_SIZE(sb)) >= - temp_jl->j_start) { - flush_used_journal_lists(sb, temp_jl); - goto first_jl; - } else { - /* - * we don't overlap anything from out start - * to the end of the log, and our wrapped - * portion doesn't overlap anything at - * the start of the log. 
We can break - */ - break; - } - } - } - - journal->j_current_jl->j_list_bitmap = - get_list_bitmap(sb, journal->j_current_jl); - - if (!(journal->j_current_jl->j_list_bitmap)) { - reiserfs_panic(sb, "journal-1996", - "could not get a list bitmap"); - } - - atomic_set(&journal->j_jlock, 0); - unlock_journal(sb); - /* wake up any body waiting to join. */ - clear_bit(J_WRITERS_QUEUED, &journal->j_state); - wake_up(&journal->j_join_wait); - - if (!flush && wait_on_commit && - journal_list_still_alive(sb, commit_trans_id)) { - flush_commit_list(sb, jl, 1); - } -out: - reiserfs_check_lock_depth(sb, "journal end2"); - - memset(th, 0, sizeof(*th)); - /* - * Re-set th->t_super, so we can properly keep track of how many - * persistent transactions there are. We need to do this so if this - * call is part of a failed restart_transaction, we can free it later - */ - th->t_super = sb; - - return journal->j_errno; -} - -/* Send the file system read only and refuse new transactions */ -void reiserfs_abort_journal(struct super_block *sb, int errno) -{ - struct reiserfs_journal *journal = SB_JOURNAL(sb); - if (test_bit(J_ABORTED, &journal->j_state)) - return; - - if (!journal->j_errno) - journal->j_errno = errno; - - sb->s_flags |= SB_RDONLY; - set_bit(J_ABORTED, &journal->j_state); - -#ifdef CONFIG_REISERFS_CHECK - dump_stack(); -#endif -} diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c deleted file mode 100644 index 7f868569d4d0..000000000000 --- a/fs/reiserfs/lbalance.c +++ /dev/null @@ -1,1426 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include -#include -#include -#include "reiserfs.h" -#include - -/* - * copy copy_count entries from source directory item to dest buffer - * (creating new item if needed) - */ -static void leaf_copy_dir_entries(struct buffer_info *dest_bi, - struct buffer_head *source, int last_first, - int item_num, int from, int copy_count) -{ - struct buffer_head *dest = dest_bi->bi_bh; - /* - * either the number of target item, or if we must create a - * new item, the number of the item we will create it next to - */ - int item_num_in_dest; - - struct item_head *ih; - struct reiserfs_de_head *deh; - int copy_records_len; /* length of all records in item to be copied */ - char *records; - - ih = item_head(source, item_num); - - RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item"); - - /* - * length of all record to be copied and first byte of - * the last of them - */ - deh = B_I_DEH(source, ih); - if (copy_count) { - copy_records_len = (from ? deh_location(&deh[from - 1]) : - ih_item_len(ih)) - - deh_location(&deh[from + copy_count - 1]); - records = - source->b_data + ih_location(ih) + - deh_location(&deh[from + copy_count - 1]); - } else { - copy_records_len = 0; - records = NULL; - } - - /* when copy last to first, dest buffer can contain 0 items */ - item_num_in_dest = - (last_first == - LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 
0 : -1) : (B_NR_ITEMS(dest) - - 1); - - /* - * if there are no items in dest or the first/last item in - * dest is not item of the same directory - */ - if ((item_num_in_dest == -1) || - (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) || - (last_first == LAST_TO_FIRST - && comp_short_le_keys /*COMP_SHORT_KEYS */ (&ih->ih_key, - leaf_key(dest, - item_num_in_dest)))) - { - /* create new item in dest */ - struct item_head new_ih; - - /* form item header */ - memcpy(&new_ih.ih_key, &ih->ih_key, KEY_SIZE); - put_ih_version(&new_ih, KEY_FORMAT_3_5); - /* calculate item len */ - put_ih_item_len(&new_ih, - DEH_SIZE * copy_count + copy_records_len); - put_ih_entry_count(&new_ih, 0); - - if (last_first == LAST_TO_FIRST) { - /* form key by the following way */ - if (from < ih_entry_count(ih)) { - set_le_ih_k_offset(&new_ih, - deh_offset(&deh[from])); - } else { - /* - * no entries will be copied to this - * item in this function - */ - set_le_ih_k_offset(&new_ih, U32_MAX); - /* - * this item is not yet valid, but we - * want I_IS_DIRECTORY_ITEM to return 1 - * for it, so we -1 - */ - } - set_le_key_k_type(KEY_FORMAT_3_5, &new_ih.ih_key, - TYPE_DIRENTRY); - } - - /* insert item into dest buffer */ - leaf_insert_into_buf(dest_bi, - (last_first == - LAST_TO_FIRST) ? 0 : B_NR_ITEMS(dest), - &new_ih, NULL, 0); - } else { - /* prepare space for entries */ - leaf_paste_in_buffer(dest_bi, - (last_first == - FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - - 1) : 0, MAX_US_INT, - DEH_SIZE * copy_count + copy_records_len, - records, 0); - } - - item_num_in_dest = - (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0; - - leaf_paste_entries(dest_bi, item_num_in_dest, - (last_first == - FIRST_TO_LAST) ? ih_entry_count(item_head(dest, - item_num_in_dest)) - : 0, copy_count, deh + from, records, - DEH_SIZE * copy_count + copy_records_len); -} - -/* - * Copy the first (if last_first == FIRST_TO_LAST) or last - * (last_first == LAST_TO_FIRST) item or part of it or nothing - * (see the return 0 below) from SOURCE to the end (if last_first) - * or beginning (!last_first) of the DEST - */ -/* returns 1 if anything was copied, else 0 */ -static int leaf_copy_boundary_item(struct buffer_info *dest_bi, - struct buffer_head *src, int last_first, - int bytes_or_entries) -{ - struct buffer_head *dest = dest_bi->bi_bh; - /* number of items in the source and destination buffers */ - int dest_nr_item, src_nr_item; - struct item_head *ih; - struct item_head *dih; - - dest_nr_item = B_NR_ITEMS(dest); - - /* - * if ( DEST is empty or first item of SOURCE and last item of - * DEST are the items of different objects or of different types ) - * then there is no need to treat this item differently from the - * other items that we copy, so we return - */ - if (last_first == FIRST_TO_LAST) { - ih = item_head(src, 0); - dih = item_head(dest, dest_nr_item - 1); - - /* there is nothing to merge */ - if (!dest_nr_item - || (!op_is_left_mergeable(&ih->ih_key, src->b_size))) - return 0; - - RFALSE(!ih_item_len(ih), - "vs-10010: item can not have empty length"); - - if (is_direntry_le_ih(ih)) { - if (bytes_or_entries == -1) - /* copy all entries to dest */ - bytes_or_entries = ih_entry_count(ih); - leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, 0, 0, - bytes_or_entries); - return 1; - } - - /* - * copy part of the body of the first item of SOURCE - * to the end of the body of the last item of the DEST - * part defined by 'bytes_or_entries'; if bytes_or_entries - * == -1 copy whole body; don't create new item header - */ - if 
(bytes_or_entries == -1) - bytes_or_entries = ih_item_len(ih); - -#ifdef CONFIG_REISERFS_CHECK - else { - if (bytes_or_entries == ih_item_len(ih) - && is_indirect_le_ih(ih)) - if (get_ih_free_space(ih)) - reiserfs_panic(sb_from_bi(dest_bi), - "vs-10020", - "last unformatted node " - "must be filled " - "entirely (%h)", ih); - } -#endif - - /* - * merge first item (or its part) of src buffer with the last - * item of dest buffer. Both are of the same file - */ - leaf_paste_in_buffer(dest_bi, - dest_nr_item - 1, ih_item_len(dih), - bytes_or_entries, ih_item_body(src, ih), 0); - - if (is_indirect_le_ih(dih)) { - RFALSE(get_ih_free_space(dih), - "vs-10030: merge to left: last unformatted node of non-last indirect item %h must have zerto free space", - ih); - if (bytes_or_entries == ih_item_len(ih)) - set_ih_free_space(dih, get_ih_free_space(ih)); - } - - return 1; - } - - /* copy boundary item to right (last_first == LAST_TO_FIRST) */ - - /* - * (DEST is empty or last item of SOURCE and first item of DEST - * are the items of different object or of different types) - */ - src_nr_item = B_NR_ITEMS(src); - ih = item_head(src, src_nr_item - 1); - dih = item_head(dest, 0); - - if (!dest_nr_item || !op_is_left_mergeable(&dih->ih_key, src->b_size)) - return 0; - - if (is_direntry_le_ih(ih)) { - /* - * bytes_or_entries = entries number in last - * item body of SOURCE - */ - if (bytes_or_entries == -1) - bytes_or_entries = ih_entry_count(ih); - - leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, - src_nr_item - 1, - ih_entry_count(ih) - bytes_or_entries, - bytes_or_entries); - return 1; - } - - /* - * copy part of the body of the last item of SOURCE to the - * begin of the body of the first item of the DEST; part defined - * by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; - * change first item key of the DEST; don't create new item header - */ - - RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih), - "vs-10040: merge to right: last unformatted node of non-last indirect item must be filled entirely (%h)", - ih); - - if (bytes_or_entries == -1) { - /* bytes_or_entries = length of last item body of SOURCE */ - bytes_or_entries = ih_item_len(ih); - - RFALSE(le_ih_k_offset(dih) != - le_ih_k_offset(ih) + op_bytes_number(ih, src->b_size), - "vs-10050: items %h and %h do not match", ih, dih); - - /* change first item key of the DEST */ - set_le_ih_k_offset(dih, le_ih_k_offset(ih)); - - /* item becomes non-mergeable */ - /* or mergeable if left item was */ - set_le_ih_k_type(dih, le_ih_k_type(ih)); - } else { - /* merge to right only part of item */ - RFALSE(ih_item_len(ih) <= bytes_or_entries, - "vs-10060: no so much bytes %lu (needed %lu)", - (unsigned long)ih_item_len(ih), - (unsigned long)bytes_or_entries); - - /* change first item key of the DEST */ - if (is_direct_le_ih(dih)) { - RFALSE(le_ih_k_offset(dih) <= - (unsigned long)bytes_or_entries, - "vs-10070: dih %h, bytes_or_entries(%d)", dih, - bytes_or_entries); - set_le_ih_k_offset(dih, - le_ih_k_offset(dih) - - bytes_or_entries); - } else { - RFALSE(le_ih_k_offset(dih) <= - (bytes_or_entries / UNFM_P_SIZE) * dest->b_size, - "vs-10080: dih %h, bytes_or_entries(%d)", - dih, - (bytes_or_entries / UNFM_P_SIZE) * dest->b_size); - set_le_ih_k_offset(dih, - le_ih_k_offset(dih) - - ((bytes_or_entries / UNFM_P_SIZE) * - dest->b_size)); - } - } - - leaf_paste_in_buffer(dest_bi, 0, 0, bytes_or_entries, - ih_item_body(src, - ih) + ih_item_len(ih) - bytes_or_entries, - 0); - return 1; -} - -/* - * copy cpy_mun items from buffer src to buffer 
dest - * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning - * from first-th item in src to tail of dest - * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning - * from first-th item in src to head of dest - */ -static void leaf_copy_items_entirely(struct buffer_info *dest_bi, - struct buffer_head *src, int last_first, - int first, int cpy_num) -{ - struct buffer_head *dest; - int nr, free_space; - int dest_before; - int last_loc, last_inserted_loc, location; - int i, j; - struct block_head *blkh; - struct item_head *ih; - - RFALSE(last_first != LAST_TO_FIRST && last_first != FIRST_TO_LAST, - "vs-10090: bad last_first parameter %d", last_first); - RFALSE(B_NR_ITEMS(src) - first < cpy_num, - "vs-10100: too few items in source %d, required %d from %d", - B_NR_ITEMS(src), cpy_num, first); - RFALSE(cpy_num < 0, "vs-10110: can not copy negative amount of items"); - RFALSE(!dest_bi, "vs-10120: can not copy negative amount of items"); - - dest = dest_bi->bi_bh; - - RFALSE(!dest, "vs-10130: can not copy negative amount of items"); - - if (cpy_num == 0) - return; - - blkh = B_BLK_HEAD(dest); - nr = blkh_nr_item(blkh); - free_space = blkh_free_space(blkh); - - /* - * we will insert items before 0-th or nr-th item in dest buffer. - * It depends of last_first parameter - */ - dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr; - - /* location of head of first new item */ - ih = item_head(dest, dest_before); - - RFALSE(blkh_free_space(blkh) < cpy_num * IH_SIZE, - "vs-10140: not enough free space for headers %d (needed %d)", - B_FREE_SPACE(dest), cpy_num * IH_SIZE); - - /* prepare space for headers */ - memmove(ih + cpy_num, ih, (nr - dest_before) * IH_SIZE); - - /* copy item headers */ - memcpy(ih, item_head(src, first), cpy_num * IH_SIZE); - - free_space -= (IH_SIZE * cpy_num); - set_blkh_free_space(blkh, free_space); - - /* location of unmovable item */ - j = location = (dest_before == 0) ? dest->b_size : ih_location(ih - 1); - for (i = dest_before; i < nr + cpy_num; i++) { - location -= ih_item_len(ih + i - dest_before); - put_ih_location(ih + i - dest_before, location); - } - - /* prepare space for items */ - last_loc = ih_location(&ih[nr + cpy_num - 1 - dest_before]); - last_inserted_loc = ih_location(&ih[cpy_num - 1]); - - /* check free space */ - RFALSE(free_space < j - last_inserted_loc, - "vs-10150: not enough free space for items %d (needed %d)", - free_space, j - last_inserted_loc); - - memmove(dest->b_data + last_loc, - dest->b_data + last_loc + j - last_inserted_loc, - last_inserted_loc - last_loc); - - /* copy items */ - memcpy(dest->b_data + last_inserted_loc, - item_body(src, (first + cpy_num - 1)), - j - last_inserted_loc); - - /* sizes, item number */ - set_blkh_nr_item(blkh, nr + cpy_num); - set_blkh_free_space(blkh, free_space - (j - last_inserted_loc)); - - do_balance_mark_leaf_dirty(dest_bi->tb, dest, 0); - - if (dest_bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position); - RFALSE(dc_block_number(t_dc) != dest->b_blocknr, - "vs-10160: block number in bh does not match to field in disk_child structure %lu and %lu", - (long unsigned)dest->b_blocknr, - (long unsigned)dc_block_number(t_dc)); - put_dc_size(t_dc, - dc_size(t_dc) + (j - last_inserted_loc + - IH_SIZE * cpy_num)); - - do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent, - 0); - } -} - -/* - * This function splits the (liquid) item into two items (useful when - * shifting part of an item into another node.) 
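The location fix-up above relies on the leaf layout: item headers grow from the start of the block while item bodies are packed downward from the end, so each body's location is the previous location minus the item length. A toy version of that loop (block size and item lengths are only examples):

#include <stdio.h>

/* Assign body locations for nr_items packed from the end of a leaf block,
 * in the same spirit as the loop in leaf_copy_items_entirely() above. */
static void assign_locations(unsigned int blocksize,
                             const unsigned int *item_len, int nr_items,
                             unsigned int *location)
{
        unsigned int loc = blocksize;           /* first body ends at block end */

        for (int i = 0; i < nr_items; i++) {
                loc -= item_len[i];
                location[i] = loc;
        }
}

int main(void)
{
        unsigned int len[3] = { 48, 120, 16 }, loc[3];

        assign_locations(4096, len, 3, loc);
        for (int i = 0; i < 3; i++)
                printf("item %d at %u\n", i, loc[i]);
        return 0;
}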
- */ -static void leaf_item_bottle(struct buffer_info *dest_bi, - struct buffer_head *src, int last_first, - int item_num, int cpy_bytes) -{ - struct buffer_head *dest = dest_bi->bi_bh; - struct item_head *ih; - - RFALSE(cpy_bytes == -1, - "vs-10170: bytes == - 1 means: do not split item"); - - if (last_first == FIRST_TO_LAST) { - /* - * if ( if item in position item_num in buffer SOURCE - * is directory item ) - */ - ih = item_head(src, item_num); - if (is_direntry_le_ih(ih)) - leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, - item_num, 0, cpy_bytes); - else { - struct item_head n_ih; - - /* - * copy part of the body of the item number 'item_num' - * of SOURCE to the end of the DEST part defined by - * 'cpy_bytes'; create new item header; change old - * item_header (????); n_ih = new item_header; - */ - memcpy(&n_ih, ih, IH_SIZE); - put_ih_item_len(&n_ih, cpy_bytes); - if (is_indirect_le_ih(ih)) { - RFALSE(cpy_bytes == ih_item_len(ih) - && get_ih_free_space(ih), - "vs-10180: when whole indirect item is bottle to left neighbor, it must have free_space==0 (not %lu)", - (long unsigned)get_ih_free_space(ih)); - set_ih_free_space(&n_ih, 0); - } - - RFALSE(op_is_left_mergeable(&ih->ih_key, src->b_size), - "vs-10190: bad mergeability of item %h", ih); - n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ - leaf_insert_into_buf(dest_bi, B_NR_ITEMS(dest), &n_ih, - item_body(src, item_num), 0); - } - } else { - /* - * if ( if item in position item_num in buffer - * SOURCE is directory item ) - */ - ih = item_head(src, item_num); - if (is_direntry_le_ih(ih)) - leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, - item_num, - ih_entry_count(ih) - cpy_bytes, - cpy_bytes); - else { - struct item_head n_ih; - - /* - * copy part of the body of the item number 'item_num' - * of SOURCE to the begin of the DEST part defined by - * 'cpy_bytes'; create new item header; - * n_ih = new item_header; - */ - memcpy(&n_ih.ih_key, &ih->ih_key, KEY_SIZE); - - /* Endian safe, both le */ - n_ih.ih_version = ih->ih_version; - - if (is_direct_le_ih(ih)) { - set_le_ih_k_offset(&n_ih, - le_ih_k_offset(ih) + - ih_item_len(ih) - cpy_bytes); - set_le_ih_k_type(&n_ih, TYPE_DIRECT); - set_ih_free_space(&n_ih, MAX_US_INT); - } else { - /* indirect item */ - RFALSE(!cpy_bytes && get_ih_free_space(ih), - "vs-10200: ih->ih_free_space must be 0 when indirect item will be appended"); - set_le_ih_k_offset(&n_ih, - le_ih_k_offset(ih) + - (ih_item_len(ih) - - cpy_bytes) / UNFM_P_SIZE * - dest->b_size); - set_le_ih_k_type(&n_ih, TYPE_INDIRECT); - set_ih_free_space(&n_ih, get_ih_free_space(ih)); - } - - /* set item length */ - put_ih_item_len(&n_ih, cpy_bytes); - - /* Endian safe, both le */ - n_ih.ih_version = ih->ih_version; - - leaf_insert_into_buf(dest_bi, 0, &n_ih, - item_body(src, item_num) + - ih_item_len(ih) - cpy_bytes, 0); - } - } -} - -/* - * If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE - * to DEST. If cpy_bytes not equal to minus one than copy cpy_num-1 whole - * items from SOURCE to DEST. From last item copy cpy_num bytes for regular - * item and cpy_num directory entries for directory item. 
- */ -static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, - int last_first, int cpy_num, int cpy_bytes) -{ - struct buffer_head *dest; - int pos, i, src_nr_item, bytes; - - dest = dest_bi->bi_bh; - RFALSE(!dest || !src, "vs-10210: !dest || !src"); - RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST, - "vs-10220:last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST"); - RFALSE(B_NR_ITEMS(src) < cpy_num, - "vs-10230: No enough items: %d, req. %d", B_NR_ITEMS(src), - cpy_num); - RFALSE(cpy_num < 0, "vs-10240: cpy_num < 0 (%d)", cpy_num); - - if (cpy_num == 0) - return 0; - - if (last_first == FIRST_TO_LAST) { - /* copy items to left */ - pos = 0; - if (cpy_num == 1) - bytes = cpy_bytes; - else - bytes = -1; - - /* - * copy the first item or it part or nothing to the end of - * the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) - */ - i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes); - cpy_num -= i; - if (cpy_num == 0) - return i; - pos += i; - if (cpy_bytes == -1) - /* - * copy first cpy_num items starting from position - * 'pos' of SOURCE to end of DEST - */ - leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, - pos, cpy_num); - else { - /* - * copy first cpy_num-1 items starting from position - * 'pos-1' of the SOURCE to the end of the DEST - */ - leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, - pos, cpy_num - 1); - - /* - * copy part of the item which number is - * cpy_num+pos-1 to the end of the DEST - */ - leaf_item_bottle(dest_bi, src, FIRST_TO_LAST, - cpy_num + pos - 1, cpy_bytes); - } - } else { - /* copy items to right */ - src_nr_item = B_NR_ITEMS(src); - if (cpy_num == 1) - bytes = cpy_bytes; - else - bytes = -1; - - /* - * copy the last item or it part or nothing to the - * begin of the DEST - * (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); - */ - i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes); - - cpy_num -= i; - if (cpy_num == 0) - return i; - - pos = src_nr_item - cpy_num - i; - if (cpy_bytes == -1) { - /* - * starting from position 'pos' copy last cpy_num - * items of SOURCE to begin of DEST - */ - leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, - pos, cpy_num); - } else { - /* - * copy last cpy_num-1 items starting from position - * 'pos+1' of the SOURCE to the begin of the DEST; - */ - leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, - pos + 1, cpy_num - 1); - - /* - * copy part of the item which number is pos to - * the begin of the DEST - */ - leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos, - cpy_bytes); - } - } - return i; -} - -/* - * there are types of coping: from S[0] to L[0], from S[0] to R[0], - * from R[0] to L[0]. 
for each of these we have to define parent and - * positions of destination and source buffers - */ -static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb, - struct buffer_info *dest_bi, - struct buffer_info *src_bi, - int *first_last, - struct buffer_head *Snew) -{ - memset(dest_bi, 0, sizeof(struct buffer_info)); - memset(src_bi, 0, sizeof(struct buffer_info)); - - /* define dest, src, dest parent, dest position */ - switch (shift_mode) { - case LEAF_FROM_S_TO_L: /* it is used in leaf_shift_left */ - src_bi->tb = tb; - src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); - src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); - - /* src->b_item_order */ - src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->L[0]; - dest_bi->bi_parent = tb->FL[0]; - dest_bi->bi_position = get_left_neighbor_position(tb, 0); - *first_last = FIRST_TO_LAST; - break; - - case LEAF_FROM_S_TO_R: /* it is used in leaf_shift_right */ - src_bi->tb = tb; - src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); - src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); - src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->R[0]; - dest_bi->bi_parent = tb->FR[0]; - dest_bi->bi_position = get_right_neighbor_position(tb, 0); - *first_last = LAST_TO_FIRST; - break; - - case LEAF_FROM_R_TO_L: /* it is used in balance_leaf_when_delete */ - src_bi->tb = tb; - src_bi->bi_bh = tb->R[0]; - src_bi->bi_parent = tb->FR[0]; - src_bi->bi_position = get_right_neighbor_position(tb, 0); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->L[0]; - dest_bi->bi_parent = tb->FL[0]; - dest_bi->bi_position = get_left_neighbor_position(tb, 0); - *first_last = FIRST_TO_LAST; - break; - - case LEAF_FROM_L_TO_R: /* it is used in balance_leaf_when_delete */ - src_bi->tb = tb; - src_bi->bi_bh = tb->L[0]; - src_bi->bi_parent = tb->FL[0]; - src_bi->bi_position = get_left_neighbor_position(tb, 0); - dest_bi->tb = tb; - dest_bi->bi_bh = tb->R[0]; - dest_bi->bi_parent = tb->FR[0]; - dest_bi->bi_position = get_right_neighbor_position(tb, 0); - *first_last = LAST_TO_FIRST; - break; - - case LEAF_FROM_S_TO_SNEW: - src_bi->tb = tb; - src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); - src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); - src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); - dest_bi->tb = tb; - dest_bi->bi_bh = Snew; - dest_bi->bi_parent = NULL; - dest_bi->bi_position = 0; - *first_last = LAST_TO_FIRST; - break; - - default: - reiserfs_panic(sb_from_bi(src_bi), "vs-10250", - "shift type is unknown (%d)", shift_mode); - } - RFALSE(!src_bi->bi_bh || !dest_bi->bi_bh, - "vs-10260: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly", - shift_mode, src_bi->bi_bh, dest_bi->bi_bh); -} - -/* - * copy mov_num items and mov_bytes of the (mov_num-1)th item to - * neighbor. Delete them from source - */ -int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, - int mov_bytes, struct buffer_head *Snew) -{ - int ret_value; - struct buffer_info dest_bi, src_bi; - int first_last; - - leaf_define_dest_src_infos(shift_mode, tb, &dest_bi, &src_bi, - &first_last, Snew); - - ret_value = - leaf_copy_items(&dest_bi, src_bi.bi_bh, first_last, mov_num, - mov_bytes); - - leaf_delete_items(&src_bi, first_last, - (first_last == - FIRST_TO_LAST) ? 
0 : (B_NR_ITEMS(src_bi.bi_bh) - - mov_num), mov_num, mov_bytes); - - return ret_value; -} - -/* - * Shift shift_num items (and shift_bytes of last shifted item if - * shift_bytes != -1) from S[0] to L[0] and replace the delimiting key - */ -int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes) -{ - struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path); - int i; - - /* - * move shift_num (and shift_bytes bytes) items from S[0] - * to left neighbor L[0] - */ - i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL); - - if (shift_num) { - /* number of items in S[0] == 0 */ - if (B_NR_ITEMS(S0) == 0) { - - RFALSE(shift_bytes != -1, - "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)", - shift_bytes); -#ifdef CONFIG_REISERFS_CHECK - if (tb->tb_mode == M_PASTE || tb->tb_mode == M_INSERT) { - print_cur_tb("vs-10275"); - reiserfs_panic(tb->tb_sb, "vs-10275", - "balance condition corrupted " - "(%c)", tb->tb_mode); - } -#endif - - if (PATH_H_POSITION(tb->tb_path, 1) == 0) - replace_key(tb, tb->CFL[0], tb->lkey[0], - PATH_H_PPARENT(tb->tb_path, 0), 0); - - } else { - /* replace lkey in CFL[0] by 0-th key from S[0]; */ - replace_key(tb, tb->CFL[0], tb->lkey[0], S0, 0); - - RFALSE((shift_bytes != -1 && - !(is_direntry_le_ih(item_head(S0, 0)) - && !ih_entry_count(item_head(S0, 0)))) && - (!op_is_left_mergeable - (leaf_key(S0, 0), S0->b_size)), - "vs-10280: item must be mergeable"); - } - } - - return i; -} - -/* CLEANING STOPPED HERE */ - -/* - * Shift shift_num (shift_bytes) items from S[0] to the right neighbor, - * and replace the delimiting key - */ -int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes) -{ - int ret_value; - - /* - * move shift_num (and shift_bytes) items from S[0] to - * right neighbor R[0] - */ - ret_value = - leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL); - - /* replace rkey in CFR[0] by the 0-th key from R[0] */ - if (shift_num) { - replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); - - } - - return ret_value; -} - -static void leaf_delete_items_entirely(struct buffer_info *bi, - int first, int del_num); -/* - * If del_bytes == -1, starting from position 'first' delete del_num - * items in whole in buffer CUR. - * If not. - * If last_first == 0. Starting from position 'first' delete del_num-1 - * items in whole. Delete part of body of the first item. Part defined by - * del_bytes. Don't delete first item header - * If last_first == 1. Starting from position 'first+1' delete del_num-1 - * items in whole. Delete part of body of the last item . Part defined by - * del_bytes. Don't delete last item header. -*/ -void leaf_delete_items(struct buffer_info *cur_bi, int last_first, - int first, int del_num, int del_bytes) -{ - struct buffer_head *bh; - int item_amount = B_NR_ITEMS(bh = cur_bi->bi_bh); - - RFALSE(!bh, "10155: bh is not defined"); - RFALSE(del_num < 0, "10160: del_num can not be < 0. 
del_num==%d", - del_num); - RFALSE(first < 0 - || first + del_num > item_amount, - "10165: invalid number of first item to be deleted (%d) or " - "no so much items (%d) to delete (only %d)", first, - first + del_num, item_amount); - - if (del_num == 0) - return; - - if (first == 0 && del_num == item_amount && del_bytes == -1) { - make_empty_node(cur_bi); - do_balance_mark_leaf_dirty(cur_bi->tb, bh, 0); - return; - } - - if (del_bytes == -1) - /* delete del_num items beginning from item in position first */ - leaf_delete_items_entirely(cur_bi, first, del_num); - else { - if (last_first == FIRST_TO_LAST) { - /* - * delete del_num-1 items beginning from - * item in position first - */ - leaf_delete_items_entirely(cur_bi, first, del_num - 1); - - /* - * delete the part of the first item of the bh - * do not delete item header - */ - leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes); - } else { - struct item_head *ih; - int len; - - /* - * delete del_num-1 items beginning from - * item in position first+1 - */ - leaf_delete_items_entirely(cur_bi, first + 1, - del_num - 1); - - ih = item_head(bh, B_NR_ITEMS(bh) - 1); - if (is_direntry_le_ih(ih)) - /* the last item is directory */ - /* - * len = numbers of directory entries - * in this item - */ - len = ih_entry_count(ih); - else - /* len = body len of item */ - len = ih_item_len(ih); - - /* - * delete the part of the last item of the bh - * do not delete item header - */ - leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1, - len - del_bytes, del_bytes); - } - } -} - -/* insert item into the leaf node in position before */ -void leaf_insert_into_buf(struct buffer_info *bi, int before, - struct item_head * const inserted_item_ih, - const char * const inserted_item_body, - int zeros_number) -{ - struct buffer_head *bh = bi->bi_bh; - int nr, free_space; - struct block_head *blkh; - struct item_head *ih; - int i; - int last_loc, unmoved_loc; - char *to; - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - free_space = blkh_free_space(blkh); - - /* check free space */ - RFALSE(free_space < ih_item_len(inserted_item_ih) + IH_SIZE, - "vs-10170: not enough free space in block %z, new item %h", - bh, inserted_item_ih); - RFALSE(zeros_number > ih_item_len(inserted_item_ih), - "vs-10172: zero number == %d, item length == %d", - zeros_number, ih_item_len(inserted_item_ih)); - - /* get item new item must be inserted before */ - ih = item_head(bh, before); - - /* prepare space for the body of new item */ - last_loc = nr ? ih_location(&ih[nr - before - 1]) : bh->b_size; - unmoved_loc = before ? 
ih_location(ih - 1) : bh->b_size; - - memmove(bh->b_data + last_loc - ih_item_len(inserted_item_ih), - bh->b_data + last_loc, unmoved_loc - last_loc); - - to = bh->b_data + unmoved_loc - ih_item_len(inserted_item_ih); - memset(to, 0, zeros_number); - to += zeros_number; - - /* copy body to prepared space */ - if (inserted_item_body) - memmove(to, inserted_item_body, - ih_item_len(inserted_item_ih) - zeros_number); - else - memset(to, '\0', ih_item_len(inserted_item_ih) - zeros_number); - - /* insert item header */ - memmove(ih + 1, ih, IH_SIZE * (nr - before)); - memmove(ih, inserted_item_ih, IH_SIZE); - - /* change locations */ - for (i = before; i < nr + 1; i++) { - unmoved_loc -= ih_item_len(&ih[i - before]); - put_ih_location(&ih[i - before], unmoved_loc); - } - - /* sizes, free space, item number */ - set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1); - set_blkh_free_space(blkh, - free_space - (IH_SIZE + - ih_item_len(inserted_item_ih))); - do_balance_mark_leaf_dirty(bi->tb, bh, 1); - - if (bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD(bi->bi_parent, bi->bi_position); - put_dc_size(t_dc, - dc_size(t_dc) + (IH_SIZE + - ih_item_len(inserted_item_ih))); - do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0); - } -} - -/* - * paste paste_size bytes to affected_item_num-th item. - * When item is a directory, this only prepare space for new entries - */ -void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, - int pos_in_item, int paste_size, - const char *body, int zeros_number) -{ - struct buffer_head *bh = bi->bi_bh; - int nr, free_space; - struct block_head *blkh; - struct item_head *ih; - int i; - int last_loc, unmoved_loc; - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - free_space = blkh_free_space(blkh); - - /* check free space */ - RFALSE(free_space < paste_size, - "vs-10175: not enough free space: needed %d, available %d", - paste_size, free_space); - -#ifdef CONFIG_REISERFS_CHECK - if (zeros_number > paste_size) { - struct super_block *sb = NULL; - if (bi && bi->tb) - sb = bi->tb->tb_sb; - print_cur_tb("10177"); - reiserfs_panic(sb, "vs-10177", - "zeros_number == %d, paste_size == %d", - zeros_number, paste_size); - } -#endif /* CONFIG_REISERFS_CHECK */ - - /* item to be appended */ - ih = item_head(bh, affected_item_num); - - last_loc = ih_location(&ih[nr - affected_item_num - 1]); - unmoved_loc = affected_item_num ? 
ih_location(ih - 1) : bh->b_size; - - /* prepare space */ - memmove(bh->b_data + last_loc - paste_size, bh->b_data + last_loc, - unmoved_loc - last_loc); - - /* change locations */ - for (i = affected_item_num; i < nr; i++) - put_ih_location(&ih[i - affected_item_num], - ih_location(&ih[i - affected_item_num]) - - paste_size); - - if (body) { - if (!is_direntry_le_ih(ih)) { - if (!pos_in_item) { - /* shift data to right */ - memmove(bh->b_data + ih_location(ih) + - paste_size, - bh->b_data + ih_location(ih), - ih_item_len(ih)); - /* paste data in the head of item */ - memset(bh->b_data + ih_location(ih), 0, - zeros_number); - memcpy(bh->b_data + ih_location(ih) + - zeros_number, body, - paste_size - zeros_number); - } else { - memset(bh->b_data + unmoved_loc - paste_size, 0, - zeros_number); - memcpy(bh->b_data + unmoved_loc - paste_size + - zeros_number, body, - paste_size - zeros_number); - } - } - } else - memset(bh->b_data + unmoved_loc - paste_size, '\0', paste_size); - - put_ih_item_len(ih, ih_item_len(ih) + paste_size); - - /* change free space */ - set_blkh_free_space(blkh, free_space - paste_size); - - do_balance_mark_leaf_dirty(bi->tb, bh, 0); - - if (bi->bi_parent) { - struct disk_child *t_dc = - B_N_CHILD(bi->bi_parent, bi->bi_position); - put_dc_size(t_dc, dc_size(t_dc) + paste_size); - do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0); - } -} - -/* - * cuts DEL_COUNT entries beginning from FROM-th entry. Directory item - * does not have free space, so it moves DEHs and remaining records as - * necessary. Return value is size of removed part of directory item - * in bytes. - */ -static int leaf_cut_entries(struct buffer_head *bh, - struct item_head *ih, int from, int del_count) -{ - char *item; - struct reiserfs_de_head *deh; - int prev_record_offset; /* offset of record, that is (from-1)th */ - char *prev_record; /* */ - int cut_records_len; /* length of all removed records */ - int i; - - /* - * make sure that item is directory and there are enough entries to - * remove - */ - RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item"); - RFALSE(ih_entry_count(ih) < from + del_count, - "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d", - ih_entry_count(ih), from, del_count); - - if (del_count == 0) - return 0; - - /* first byte of item */ - item = bh->b_data + ih_location(ih); - - /* entry head array */ - deh = B_I_DEH(bh, ih); - - /* - * first byte of remaining entries, those are BEFORE cut entries - * (prev_record) and length of all removed records (cut_records_len) - */ - prev_record_offset = - (from ? 
deh_location(&deh[from - 1]) : ih_item_len(ih)); - cut_records_len = prev_record_offset /*from_record */ - - deh_location(&deh[from + del_count - 1]); - prev_record = item + prev_record_offset; - - /* adjust locations of remaining entries */ - for (i = ih_entry_count(ih) - 1; i > from + del_count - 1; i--) - put_deh_location(&deh[i], - deh_location(&deh[i]) - - (DEH_SIZE * del_count)); - - for (i = 0; i < from; i++) - put_deh_location(&deh[i], - deh_location(&deh[i]) - (DEH_SIZE * del_count + - cut_records_len)); - - put_ih_entry_count(ih, ih_entry_count(ih) - del_count); - - /* shift entry head array and entries those are AFTER removed entries */ - memmove((char *)(deh + from), - deh + from + del_count, - prev_record - cut_records_len - (char *)(deh + from + - del_count)); - - /* shift records, those are BEFORE removed entries */ - memmove(prev_record - cut_records_len - DEH_SIZE * del_count, - prev_record, item + ih_item_len(ih) - prev_record); - - return DEH_SIZE * del_count + cut_records_len; -} - -/* - * when cut item is part of regular file - * pos_in_item - first byte that must be cut - * cut_size - number of bytes to be cut beginning from pos_in_item - * - * when cut item is part of directory - * pos_in_item - number of first deleted entry - * cut_size - count of deleted entries - */ -void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, - int pos_in_item, int cut_size) -{ - int nr; - struct buffer_head *bh = bi->bi_bh; - struct block_head *blkh; - struct item_head *ih; - int last_loc, unmoved_loc; - int i; - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - - /* item head of truncated item */ - ih = item_head(bh, cut_item_num); - - if (is_direntry_le_ih(ih)) { - /* first cut entry () */ - cut_size = leaf_cut_entries(bh, ih, pos_in_item, cut_size); - if (pos_in_item == 0) { - /* change key */ - RFALSE(cut_item_num, - "when 0-th enrty of item is cut, that item must be first in the node, not %d-th", - cut_item_num); - /* change item key by key of first entry in the item */ - set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih))); - } - } else { - /* item is direct or indirect */ - RFALSE(is_statdata_le_ih(ih), "10195: item is stat data"); - RFALSE(pos_in_item && pos_in_item + cut_size != ih_item_len(ih), - "10200: invalid offset (%lu) or trunc_size (%lu) or ih_item_len (%lu)", - (long unsigned)pos_in_item, (long unsigned)cut_size, - (long unsigned)ih_item_len(ih)); - - /* shift item body to left if cut is from the head of item */ - if (pos_in_item == 0) { - memmove(bh->b_data + ih_location(ih), - bh->b_data + ih_location(ih) + cut_size, - ih_item_len(ih) - cut_size); - - /* change key of item */ - if (is_direct_le_ih(ih)) - set_le_ih_k_offset(ih, - le_ih_k_offset(ih) + - cut_size); - else { - set_le_ih_k_offset(ih, - le_ih_k_offset(ih) + - (cut_size / UNFM_P_SIZE) * - bh->b_size); - RFALSE(ih_item_len(ih) == cut_size - && get_ih_free_space(ih), - "10205: invalid ih_free_space (%h)", ih); - } - } - } - - /* location of the last item */ - last_loc = ih_location(&ih[nr - cut_item_num - 1]); - - /* location of the item, which is remaining at the same place */ - unmoved_loc = cut_item_num ? 
ih_location(ih - 1) : bh->b_size; - - /* shift */ - memmove(bh->b_data + last_loc + cut_size, bh->b_data + last_loc, - unmoved_loc - last_loc - cut_size); - - /* change item length */ - put_ih_item_len(ih, ih_item_len(ih) - cut_size); - - if (is_indirect_le_ih(ih)) { - if (pos_in_item) - set_ih_free_space(ih, 0); - } - - /* change locations */ - for (i = cut_item_num; i < nr; i++) - put_ih_location(&ih[i - cut_item_num], - ih_location(&ih[i - cut_item_num]) + cut_size); - - /* size, free space */ - set_blkh_free_space(blkh, blkh_free_space(blkh) + cut_size); - - do_balance_mark_leaf_dirty(bi->tb, bh, 0); - - if (bi->bi_parent) { - struct disk_child *t_dc; - t_dc = B_N_CHILD(bi->bi_parent, bi->bi_position); - put_dc_size(t_dc, dc_size(t_dc) - cut_size); - do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0); - } -} - -/* delete del_num items from buffer starting from the first'th item */ -static void leaf_delete_items_entirely(struct buffer_info *bi, - int first, int del_num) -{ - struct buffer_head *bh = bi->bi_bh; - int nr; - int i, j; - int last_loc, last_removed_loc; - struct block_head *blkh; - struct item_head *ih; - - RFALSE(bh == NULL, "10210: buffer is 0"); - RFALSE(del_num < 0, "10215: del_num less than 0 (%d)", del_num); - - if (del_num == 0) - return; - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - - RFALSE(first < 0 || first + del_num > nr, - "10220: first=%d, number=%d, there is %d items", first, del_num, - nr); - - if (first == 0 && del_num == nr) { - /* this does not work */ - make_empty_node(bi); - - do_balance_mark_leaf_dirty(bi->tb, bh, 0); - return; - } - - ih = item_head(bh, first); - - /* location of unmovable item */ - j = (first == 0) ? bh->b_size : ih_location(ih - 1); - - /* delete items */ - last_loc = ih_location(&ih[nr - 1 - first]); - last_removed_loc = ih_location(&ih[del_num - 1]); - - memmove(bh->b_data + last_loc + j - last_removed_loc, - bh->b_data + last_loc, last_removed_loc - last_loc); - - /* delete item headers */ - memmove(ih, ih + del_num, (nr - first - del_num) * IH_SIZE); - - /* change item location */ - for (i = first; i < nr - del_num; i++) - put_ih_location(&ih[i - first], - ih_location(&ih[i - first]) + (j - - last_removed_loc)); - - /* sizes, item number */ - set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num); - set_blkh_free_space(blkh, - blkh_free_space(blkh) + (j - last_removed_loc + - IH_SIZE * del_num)); - - do_balance_mark_leaf_dirty(bi->tb, bh, 0); - - if (bi->bi_parent) { - struct disk_child *t_dc = - B_N_CHILD(bi->bi_parent, bi->bi_position); - put_dc_size(t_dc, - dc_size(t_dc) - (j - last_removed_loc + - IH_SIZE * del_num)); - do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0); - } -} - -/* - * paste new_entry_count entries (new_dehs, records) into position - * before to item_num-th item - */ -void leaf_paste_entries(struct buffer_info *bi, - int item_num, - int before, - int new_entry_count, - struct reiserfs_de_head *new_dehs, - const char *records, int paste_size) -{ - struct item_head *ih; - char *item; - struct reiserfs_de_head *deh; - char *insert_point; - int i; - struct buffer_head *bh = bi->bi_bh; - - if (new_entry_count == 0) - return; - - ih = item_head(bh, item_num); - - /* - * make sure, that item is directory, and there are enough - * records in it - */ - RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item"); - RFALSE(ih_entry_count(ih) < before, - "10230: there are no entry we paste entries before. 
entry_count = %d, before = %d", - ih_entry_count(ih), before); - - /* first byte of dest item */ - item = bh->b_data + ih_location(ih); - - /* entry head array */ - deh = B_I_DEH(bh, ih); - - /* new records will be pasted at this point */ - insert_point = - item + - (before ? deh_location(&deh[before - 1]) - : (ih_item_len(ih) - paste_size)); - - /* adjust locations of records that will be AFTER new records */ - for (i = ih_entry_count(ih) - 1; i >= before; i--) - put_deh_location(&deh[i], - deh_location(&deh[i]) + - (DEH_SIZE * new_entry_count)); - - /* adjust locations of records that will be BEFORE new records */ - for (i = 0; i < before; i++) - put_deh_location(&deh[i], - deh_location(&deh[i]) + paste_size); - - put_ih_entry_count(ih, ih_entry_count(ih) + new_entry_count); - - /* prepare space for pasted records */ - memmove(insert_point + paste_size, insert_point, - item + (ih_item_len(ih) - paste_size) - insert_point); - - /* copy new records */ - memcpy(insert_point + DEH_SIZE * new_entry_count, records, - paste_size - DEH_SIZE * new_entry_count); - - /* prepare space for new entry heads */ - deh += before; - memmove((char *)(deh + new_entry_count), deh, - insert_point - (char *)deh); - - /* copy new entry heads */ - deh = (struct reiserfs_de_head *)((char *)deh); - memcpy(deh, new_dehs, DEH_SIZE * new_entry_count); - - /* set locations of new records */ - for (i = 0; i < new_entry_count; i++) { - put_deh_location(&deh[i], - deh_location(&deh[i]) + - (-deh_location - (&new_dehs[new_entry_count - 1]) + - insert_point + DEH_SIZE * new_entry_count - - item)); - } - - /* change item key if necessary (when we paste before 0-th entry */ - if (!before) { - set_le_ih_k_offset(ih, deh_offset(new_dehs)); - } -#ifdef CONFIG_REISERFS_CHECK - { - int prev, next; - /* check record locations */ - deh = B_I_DEH(bh, ih); - for (i = 0; i < ih_entry_count(ih); i++) { - next = - (i < - ih_entry_count(ih) - - 1) ? deh_location(&deh[i + 1]) : 0; - prev = (i != 0) ? deh_location(&deh[i - 1]) : 0; - - if (prev && prev <= deh_location(&deh[i])) - reiserfs_error(sb_from_bi(bi), "vs-10240", - "directory item (%h) " - "corrupted (prev %a, " - "cur(%d) %a)", - ih, deh + i - 1, i, deh + i); - if (next && next >= deh_location(&deh[i])) - reiserfs_error(sb_from_bi(bi), "vs-10250", - "directory item (%h) " - "corrupted (cur(%d) %a, " - "next %a)", - ih, i, deh + i, deh + i + 1); - } - } -#endif - -} diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c deleted file mode 100644 index 46bd7bd63a71..000000000000 --- a/fs/reiserfs/lock.c +++ /dev/null @@ -1,101 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "reiserfs.h" -#include - -/* - * The previous reiserfs locking scheme was heavily based on - * the tricky properties of the Bkl: - * - * - it was acquired recursively by a same task - * - the performances relied on the release-while-schedule() property - * - * Now that we replace it by a mutex, we still want to keep the same - * recursive property to avoid big changes in the code structure. - * We use our own lock_owner here because the owner field on a mutex - * is only available in SMP or mutex debugging, also we only need this field - * for this mutex, no need for a system wide mutex facility. - * - * Also this lock is often released before a call that could block because - * reiserfs performances were partially based on the release while schedule() - * property of the Bkl. 
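The functions below keep that recursive behaviour by tracking an owner and a depth next to an ordinary mutex: only the outermost acquisition by a task takes the mutex, and only the matching outermost release drops it. A rough user-space analogue of the same pattern, using pthreads and invented toy_* names (the depth bookkeeping here is simplified to start at zero rather than -1):

#include <pthread.h>
#include <stdio.h>

/* simplified stand-in for the owner/depth fields kept in reiserfs_sb_info */
struct toy_lock {
        pthread_mutex_t mutex;
        pthread_t owner;
        int has_owner;
        int depth;
};

static void toy_write_lock(struct toy_lock *l)
{
        /* take the mutex only on the outermost acquisition by this thread */
        if (!l->has_owner || !pthread_equal(l->owner, pthread_self())) {
                pthread_mutex_lock(&l->mutex);
                l->owner = pthread_self();
                l->has_owner = 1;
        }
        l->depth++;     /* only the owning thread ever touches depth */
}

static void toy_write_unlock(struct toy_lock *l)
{
        /* release the mutex only when the outermost lock is dropped */
        if (--l->depth == 0) {
                l->has_owner = 0;
                pthread_mutex_unlock(&l->mutex);
        }
}

int main(void)
{
        static struct toy_lock l = { .mutex = PTHREAD_MUTEX_INITIALIZER };

        toy_write_lock(&l);
        toy_write_lock(&l);     /* recursive acquisition: no deadlock */
        toy_write_unlock(&l);
        toy_write_unlock(&l);
        printf("depth back to %d\n", l.depth);
        return 0;
}

As in the kernel version, the unlocked owner check is safe only because a thread compares the owner field against itself and the field is set exclusively by the holder.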
- */ -void reiserfs_write_lock(struct super_block *s) -{ - struct reiserfs_sb_info *sb_i = REISERFS_SB(s); - - if (sb_i->lock_owner != current) { - mutex_lock(&sb_i->lock); - sb_i->lock_owner = current; - } - - /* No need to protect it, only the current task touches it */ - sb_i->lock_depth++; -} - -void reiserfs_write_unlock(struct super_block *s) -{ - struct reiserfs_sb_info *sb_i = REISERFS_SB(s); - - /* - * Are we unlocking without even holding the lock? - * Such a situation must raise a BUG() if we don't want - * to corrupt the data. - */ - BUG_ON(sb_i->lock_owner != current); - - if (--sb_i->lock_depth == -1) { - sb_i->lock_owner = NULL; - mutex_unlock(&sb_i->lock); - } -} - -int __must_check reiserfs_write_unlock_nested(struct super_block *s) -{ - struct reiserfs_sb_info *sb_i = REISERFS_SB(s); - int depth; - - /* this can happen when the lock isn't always held */ - if (sb_i->lock_owner != current) - return -1; - - depth = sb_i->lock_depth; - - sb_i->lock_depth = -1; - sb_i->lock_owner = NULL; - mutex_unlock(&sb_i->lock); - - return depth; -} - -void reiserfs_write_lock_nested(struct super_block *s, int depth) -{ - struct reiserfs_sb_info *sb_i = REISERFS_SB(s); - - /* this can happen when the lock isn't always held */ - if (depth == -1) - return; - - mutex_lock(&sb_i->lock); - sb_i->lock_owner = current; - sb_i->lock_depth = depth; -} - -/* - * Utility function to force a BUG if it is called without the superblock - * write lock held. caller is the string printed just before calling BUG() - */ -void reiserfs_check_lock_depth(struct super_block *sb, char *caller) -{ - struct reiserfs_sb_info *sb_i = REISERFS_SB(sb); - - WARN_ON(sb_i->lock_depth < 0); -} - -#ifdef CONFIG_REISERFS_CHECK -void reiserfs_lock_check_recursive(struct super_block *sb) -{ - struct reiserfs_sb_info *sb_i = REISERFS_SB(sb); - - WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n"); -} -#endif diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c deleted file mode 100644 index 7e7b531fcc49..000000000000 --- a/fs/reiserfs/namei.c +++ /dev/null @@ -1,1725 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - * - * Trivial changes by Alan Cox to remove EHASHCOLLISION for compatibility - * - * Trivial Changes: - * Rights granted to Hans Reiser to redistribute under other terms providing - * he accepts all liability including but not limited to patent, fitness - * for purpose, and direct or indirect claims arising from failure to perform. - * - * NO WARRANTY - */ - -#include -#include -#include -#include "reiserfs.h" -#include "acl.h" -#include "xattr.h" -#include - -#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } -#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); - -/* - * directory item contains array of entry headers. 
This performs - * binary search through that array - */ -static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) -{ - struct item_head *ih = de->de_ih; - struct reiserfs_de_head *deh = de->de_deh; - int rbound, lbound, j; - - lbound = 0; - rbound = ih_entry_count(ih) - 1; - - for (j = (rbound + lbound) / 2; lbound <= rbound; - j = (rbound + lbound) / 2) { - if (off < deh_offset(deh + j)) { - rbound = j - 1; - continue; - } - if (off > deh_offset(deh + j)) { - lbound = j + 1; - continue; - } - /* this is not name found, but matched third key component */ - de->de_entry_num = j; - return NAME_FOUND; - } - - de->de_entry_num = lbound; - return NAME_NOT_FOUND; -} - -/* - * comment? maybe something like set de to point to what the path points to? - */ -static inline void set_de_item_location(struct reiserfs_dir_entry *de, - struct treepath *path) -{ - de->de_bh = get_last_bh(path); - de->de_ih = tp_item_head(path); - de->de_deh = B_I_DEH(de->de_bh, de->de_ih); - de->de_item_num = PATH_LAST_POSITION(path); -} - -/* - * de_bh, de_ih, de_deh (points to first element of array), de_item_num is set - */ -inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de) -{ - struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; - - BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); - - de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num); - de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? SD_SIZE : 0); - de->de_name = ih_item_body(de->de_bh, de->de_ih) + deh_location(deh); - if (de->de_name[de->de_namelen - 1] == 0) - de->de_namelen = strlen(de->de_name); -} - -/* what entry points to */ -static inline void set_de_object_key(struct reiserfs_dir_entry *de) -{ - BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); - de->de_dir_id = deh_dir_id(&de->de_deh[de->de_entry_num]); - de->de_objectid = deh_objectid(&de->de_deh[de->de_entry_num]); -} - -static inline void store_de_entry_key(struct reiserfs_dir_entry *de) -{ - struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; - - BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); - - /* store key of the found entry */ - de->de_entry_key.version = KEY_FORMAT_3_5; - de->de_entry_key.on_disk_key.k_dir_id = - le32_to_cpu(de->de_ih->ih_key.k_dir_id); - de->de_entry_key.on_disk_key.k_objectid = - le32_to_cpu(de->de_ih->ih_key.k_objectid); - set_cpu_key_k_offset(&de->de_entry_key, deh_offset(deh)); - set_cpu_key_k_type(&de->de_entry_key, TYPE_DIRENTRY); -} - -/* - * We assign a key to each directory item, and place multiple entries in a - * single directory item. A directory item has a key equal to the key of - * the first directory entry in it. - - * This function first calls search_by_key, then, if item whose first entry - * matches is not found it looks for the entry inside directory item found - * by search_by_key. Fills the path to the entry, and to the entry position - * in the item - */ -/* The function is NOT SCHEDULE-SAFE! 
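bin_search_in_dir_item above is an ordinary binary search over the entry heads, which are sorted by offset (the hashed third key component); on a miss it leaves the lower bound as the position where the entry would be inserted. A compact stand-alone version of the same search, with an illustrative offset array and simplified return codes:

#include <stdio.h>

enum { NAME_NOT_FOUND = 0, NAME_FOUND = 1 };

/*
 * Binary search over a sorted array of entry offsets.  On success *pos is
 * the matching index; on failure it is the index where the offset would be
 * inserted, the same convention bin_search_in_dir_item uses for
 * de_entry_num.
 */
static int find_entry_offset(const unsigned int *offsets, int count,
                             unsigned int off, int *pos)
{
        int lo = 0, hi = count - 1;

        while (lo <= hi) {
                int mid = (lo + hi) / 2;

                if (off < offsets[mid])
                        hi = mid - 1;
                else if (off > offsets[mid])
                        lo = mid + 1;
                else {
                        *pos = mid;
                        return NAME_FOUND;
                }
        }
        *pos = lo;
        return NAME_NOT_FOUND;
}

int main(void)
{
        unsigned int offsets[] = { 1, 2, 128, 640, 1664 };
        int pos;

        printf("%d at %d\n", find_entry_offset(offsets, 5, 640, &pos), pos);
        printf("%d at %d\n", find_entry_offset(offsets, 5, 700, &pos), pos);
        return 0;
}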
*/ -int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, - struct treepath *path, struct reiserfs_dir_entry *de) -{ - int retval; - - retval = search_item(sb, key, path); - switch (retval) { - case ITEM_NOT_FOUND: - if (!PATH_LAST_POSITION(path)) { - reiserfs_error(sb, "vs-7000", "search_by_key " - "returned item position == 0"); - pathrelse(path); - return IO_ERROR; - } - PATH_LAST_POSITION(path)--; - break; - - case ITEM_FOUND: - break; - - case IO_ERROR: - return retval; - - default: - pathrelse(path); - reiserfs_error(sb, "vs-7002", "no path to here"); - return IO_ERROR; - } - - set_de_item_location(de, path); - -#ifdef CONFIG_REISERFS_CHECK - if (!is_direntry_le_ih(de->de_ih) || - COMP_SHORT_KEYS(&de->de_ih->ih_key, key)) { - print_block(de->de_bh, 0, -1, -1); - reiserfs_panic(sb, "vs-7005", "found item %h is not directory " - "item or does not belong to the same directory " - "as key %K", de->de_ih, key); - } -#endif /* CONFIG_REISERFS_CHECK */ - - /* - * binary search in directory item by third component of the - * key. sets de->de_entry_num of de - */ - retval = bin_search_in_dir_item(de, cpu_key_k_offset(key)); - path->pos_in_item = de->de_entry_num; - if (retval != NAME_NOT_FOUND) { - /* - * ugly, but rename needs de_bh, de_deh, de_name, - * de_namelen, de_objectid set - */ - set_de_name_and_namelen(de); - set_de_object_key(de); - } - return retval; -} - -/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */ - -/* - * The third component is hashed, and you can choose from more than - * one hash function. Per directory hashes are not yet implemented - * but are thought about. This function should be moved to hashes.c - * Jedi, please do so. -Hans - */ -static __u32 get_third_component(struct super_block *s, - const char *name, int len) -{ - __u32 res; - - if (!len || (len == 1 && name[0] == '.')) - return DOT_OFFSET; - if (len == 2 && name[0] == '.' && name[1] == '.') - return DOT_DOT_OFFSET; - - res = REISERFS_SB(s)->s_hash_function(name, len); - - /* take bits from 7-th to 30-th including both bounds */ - res = GET_HASH_VALUE(res); - if (res == 0) - /* - * needed to have no names before "." and ".." those have hash - * value == 0 and generation conters 1 and 2 accordingly - */ - res = 128; - return res + MAX_GENERATION_NUMBER; -} - -static int reiserfs_match(struct reiserfs_dir_entry *de, - const char *name, int namelen) -{ - int retval = NAME_NOT_FOUND; - - if ((namelen == de->de_namelen) && - !memcmp(de->de_name, name, de->de_namelen)) - retval = - (de_visible(de->de_deh + de->de_entry_num) ? 
NAME_FOUND : - NAME_FOUND_INVISIBLE); - - return retval; -} - -/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */ - -/* used when hash collisions exist */ - -static int linear_search_in_dir_item(struct cpu_key *key, - struct reiserfs_dir_entry *de, - const char *name, int namelen) -{ - struct reiserfs_de_head *deh = de->de_deh; - int retval; - int i; - - i = de->de_entry_num; - - if (i == ih_entry_count(de->de_ih) || - GET_HASH_VALUE(deh_offset(deh + i)) != - GET_HASH_VALUE(cpu_key_k_offset(key))) { - i--; - } - - RFALSE(de->de_deh != B_I_DEH(de->de_bh, de->de_ih), - "vs-7010: array of entry headers not found"); - - deh += i; - - for (; i >= 0; i--, deh--) { - /* hash value does not match, no need to check whole name */ - if (GET_HASH_VALUE(deh_offset(deh)) != - GET_HASH_VALUE(cpu_key_k_offset(key))) { - return NAME_NOT_FOUND; - } - - /* mark that this generation number is used */ - if (de->de_gen_number_bit_string) - set_bit(GET_GENERATION_NUMBER(deh_offset(deh)), - de->de_gen_number_bit_string); - - /* calculate pointer to name and namelen */ - de->de_entry_num = i; - set_de_name_and_namelen(de); - - /* - * de's de_name, de_namelen, de_recordlen are set. - * Fill the rest. - */ - if ((retval = - reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) { - - /* key of pointed object */ - set_de_object_key(de); - - store_de_entry_key(de); - - /* retval can be NAME_FOUND or NAME_FOUND_INVISIBLE */ - return retval; - } - } - - if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0) - /* - * we have reached left most entry in the node. In common we - * have to go to the left neighbor, but if generation counter - * is 0 already, we know for sure, that there is no name with - * the same hash value - */ - /* - * FIXME: this work correctly only because hash value can not - * be 0. Btw, in case of Yura's hash it is probably possible, - * so, this is a bug - */ - return NAME_NOT_FOUND; - - RFALSE(de->de_item_num, - "vs-7015: two diritems of the same directory in one node?"); - - return GOTO_PREVIOUS_ITEM; -} - -/* - * may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND - * FIXME: should add something like IOERROR - */ -static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen, - struct treepath *path_to_entry, - struct reiserfs_dir_entry *de) -{ - struct cpu_key key_to_search; - int retval; - - if (namelen > REISERFS_MAX_NAME(dir->i_sb->s_blocksize)) - return NAME_NOT_FOUND; - - /* we will search for this key in the tree */ - make_cpu_key(&key_to_search, dir, - get_third_component(dir->i_sb, name, namelen), - TYPE_DIRENTRY, 3); - - while (1) { - retval = - search_by_entry_key(dir->i_sb, &key_to_search, - path_to_entry, de); - if (retval == IO_ERROR) { - reiserfs_error(dir->i_sb, "zam-7001", "io error"); - return IO_ERROR; - } - - /* compare names for all entries having given hash value */ - retval = - linear_search_in_dir_item(&key_to_search, de, name, - namelen); - /* - * there is no need to scan directory anymore. 
- * Given entry found or does not exist - */ - if (retval != GOTO_PREVIOUS_ITEM) { - path_to_entry->pos_in_item = de->de_entry_num; - return retval; - } - - /* - * there is left neighboring item of this directory - * and given entry can be there - */ - set_cpu_key_k_offset(&key_to_search, - le_ih_k_offset(de->de_ih) - 1); - pathrelse(path_to_entry); - - } /* while (1) */ -} - -static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) -{ - int retval; - struct inode *inode = NULL; - struct reiserfs_dir_entry de; - INITIALIZE_PATH(path_to_entry); - - if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len) - return ERR_PTR(-ENAMETOOLONG); - - reiserfs_write_lock(dir->i_sb); - - de.de_gen_number_bit_string = NULL; - retval = - reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, - &path_to_entry, &de); - pathrelse(&path_to_entry); - if (retval == NAME_FOUND) { - inode = reiserfs_iget(dir->i_sb, - (struct cpu_key *)&de.de_dir_id); - if (!inode || IS_ERR(inode)) { - reiserfs_write_unlock(dir->i_sb); - return ERR_PTR(-EACCES); - } - - /* - * Propagate the private flag so we know we're - * in the priv tree. Also clear xattr support - * since we don't have xattrs on xattr files. - */ - if (IS_PRIVATE(dir)) - reiserfs_init_priv_inode(inode); - } - reiserfs_write_unlock(dir->i_sb); - if (retval == IO_ERROR) { - return ERR_PTR(-EIO); - } - - return d_splice_alias(inode, dentry); -} - -/* - * looks up the dentry of the parent directory for child. - * taken from ext2_get_parent - */ -struct dentry *reiserfs_get_parent(struct dentry *child) -{ - int retval; - struct inode *inode = NULL; - struct reiserfs_dir_entry de; - INITIALIZE_PATH(path_to_entry); - struct inode *dir = d_inode(child); - - if (dir->i_nlink == 0) { - return ERR_PTR(-ENOENT); - } - de.de_gen_number_bit_string = NULL; - - reiserfs_write_lock(dir->i_sb); - retval = reiserfs_find_entry(dir, "..", 2, &path_to_entry, &de); - pathrelse(&path_to_entry); - if (retval != NAME_FOUND) { - reiserfs_write_unlock(dir->i_sb); - return ERR_PTR(-ENOENT); - } - inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&de.de_dir_id); - reiserfs_write_unlock(dir->i_sb); - - return d_obtain_alias(inode); -} - -/* add entry to the directory (entry can be hidden). - -insert definition of when hidden directories are used here -Hans - - Does not mark dir inode dirty, do it after successesfull call to it */ - -static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, - struct inode *dir, const char *name, int namelen, - struct inode *inode, int visible) -{ - struct cpu_key entry_key; - struct reiserfs_de_head *deh; - INITIALIZE_PATH(path); - struct reiserfs_dir_entry de; - DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1); - int gen_number; - - /* - * 48 bytes now and we avoid kmalloc if we - * create file with short name - */ - char small_buf[32 + DEH_SIZE]; - - char *buffer; - int buflen, paste_size; - int retval; - - BUG_ON(!th->t_trans_id); - - /* each entry has unique key. compose it */ - make_cpu_key(&entry_key, dir, - get_third_component(dir->i_sb, name, namelen), - TYPE_DIRENTRY, 3); - - /* get memory for composing the entry */ - buflen = DEH_SIZE + ROUND_UP(namelen); - if (buflen > sizeof(small_buf)) { - buffer = kmalloc(buflen, GFP_NOFS); - if (!buffer) - return -ENOMEM; - } else - buffer = small_buf; - - paste_size = - (get_inode_sd_version(dir) == - STAT_DATA_V1) ? 
(DEH_SIZE + namelen) : buflen; - - /* - * fill buffer : directory entry head, name[, dir objectid | , - * stat data | ,stat data, dir objectid ] - */ - deh = (struct reiserfs_de_head *)buffer; - deh->deh_location = 0; /* JDM Endian safe if 0 */ - put_deh_offset(deh, cpu_key_k_offset(&entry_key)); - deh->deh_state = 0; /* JDM Endian safe if 0 */ - /* put key (ino analog) to de */ - - /* safe: k_dir_id is le */ - deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id; - /* safe: k_objectid is le */ - deh->deh_objectid = INODE_PKEY(inode)->k_objectid; - - /* copy name */ - memcpy((char *)(deh + 1), name, namelen); - /* padd by 0s to the 4 byte boundary */ - padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen); - - /* - * entry is ready to be pasted into tree, set 'visibility' - * and 'stat data in entry' attributes - */ - mark_de_without_sd(deh); - visible ? mark_de_visible(deh) : mark_de_hidden(deh); - - /* find the proper place for the new entry */ - memset(bit_string, 0, sizeof(bit_string)); - de.de_gen_number_bit_string = bit_string; - retval = reiserfs_find_entry(dir, name, namelen, &path, &de); - if (retval != NAME_NOT_FOUND) { - if (buffer != small_buf) - kfree(buffer); - pathrelse(&path); - - if (retval == IO_ERROR) { - return -EIO; - } - - if (retval != NAME_FOUND) { - reiserfs_error(dir->i_sb, "zam-7002", - "reiserfs_find_entry() returned " - "unexpected value (%d)", retval); - } - - return -EEXIST; - } - - gen_number = - find_first_zero_bit(bit_string, - MAX_GENERATION_NUMBER + 1); - if (gen_number > MAX_GENERATION_NUMBER) { - /* there is no free generation number */ - reiserfs_warning(dir->i_sb, "reiserfs-7010", - "Congratulations! we have got hash function " - "screwed up"); - if (buffer != small_buf) - kfree(buffer); - pathrelse(&path); - return -EBUSY; - } - /* adjust offset of directory enrty */ - put_deh_offset(deh, SET_GENERATION_NUMBER(deh_offset(deh), gen_number)); - set_cpu_key_k_offset(&entry_key, deh_offset(deh)); - - /* update max-hash-collisions counter in reiserfs_sb_info */ - PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number); - - /* we need to re-search for the insertion point */ - if (gen_number != 0) { - if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) != - NAME_NOT_FOUND) { - reiserfs_warning(dir->i_sb, "vs-7032", - "entry with this key (%K) already " - "exists", &entry_key); - - if (buffer != small_buf) - kfree(buffer); - pathrelse(&path); - return -EBUSY; - } - } - - /* perform the insertion of the entry that we have prepared */ - retval = - reiserfs_paste_into_item(th, &path, &entry_key, dir, buffer, - paste_size); - if (buffer != small_buf) - kfree(buffer); - if (retval) { - reiserfs_check_path(&path); - return retval; - } - - dir->i_size += paste_size; - inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); - if (!S_ISDIR(inode->i_mode) && visible) - /* reiserfs_mkdir or reiserfs_rename will do that by itself */ - reiserfs_update_sd(th, dir); - - reiserfs_check_path(&path); - return 0; -} - -/* - * quota utility function, call if you've had to abort after calling - * new_inode_init, and have not called reiserfs_new_inode yet. - * This should only be called on inodes that do not have stat data - * inserted into the tree yet. - */ -static int drop_new_inode(struct inode *inode) -{ - dquot_drop(inode); - make_bad_inode(inode); - inode->i_flags |= S_NOQUOTA; - iput(inode); - return 0; -} - -/* - * utility function that does setup for reiserfs_new_inode. 
- * dquot_initialize needs lots of credits so it's better to have it - * outside of a transaction, so we had to pull some bits of - * reiserfs_new_inode out into this func. - */ -static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode) -{ - /* - * Make inode invalid - just in case we are going to drop it before - * the initialization happens - */ - INODE_PKEY(inode)->k_objectid = 0; - - /* - * the quota init calls have to know who to charge the quota to, so - * we have to set uid and gid here - */ - inode_init_owner(&nop_mnt_idmap, inode, dir, mode); - return dquot_initialize(inode); -} - -static int reiserfs_create(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode, bool excl) -{ - int retval; - struct inode *inode; - /* - * We need blocks for transaction + (user+group)*(quotas - * for new inode + update of quota for directory owner) - */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 2 + - 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + - REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); - struct reiserfs_transaction_handle th; - struct reiserfs_security_handle security; - - retval = dquot_initialize(dir); - if (retval) - return retval; - - if (!(inode = new_inode(dir->i_sb))) { - return -ENOMEM; - } - retval = new_inode_init(inode, dir, mode); - if (retval) { - drop_new_inode(inode); - return retval; - } - - jbegin_count += reiserfs_cache_default_acl(dir); - retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security); - if (retval < 0) { - drop_new_inode(inode); - return retval; - } - jbegin_count += retval; - reiserfs_write_lock(dir->i_sb); - - retval = journal_begin(&th, dir->i_sb, jbegin_count); - if (retval) { - drop_new_inode(inode); - goto out_failed; - } - - retval = - reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry, - inode, &security); - if (retval) - goto out_failed; - - inode->i_op = &reiserfs_file_inode_operations; - inode->i_fop = &reiserfs_file_operations; - inode->i_mapping->a_ops = &reiserfs_address_space_operations; - - retval = - reiserfs_add_entry(&th, dir, dentry->d_name.name, - dentry->d_name.len, inode, 1 /*visible */ ); - if (retval) { - int err; - drop_nlink(inode); - reiserfs_update_sd(&th, inode); - err = journal_end(&th); - if (err) - retval = err; - unlock_new_inode(inode); - iput(inode); - goto out_failed; - } - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(dir); - - d_instantiate_new(dentry, inode); - retval = journal_end(&th); - -out_failed: - reiserfs_write_unlock(dir->i_sb); - reiserfs_security_free(&security); - return retval; -} - -static int reiserfs_mknod(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode, dev_t rdev) -{ - int retval; - struct inode *inode; - struct reiserfs_transaction_handle th; - struct reiserfs_security_handle security; - /* - * We need blocks for transaction + (user+group)*(quotas - * for new inode + update of quota for directory owner) - */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 3 + - 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + - REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); - - retval = dquot_initialize(dir); - if (retval) - return retval; - - if (!(inode = new_inode(dir->i_sb))) { - return -ENOMEM; - } - retval = new_inode_init(inode, dir, mode); - if (retval) { - drop_new_inode(inode); - return retval; - } - - jbegin_count += reiserfs_cache_default_acl(dir); - retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security); - if (retval < 0) { - drop_new_inode(inode); - return 
retval; - } - jbegin_count += retval; - reiserfs_write_lock(dir->i_sb); - - retval = journal_begin(&th, dir->i_sb, jbegin_count); - if (retval) { - drop_new_inode(inode); - goto out_failed; - } - - retval = - reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry, - inode, &security); - if (retval) { - goto out_failed; - } - - inode->i_op = &reiserfs_special_inode_operations; - init_special_inode(inode, inode->i_mode, rdev); - - /* FIXME: needed for block and char devices only */ - reiserfs_update_sd(&th, inode); - - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(dir); - - retval = - reiserfs_add_entry(&th, dir, dentry->d_name.name, - dentry->d_name.len, inode, 1 /*visible */ ); - if (retval) { - int err; - drop_nlink(inode); - reiserfs_update_sd(&th, inode); - err = journal_end(&th); - if (err) - retval = err; - unlock_new_inode(inode); - iput(inode); - goto out_failed; - } - - d_instantiate_new(dentry, inode); - retval = journal_end(&th); - -out_failed: - reiserfs_write_unlock(dir->i_sb); - reiserfs_security_free(&security); - return retval; -} - -static int reiserfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode) -{ - int retval; - struct inode *inode; - struct reiserfs_transaction_handle th; - struct reiserfs_security_handle security; - /* - * We need blocks for transaction + (user+group)*(quotas - * for new inode + update of quota for directory owner) - */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 3 + - 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + - REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); - - retval = dquot_initialize(dir); - if (retval) - return retval; - -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - /* - * set flag that new packing locality created and new blocks - * for the content of that directory are not displaced yet - */ - REISERFS_I(dir)->new_packing_locality = 1; -#endif - mode = S_IFDIR | mode; - if (!(inode = new_inode(dir->i_sb))) { - return -ENOMEM; - } - retval = new_inode_init(inode, dir, mode); - if (retval) { - drop_new_inode(inode); - return retval; - } - - jbegin_count += reiserfs_cache_default_acl(dir); - retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security); - if (retval < 0) { - drop_new_inode(inode); - return retval; - } - jbegin_count += retval; - reiserfs_write_lock(dir->i_sb); - - retval = journal_begin(&th, dir->i_sb, jbegin_count); - if (retval) { - drop_new_inode(inode); - goto out_failed; - } - - /* - * inc the link count now, so another writer doesn't overflow - * it while we sleep later on. - */ - INC_DIR_INODE_NLINK(dir) - - retval = reiserfs_new_inode(&th, dir, mode, NULL /*symlink */, - old_format_only(dir->i_sb) ? 
- EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, - dentry, inode, &security); - if (retval) { - DEC_DIR_INODE_NLINK(dir) - goto out_failed; - } - - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(dir); - - inode->i_op = &reiserfs_dir_inode_operations; - inode->i_fop = &reiserfs_dir_operations; - - /* note, _this_ add_entry will not update dir's stat data */ - retval = - reiserfs_add_entry(&th, dir, dentry->d_name.name, - dentry->d_name.len, inode, 1 /*visible */ ); - if (retval) { - int err; - clear_nlink(inode); - DEC_DIR_INODE_NLINK(dir); - reiserfs_update_sd(&th, inode); - err = journal_end(&th); - if (err) - retval = err; - unlock_new_inode(inode); - iput(inode); - goto out_failed; - } - /* the above add_entry did not update dir's stat data */ - reiserfs_update_sd(&th, dir); - - d_instantiate_new(dentry, inode); - retval = journal_end(&th); -out_failed: - reiserfs_write_unlock(dir->i_sb); - reiserfs_security_free(&security); - return retval; -} - -static inline int reiserfs_empty_dir(struct inode *inode) -{ - /* - * we can cheat because an old format dir cannot have - * EMPTY_DIR_SIZE, and a new format dir cannot have - * EMPTY_DIR_SIZE_V1. So, if the inode is either size, - * regardless of disk format version, the directory is empty. - */ - if (inode->i_size != EMPTY_DIR_SIZE && - inode->i_size != EMPTY_DIR_SIZE_V1) { - return 0; - } - return 1; -} - -static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) -{ - int retval, err; - struct inode *inode; - struct reiserfs_transaction_handle th; - int jbegin_count; - INITIALIZE_PATH(path); - struct reiserfs_dir_entry de; - - /* - * we will be doing 2 balancings and update 2 stat data, we - * change quotas of the owner of the directory and of the owner - * of the parent directory. 
The quota structure is possibly - * deleted only on last iput => outside of this transaction - */ - jbegin_count = - JOURNAL_PER_BALANCE_CNT * 2 + 2 + - 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); - - retval = dquot_initialize(dir); - if (retval) - return retval; - - reiserfs_write_lock(dir->i_sb); - retval = journal_begin(&th, dir->i_sb, jbegin_count); - if (retval) - goto out_rmdir; - - de.de_gen_number_bit_string = NULL; - if ((retval = - reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, - &path, &de)) == NAME_NOT_FOUND) { - retval = -ENOENT; - goto end_rmdir; - } else if (retval == IO_ERROR) { - retval = -EIO; - goto end_rmdir; - } - - inode = d_inode(dentry); - - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(dir); - - if (de.de_objectid != inode->i_ino) { - /* - * FIXME: compare key of an object and a key found in the entry - */ - retval = -EIO; - goto end_rmdir; - } - if (!reiserfs_empty_dir(inode)) { - retval = -ENOTEMPTY; - goto end_rmdir; - } - - /* cut entry from dir directory */ - retval = reiserfs_cut_from_item(&th, &path, &de.de_entry_key, - dir, NULL, /* page */ - 0 /*new file size - not used here */ ); - if (retval < 0) - goto end_rmdir; - - if (inode->i_nlink != 2 && inode->i_nlink != 1) - reiserfs_error(inode->i_sb, "reiserfs-7040", - "empty directory has nlink != 2 (%d)", - inode->i_nlink); - - clear_nlink(inode); - inode_set_mtime_to_ts(dir, - inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); - reiserfs_update_sd(&th, inode); - - DEC_DIR_INODE_NLINK(dir) - dir->i_size -= (DEH_SIZE + de.de_entrylen); - reiserfs_update_sd(&th, dir); - - /* prevent empty directory from getting lost */ - add_save_link(&th, inode, 0 /* not truncate */ ); - - retval = journal_end(&th); - reiserfs_check_path(&path); -out_rmdir: - reiserfs_write_unlock(dir->i_sb); - return retval; - -end_rmdir: - /* - * we must release path, because we did not call - * reiserfs_cut_from_item, or reiserfs_cut_from_item does not - * release path if operation was not complete - */ - pathrelse(&path); - err = journal_end(&th); - reiserfs_write_unlock(dir->i_sb); - return err ? err : retval; -} - -static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) -{ - int retval, err; - struct inode *inode; - struct reiserfs_dir_entry de; - INITIALIZE_PATH(path); - struct reiserfs_transaction_handle th; - int jbegin_count; - unsigned long savelink; - - retval = dquot_initialize(dir); - if (retval) - return retval; - - inode = d_inode(dentry); - - /* - * in this transaction we can be doing at max two balancings and - * update two stat datas, we change quotas of the owner of the - * directory and of the owner of the parent directory. 
The quota - * structure is possibly deleted only on iput => outside of - * this transaction - */ - jbegin_count = - JOURNAL_PER_BALANCE_CNT * 2 + 2 + - 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); - - reiserfs_write_lock(dir->i_sb); - retval = journal_begin(&th, dir->i_sb, jbegin_count); - if (retval) - goto out_unlink; - - de.de_gen_number_bit_string = NULL; - if ((retval = - reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, - &path, &de)) == NAME_NOT_FOUND) { - retval = -ENOENT; - goto end_unlink; - } else if (retval == IO_ERROR) { - retval = -EIO; - goto end_unlink; - } - - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(dir); - - if (de.de_objectid != inode->i_ino) { - /* - * FIXME: compare key of an object and a key found in the entry - */ - retval = -EIO; - goto end_unlink; - } - - if (!inode->i_nlink) { - reiserfs_warning(inode->i_sb, "reiserfs-7042", - "deleting nonexistent file (%lu), %d", - inode->i_ino, inode->i_nlink); - set_nlink(inode, 1); - } - - drop_nlink(inode); - - /* - * we schedule before doing the add_save_link call, save the link - * count so we don't race - */ - savelink = inode->i_nlink; - - retval = - reiserfs_cut_from_item(&th, &path, &de.de_entry_key, dir, NULL, - 0); - if (retval < 0) { - inc_nlink(inode); - goto end_unlink; - } - inode_set_ctime_current(inode); - reiserfs_update_sd(&th, inode); - - dir->i_size -= (de.de_entrylen + DEH_SIZE); - inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); - reiserfs_update_sd(&th, dir); - - if (!savelink) - /* prevent file from getting lost */ - add_save_link(&th, inode, 0 /* not truncate */ ); - - retval = journal_end(&th); - reiserfs_check_path(&path); - reiserfs_write_unlock(dir->i_sb); - return retval; - -end_unlink: - pathrelse(&path); - err = journal_end(&th); - reiserfs_check_path(&path); - if (err) - retval = err; -out_unlink: - reiserfs_write_unlock(dir->i_sb); - return retval; -} - -static int reiserfs_symlink(struct mnt_idmap *idmap, - struct inode *parent_dir, struct dentry *dentry, - const char *symname) -{ - int retval; - struct inode *inode; - char *name; - int item_len; - struct reiserfs_transaction_handle th; - struct reiserfs_security_handle security; - int mode = S_IFLNK | S_IRWXUGO; - /* - * We need blocks for transaction + (user+group)*(quotas for - * new inode + update of quota for directory owner) - */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 3 + - 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) + - REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb)); - - retval = dquot_initialize(parent_dir); - if (retval) - return retval; - - if (!(inode = new_inode(parent_dir->i_sb))) { - return -ENOMEM; - } - retval = new_inode_init(inode, parent_dir, mode); - if (retval) { - drop_new_inode(inode); - return retval; - } - - retval = reiserfs_security_init(parent_dir, inode, &dentry->d_name, - &security); - if (retval < 0) { - drop_new_inode(inode); - return retval; - } - jbegin_count += retval; - - reiserfs_write_lock(parent_dir->i_sb); - item_len = ROUND_UP(strlen(symname)); - if (item_len > MAX_DIRECT_ITEM_LEN(parent_dir->i_sb->s_blocksize)) { - retval = -ENAMETOOLONG; - drop_new_inode(inode); - goto out_failed; - } - - name = kmalloc(item_len, GFP_NOFS); - if (!name) { - drop_new_inode(inode); - retval = -ENOMEM; - goto out_failed; - } - memcpy(name, symname, strlen(symname)); - padd_item(name, item_len, strlen(symname)); - - retval = journal_begin(&th, parent_dir->i_sb, jbegin_count); - if (retval) { - drop_new_inode(inode); - kfree(name); - goto 
out_failed; - } - - retval = - reiserfs_new_inode(&th, parent_dir, mode, name, strlen(symname), - dentry, inode, &security); - kfree(name); - if (retval) { /* reiserfs_new_inode iputs for us */ - goto out_failed; - } - - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(parent_dir); - - inode->i_op = &reiserfs_symlink_inode_operations; - inode_nohighmem(inode); - inode->i_mapping->a_ops = &reiserfs_address_space_operations; - - retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name, - dentry->d_name.len, inode, 1 /*visible */ ); - if (retval) { - int err; - drop_nlink(inode); - reiserfs_update_sd(&th, inode); - err = journal_end(&th); - if (err) - retval = err; - unlock_new_inode(inode); - iput(inode); - goto out_failed; - } - - d_instantiate_new(dentry, inode); - retval = journal_end(&th); -out_failed: - reiserfs_write_unlock(parent_dir->i_sb); - reiserfs_security_free(&security); - return retval; -} - -static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, - struct dentry *dentry) -{ - int retval; - struct inode *inode = d_inode(old_dentry); - struct reiserfs_transaction_handle th; - /* - * We need blocks for transaction + update of quotas for - * the owners of the directory - */ - int jbegin_count = - JOURNAL_PER_BALANCE_CNT * 3 + - 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); - - retval = dquot_initialize(dir); - if (retval) - return retval; - - reiserfs_write_lock(dir->i_sb); - if (inode->i_nlink >= REISERFS_LINK_MAX) { - /* FIXME: sd_nlink is 32 bit for new files */ - reiserfs_write_unlock(dir->i_sb); - return -EMLINK; - } - - /* inc before scheduling so reiserfs_unlink knows we are here */ - inc_nlink(inode); - - retval = journal_begin(&th, dir->i_sb, jbegin_count); - if (retval) { - drop_nlink(inode); - reiserfs_write_unlock(dir->i_sb); - return retval; - } - - /* create new entry */ - retval = - reiserfs_add_entry(&th, dir, dentry->d_name.name, - dentry->d_name.len, inode, 1 /*visible */ ); - - reiserfs_update_inode_transaction(inode); - reiserfs_update_inode_transaction(dir); - - if (retval) { - int err; - drop_nlink(inode); - err = journal_end(&th); - reiserfs_write_unlock(dir->i_sb); - return err ? err : retval; - } - - inode_set_ctime_current(inode); - reiserfs_update_sd(&th, inode); - - ihold(inode); - d_instantiate(dentry, inode); - retval = journal_end(&th); - reiserfs_write_unlock(dir->i_sb); - return retval; -} - -/* de contains information pointing to an entry which */ -static int de_still_valid(const char *name, int len, - struct reiserfs_dir_entry *de) -{ - struct reiserfs_dir_entry tmp = *de; - - /* recalculate pointer to name and name length */ - set_de_name_and_namelen(&tmp); - /* FIXME: could check more */ - if (tmp.de_namelen != len || memcmp(name, de->de_name, len)) - return 0; - return 1; -} - -static int entry_points_to_object(const char *name, int len, - struct reiserfs_dir_entry *de, - struct inode *inode) -{ - if (!de_still_valid(name, len, de)) - return 0; - - if (inode) { - if (!de_visible(de->de_deh + de->de_entry_num)) - reiserfs_panic(inode->i_sb, "vs-7042", - "entry must be visible"); - return (de->de_objectid == inode->i_ino) ? 
1 : 0; - } - - /* this must be added hidden entry */ - if (de_visible(de->de_deh + de->de_entry_num)) - reiserfs_panic(NULL, "vs-7043", "entry must be visible"); - - return 1; -} - -/* sets key of objectid the entry has to point to */ -static void set_ino_in_dir_entry(struct reiserfs_dir_entry *de, - struct reiserfs_key *key) -{ - /* JDM These operations are endian safe - both are le */ - de->de_deh[de->de_entry_num].deh_dir_id = key->k_dir_id; - de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid; -} - -/* - * process, that is going to call fix_nodes/do_balance must hold only - * one path. If it holds 2 or more, it can get into endless waiting in - * get_empty_nodes or its clones - */ -static int reiserfs_rename(struct mnt_idmap *idmap, - struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags) -{ - int retval; - INITIALIZE_PATH(old_entry_path); - INITIALIZE_PATH(new_entry_path); - INITIALIZE_PATH(dot_dot_entry_path); - struct item_head new_entry_ih, old_entry_ih, dot_dot_ih; - struct reiserfs_dir_entry old_de, new_de, dot_dot_de; - struct inode *old_inode, *new_dentry_inode; - struct reiserfs_transaction_handle th; - int jbegin_count; - unsigned long savelink = 1; - bool update_dir_parent = false; - - if (flags & ~RENAME_NOREPLACE) - return -EINVAL; - - /* - * three balancings: (1) old name removal, (2) new name insertion - * and (3) maybe "save" link insertion - * stat data updates: (1) old directory, - * (2) new directory and (3) maybe old object stat data (when it is - * directory) and (4) maybe stat data of object to which new entry - * pointed initially and (5) maybe block containing ".." of - * renamed directory - * quota updates: two parent directories - */ - jbegin_count = - JOURNAL_PER_BALANCE_CNT * 3 + 5 + - 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); - - retval = dquot_initialize(old_dir); - if (retval) - return retval; - retval = dquot_initialize(new_dir); - if (retval) - return retval; - - old_inode = d_inode(old_dentry); - new_dentry_inode = d_inode(new_dentry); - - /* - * make sure that oldname still exists and points to an object we - * are going to rename - */ - old_de.de_gen_number_bit_string = NULL; - reiserfs_write_lock(old_dir->i_sb); - retval = - reiserfs_find_entry(old_dir, old_dentry->d_name.name, - old_dentry->d_name.len, &old_entry_path, - &old_de); - pathrelse(&old_entry_path); - if (retval == IO_ERROR) { - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - - if (retval != NAME_FOUND || old_de.de_objectid != old_inode->i_ino) { - reiserfs_write_unlock(old_dir->i_sb); - return -ENOENT; - } - - if (S_ISDIR(old_inode->i_mode)) { - /* - * make sure that directory being renamed has correct ".." - * and that its new parent directory has not too many links - * already - */ - if (new_dentry_inode) { - if (!reiserfs_empty_dir(new_dentry_inode)) { - reiserfs_write_unlock(old_dir->i_sb); - return -ENOTEMPTY; - } - } - - if (old_dir != new_dir) { - /* - * directory is renamed, its parent directory will be - * changed, so find ".." entry - */ - dot_dot_de.de_gen_number_bit_string = NULL; - retval = - reiserfs_find_entry(old_inode, "..", 2, - &dot_dot_entry_path, - &dot_dot_de); - pathrelse(&dot_dot_entry_path); - if (retval != NAME_FOUND) { - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - - /* inode number of .. 
must equal old_dir->i_ino */ - if (dot_dot_de.de_objectid != old_dir->i_ino) { - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - update_dir_parent = true; - } - } - - retval = journal_begin(&th, old_dir->i_sb, jbegin_count); - if (retval) { - reiserfs_write_unlock(old_dir->i_sb); - return retval; - } - - /* add new entry (or find the existing one) */ - retval = - reiserfs_add_entry(&th, new_dir, new_dentry->d_name.name, - new_dentry->d_name.len, old_inode, 0); - if (retval == -EEXIST) { - if (!new_dentry_inode) { - reiserfs_panic(old_dir->i_sb, "vs-7050", - "new entry is found, new inode == 0"); - } - } else if (retval) { - int err = journal_end(&th); - reiserfs_write_unlock(old_dir->i_sb); - return err ? err : retval; - } - - reiserfs_update_inode_transaction(old_dir); - reiserfs_update_inode_transaction(new_dir); - - /* - * this makes it so an fsync on an open fd for the old name will - * commit the rename operation - */ - reiserfs_update_inode_transaction(old_inode); - - if (new_dentry_inode) - reiserfs_update_inode_transaction(new_dentry_inode); - - while (1) { - /* - * look for old name using corresponding entry key - * (found by reiserfs_find_entry) - */ - if ((retval = - search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key, - &old_entry_path, - &old_de)) != NAME_FOUND) { - pathrelse(&old_entry_path); - journal_end(&th); - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - - copy_item_head(&old_entry_ih, tp_item_head(&old_entry_path)); - - reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1); - - /* look for new name by reiserfs_find_entry */ - new_de.de_gen_number_bit_string = NULL; - retval = - reiserfs_find_entry(new_dir, new_dentry->d_name.name, - new_dentry->d_name.len, &new_entry_path, - &new_de); - /* - * reiserfs_add_entry should not return IO_ERROR, - * because it is called with essentially same parameters from - * reiserfs_add_entry above, and we'll catch any i/o errors - * before we get here. - */ - if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) { - pathrelse(&new_entry_path); - pathrelse(&old_entry_path); - journal_end(&th); - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - - copy_item_head(&new_entry_ih, tp_item_head(&new_entry_path)); - - reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1); - - if (update_dir_parent) { - if ((retval = - search_by_entry_key(new_dir->i_sb, - &dot_dot_de.de_entry_key, - &dot_dot_entry_path, - &dot_dot_de)) != NAME_FOUND) { - pathrelse(&dot_dot_entry_path); - pathrelse(&new_entry_path); - pathrelse(&old_entry_path); - journal_end(&th); - reiserfs_write_unlock(old_dir->i_sb); - return -EIO; - } - copy_item_head(&dot_dot_ih, - tp_item_head(&dot_dot_entry_path)); - /* node containing ".." gets into transaction */ - reiserfs_prepare_for_journal(old_inode->i_sb, - dot_dot_de.de_bh, 1); - } - /* - * we should check seals here, not do - * this stuff, yes? Then, having - * gathered everything into RAM we - * should lock the buffers, yes? -Hans - */ - /* - * probably. our rename needs to hold more - * than one path at once. The seals would - * have to be written to deal with multi-path - * issues -chris - */ - /* - * sanity checking before doing the rename - avoid races many - * of the above checks could have scheduled. We have to be - * sure our items haven't been shifted by another process. 
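In isolation, the validate-and-retry idea behind this check can be modelled with a few lines of plain C. Everything below is an illustrative sketch with made-up names, not kernel API: a generation counter stands in for the copied item heads, and the empty helpers stand in for preparing and restoring journal buffers.

#include <stdio.h>

static int tree_generation;	/* bumped whenever items get shifted */

static void prepare_buffers(void)
{
	/* in the kernel this step may block, letting other writers run */
}

static void restore_buffers(void)
{
	/* undo the preparation so the loop can start over */
}

static int do_rename_like_op(void)
{
	for (;;) {
		int snap = tree_generation;	/* the role copy_item_head() plays above */

		prepare_buffers();
		if (snap == tree_generation)
			break;		/* nothing moved: apply all changes at once */
		restore_buffers();	/* something shifted: undo and retry */
	}
	/* ... mark the new entry visible, hide the old one, update ".." ... */
	return 0;
}

int main(void)
{
	return do_rename_like_op();
}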
- */ - if (item_moved(&new_entry_ih, &new_entry_path) || - !entry_points_to_object(new_dentry->d_name.name, - new_dentry->d_name.len, - &new_de, new_dentry_inode) || - item_moved(&old_entry_ih, &old_entry_path) || - !entry_points_to_object(old_dentry->d_name.name, - old_dentry->d_name.len, - &old_de, old_inode)) { - reiserfs_restore_prepared_buffer(old_inode->i_sb, - new_de.de_bh); - reiserfs_restore_prepared_buffer(old_inode->i_sb, - old_de.de_bh); - if (update_dir_parent) - reiserfs_restore_prepared_buffer(old_inode-> - i_sb, - dot_dot_de. - de_bh); - continue; - } - if (update_dir_parent) { - if (item_moved(&dot_dot_ih, &dot_dot_entry_path) || - !entry_points_to_object("..", 2, &dot_dot_de, - old_dir)) { - reiserfs_restore_prepared_buffer(old_inode-> - i_sb, - old_de.de_bh); - reiserfs_restore_prepared_buffer(old_inode-> - i_sb, - new_de.de_bh); - reiserfs_restore_prepared_buffer(old_inode-> - i_sb, - dot_dot_de. - de_bh); - continue; - } - } - - RFALSE(update_dir_parent && - !buffer_journal_prepared(dot_dot_de.de_bh), ""); - - break; - } - - /* - * ok, all the changes can be done in one fell swoop when we - * have claimed all the buffers needed. - */ - - mark_de_visible(new_de.de_deh + new_de.de_entry_num); - set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode)); - journal_mark_dirty(&th, new_de.de_bh); - - mark_de_hidden(old_de.de_deh + old_de.de_entry_num); - journal_mark_dirty(&th, old_de.de_bh); - /* - * thanks to Alex Adriaanse for patch - * which adds ctime update of renamed object - */ - simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); - - if (new_dentry_inode) { - /* adjust link number of the victim */ - if (S_ISDIR(new_dentry_inode->i_mode)) { - clear_nlink(new_dentry_inode); - } else { - drop_nlink(new_dentry_inode); - } - savelink = new_dentry_inode->i_nlink; - } - - if (update_dir_parent) { - /* adjust ".." of renamed directory */ - set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir)); - journal_mark_dirty(&th, dot_dot_de.de_bh); - } - if (S_ISDIR(old_inode->i_mode)) { - /* - * there (in new_dir) was no directory, so it got new link - * (".." of renamed directory) - */ - if (!new_dentry_inode) - INC_DIR_INODE_NLINK(new_dir); - - /* old directory lost one link - ".. " of renamed directory */ - DEC_DIR_INODE_NLINK(old_dir); - } - /* - * looks like in 2.3.99pre3 brelse is atomic. - * so we can use pathrelse - */ - pathrelse(&new_entry_path); - pathrelse(&dot_dot_entry_path); - - /* - * FIXME: this reiserfs_cut_from_item's return value may screw up - * anybody, but it will panic if will not be able to find the - * entry. This needs one more clean up - */ - if (reiserfs_cut_from_item - (&th, &old_entry_path, &old_de.de_entry_key, old_dir, NULL, - 0) < 0) - reiserfs_error(old_dir->i_sb, "vs-7060", - "couldn't not cut old name. 
Fsck later?"); - - old_dir->i_size -= DEH_SIZE + old_de.de_entrylen; - - reiserfs_update_sd(&th, old_dir); - reiserfs_update_sd(&th, new_dir); - reiserfs_update_sd(&th, old_inode); - - if (new_dentry_inode) { - if (savelink == 0) - add_save_link(&th, new_dentry_inode, - 0 /* not truncate */ ); - reiserfs_update_sd(&th, new_dentry_inode); - } - - retval = journal_end(&th); - reiserfs_write_unlock(old_dir->i_sb); - return retval; -} - -static const struct inode_operations reiserfs_priv_dir_inode_operations = { - .create = reiserfs_create, - .lookup = reiserfs_lookup, - .link = reiserfs_link, - .unlink = reiserfs_unlink, - .symlink = reiserfs_symlink, - .mkdir = reiserfs_mkdir, - .rmdir = reiserfs_rmdir, - .mknod = reiserfs_mknod, - .rename = reiserfs_rename, - .setattr = reiserfs_setattr, - .permission = reiserfs_permission, - .fileattr_get = reiserfs_fileattr_get, - .fileattr_set = reiserfs_fileattr_set, -}; - -static const struct inode_operations reiserfs_priv_symlink_inode_operations = { - .get_link = page_get_link, - .setattr = reiserfs_setattr, - .permission = reiserfs_permission, -}; - -static const struct inode_operations reiserfs_priv_special_inode_operations = { - .setattr = reiserfs_setattr, - .permission = reiserfs_permission, -}; - -void reiserfs_init_priv_inode(struct inode *inode) -{ - inode->i_flags |= S_PRIVATE; - inode->i_opflags &= ~IOP_XATTR; - - if (S_ISREG(inode->i_mode)) - inode->i_op = &reiserfs_priv_file_inode_operations; - else if (S_ISDIR(inode->i_mode)) - inode->i_op = &reiserfs_priv_dir_inode_operations; - else if (S_ISLNK(inode->i_mode)) - inode->i_op = &reiserfs_priv_symlink_inode_operations; - else - inode->i_op = &reiserfs_priv_special_inode_operations; -} - -/* directories can handle most operations... */ -const struct inode_operations reiserfs_dir_inode_operations = { - .create = reiserfs_create, - .lookup = reiserfs_lookup, - .link = reiserfs_link, - .unlink = reiserfs_unlink, - .symlink = reiserfs_symlink, - .mkdir = reiserfs_mkdir, - .rmdir = reiserfs_rmdir, - .mknod = reiserfs_mknod, - .rename = reiserfs_rename, - .setattr = reiserfs_setattr, - .listxattr = reiserfs_listxattr, - .permission = reiserfs_permission, - .get_inode_acl = reiserfs_get_acl, - .set_acl = reiserfs_set_acl, - .fileattr_get = reiserfs_fileattr_get, - .fileattr_set = reiserfs_fileattr_set, -}; - -/* - * symlink operations.. same as page_symlink_inode_operations, with xattr - * stuff added - */ -const struct inode_operations reiserfs_symlink_inode_operations = { - .get_link = page_get_link, - .setattr = reiserfs_setattr, - .listxattr = reiserfs_listxattr, - .permission = reiserfs_permission, -}; - -/* - * special file operations.. just xattr/acl stuff - */ -const struct inode_operations reiserfs_special_inode_operations = { - .setattr = reiserfs_setattr, - .listxattr = reiserfs_listxattr, - .permission = reiserfs_permission, - .get_inode_acl = reiserfs_get_acl, - .set_acl = reiserfs_set_acl, -}; diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c deleted file mode 100644 index 34baf5c0f265..000000000000 --- a/fs/reiserfs/objectid.c +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include -#include -#include -#include "reiserfs.h" - -/* find where objectid map starts */ -#define objectid_map(s,rs) (old_format_only (s) ? 
\ - (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ - (__le32 *)((rs) + 1)) - -#ifdef CONFIG_REISERFS_CHECK - -static void check_objectid_map(struct super_block *s, __le32 * map) -{ - if (le32_to_cpu(map[0]) != 1) - reiserfs_panic(s, "vs-15010", "map corrupted: %lx", - (long unsigned int)le32_to_cpu(map[0])); - - /* FIXME: add something else here */ -} - -#else -static void check_objectid_map(struct super_block *s, __le32 * map) -{; -} -#endif - -/* - * When we allocate objectids we allocate the first unused objectid. - * Each sequence of objectids in use (the odd sequences) is followed - * by a sequence of objectids not in use (the even sequences). We - * only need to record the last objectid in each of these sequences - * (both the odd and even sequences) in order to fully define the - * boundaries of the sequences. A consequence of allocating the first - * objectid not in use is that under most conditions this scheme is - * extremely compact. The exception is immediately after a sequence - * of operations which deletes a large number of objects of - * non-sequential objectids, and even then it will become compact - * again as soon as more objects are created. Note that many - * interesting optimizations of layout could result from complicating - * objectid assignment, but we have deferred making them for now. - */ - -/* get unique object identifier */ -__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th) -{ - struct super_block *s = th->t_super; - struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); - __le32 *map = objectid_map(s, rs); - __u32 unused_objectid; - - BUG_ON(!th->t_trans_id); - - check_objectid_map(s, map); - - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - /* comment needed -Hans */ - unused_objectid = le32_to_cpu(map[1]); - if (unused_objectid == U32_MAX) { - reiserfs_warning(s, "reiserfs-15100", "no more object ids"); - reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s)); - return 0; - } - - /* - * This incrementation allocates the first unused objectid. That - * is to say, the first entry on the objectid map is the first - * unused objectid, and by incrementing it we use it. See below - * where we check to see if we eliminated a sequence of unused - * objectids.... - */ - map[1] = cpu_to_le32(unused_objectid + 1); - - /* - * Now we check to see if we eliminated the last remaining member of - * the first even sequence (and can eliminate the sequence by - * eliminating its last objectid from oids), and can collapse the - * first two odd sequences into one sequence. If so, then the net - * result is to eliminate a pair of objectids from oids. We do this - * by shifting the entire map to the left. 
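A self-contained userspace model of this boundary-list map may make the scheme easier to follow. It is only a sketch: plain host-endian ints instead of the on-disk __le32 values, a fixed-size array, and no journal. Even positions start a run of used ids, odd positions start the following run of free ids; allocation always hands out map[1], the first unused objectid, and collapses two runs when they meet, mirroring the shift that reiserfs_get_unused_objectid() performs right below.

#include <stdio.h>

static unsigned int map[32] = { 1, 2 };	/* id 1 in use, everything from 2 up free */
static int cur_size = 2;

static unsigned int alloc_objectid(void)
{
	unsigned int id = map[1];		/* first unused objectid */

	map[1]++;
	/* did the free run between two used runs just disappear? */
	if (cur_size > 2 && map[1] == map[2]) {
		for (int i = 1; i + 2 < cur_size; i++)
			map[i] = map[i + 2];	/* shift the map left by one pair */
		cur_size -= 2;
	}
	return id;
}

int main(void)
{
	for (int i = 0; i < 4; i++)
		printf("allocated %u\n", alloc_objectid());
	return 0;
}

Releasing an id works the same way in reverse, as reiserfs_release_objectid() below shows: freeing an id adjacent to a boundary just moves that boundary, while freeing one from the middle of a used run needs a new boundary pair (and the id is leaked when the map is already at its maximum size).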
- */ - if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) { - memmove(map + 1, map + 3, - (sb_oid_cursize(rs) - 3) * sizeof(__u32)); - set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); - } - - journal_mark_dirty(th, SB_BUFFER_WITH_SB(s)); - return unused_objectid; -} - -/* makes object identifier unused */ -void reiserfs_release_objectid(struct reiserfs_transaction_handle *th, - __u32 objectid_to_release) -{ - struct super_block *s = th->t_super; - struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); - __le32 *map = objectid_map(s, rs); - int i = 0; - - BUG_ON(!th->t_trans_id); - /*return; */ - check_objectid_map(s, map); - - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - journal_mark_dirty(th, SB_BUFFER_WITH_SB(s)); - - /* - * start at the beginning of the objectid map (i = 0) and go to - * the end of it (i = disk_sb->s_oid_cursize). Linear search is - * what we use, though it is possible that binary search would be - * more efficient after performing lots of deletions (which is - * when oids is large.) We only check even i's. - */ - while (i < sb_oid_cursize(rs)) { - if (objectid_to_release == le32_to_cpu(map[i])) { - /* This incrementation unallocates the objectid. */ - le32_add_cpu(&map[i], 1); - - /* - * Did we unallocate the last member of an - * odd sequence, and can shrink oids? - */ - if (map[i] == map[i + 1]) { - /* shrink objectid map */ - memmove(map + i, map + i + 2, - (sb_oid_cursize(rs) - i - - 2) * sizeof(__u32)); - set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); - - RFALSE(sb_oid_cursize(rs) < 2 || - sb_oid_cursize(rs) > sb_oid_maxsize(rs), - "vs-15005: objectid map corrupted cur_size == %d (max == %d)", - sb_oid_cursize(rs), sb_oid_maxsize(rs)); - } - return; - } - - if (objectid_to_release > le32_to_cpu(map[i]) && - objectid_to_release < le32_to_cpu(map[i + 1])) { - /* size of objectid map is not changed */ - if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) { - le32_add_cpu(&map[i + 1], -1); - return; - } - - /* - * JDM comparing two little-endian values for - * equality -- safe - */ - /* - * objectid map must be expanded, but - * there is no space - */ - if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) { - PROC_INFO_INC(s, leaked_oid); - return; - } - - /* expand the objectid map */ - memmove(map + i + 3, map + i + 1, - (sb_oid_cursize(rs) - i - 1) * sizeof(__u32)); - map[i + 1] = cpu_to_le32(objectid_to_release); - map[i + 2] = cpu_to_le32(objectid_to_release + 1); - set_sb_oid_cursize(rs, sb_oid_cursize(rs) + 2); - return; - } - i += 2; - } - - reiserfs_error(s, "vs-15011", "tried to free free object id (%lu)", - (long unsigned)objectid_to_release); -} - -int reiserfs_convert_objectid_map_v1(struct super_block *s) -{ - struct reiserfs_super_block *disk_sb = SB_DISK_SUPER_BLOCK(s); - int cur_size = sb_oid_cursize(disk_sb); - int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2; - int old_max = sb_oid_maxsize(disk_sb); - struct reiserfs_super_block_v1 *disk_sb_v1; - __le32 *objectid_map; - int i; - - disk_sb_v1 = - (struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data); - objectid_map = (__le32 *) (disk_sb_v1 + 1); - - if (cur_size > new_size) { - /* - * mark everyone used that was listed as free at - * the end of the objectid map - */ - objectid_map[new_size - 1] = objectid_map[cur_size - 1]; - set_sb_oid_cursize(disk_sb, new_size); - } - /* move the smaller objectid map past the end of the new super */ - for (i = new_size - 1; i >= 0; i--) { - objectid_map[i + (old_max - new_size)] = objectid_map[i]; - } - - /* set the max size 
so we don't overflow later */ - set_sb_oid_maxsize(disk_sb, new_size); - - /* Zero out label and generate random UUID */ - memset(disk_sb->s_label, 0, sizeof(disk_sb->s_label)); - generate_random_uuid(disk_sb->s_uuid); - - /* finally, zero out the unused chunk of the new super */ - memset(disk_sb->s_unused, 0, sizeof(disk_sb->s_unused)); - return 0; -} diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c deleted file mode 100644 index 84a194b77f19..000000000000 --- a/fs/reiserfs/prints.c +++ /dev/null @@ -1,792 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -#include -#include -#include "reiserfs.h" -#include -#include - -#include - -static char error_buf[1024]; -static char fmt_buf[1024]; -static char off_buf[80]; - -static char *reiserfs_cpu_offset(struct cpu_key *key) -{ - if (cpu_key_k_type(key) == TYPE_DIRENTRY) - sprintf(off_buf, "%llu(%llu)", - (unsigned long long) - GET_HASH_VALUE(cpu_key_k_offset(key)), - (unsigned long long) - GET_GENERATION_NUMBER(cpu_key_k_offset(key))); - else - sprintf(off_buf, "0x%Lx", - (unsigned long long)cpu_key_k_offset(key)); - return off_buf; -} - -static char *le_offset(struct reiserfs_key *key) -{ - int version; - - version = le_key_version(key); - if (le_key_k_type(version, key) == TYPE_DIRENTRY) - sprintf(off_buf, "%llu(%llu)", - (unsigned long long) - GET_HASH_VALUE(le_key_k_offset(version, key)), - (unsigned long long) - GET_GENERATION_NUMBER(le_key_k_offset(version, key))); - else - sprintf(off_buf, "0x%Lx", - (unsigned long long)le_key_k_offset(version, key)); - return off_buf; -} - -static char *cpu_type(struct cpu_key *key) -{ - if (cpu_key_k_type(key) == TYPE_STAT_DATA) - return "SD"; - if (cpu_key_k_type(key) == TYPE_DIRENTRY) - return "DIR"; - if (cpu_key_k_type(key) == TYPE_DIRECT) - return "DIRECT"; - if (cpu_key_k_type(key) == TYPE_INDIRECT) - return "IND"; - return "UNKNOWN"; -} - -static char *le_type(struct reiserfs_key *key) -{ - int version; - - version = le_key_version(key); - - if (le_key_k_type(version, key) == TYPE_STAT_DATA) - return "SD"; - if (le_key_k_type(version, key) == TYPE_DIRENTRY) - return "DIR"; - if (le_key_k_type(version, key) == TYPE_DIRECT) - return "DIRECT"; - if (le_key_k_type(version, key) == TYPE_INDIRECT) - return "IND"; - return "UNKNOWN"; -} - -/* %k */ -static int scnprintf_le_key(char *buf, size_t size, struct reiserfs_key *key) -{ - if (key) - return scnprintf(buf, size, "[%d %d %s %s]", - le32_to_cpu(key->k_dir_id), - le32_to_cpu(key->k_objectid), le_offset(key), - le_type(key)); - else - return scnprintf(buf, size, "[NULL]"); -} - -/* %K */ -static int scnprintf_cpu_key(char *buf, size_t size, struct cpu_key *key) -{ - if (key) - return scnprintf(buf, size, "[%d %d %s %s]", - key->on_disk_key.k_dir_id, - key->on_disk_key.k_objectid, - reiserfs_cpu_offset(key), cpu_type(key)); - else - return scnprintf(buf, size, "[NULL]"); -} - -static int scnprintf_de_head(char *buf, size_t size, - struct reiserfs_de_head *deh) -{ - if (deh) - return scnprintf(buf, size, - "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", - deh_offset(deh), deh_dir_id(deh), - deh_objectid(deh), deh_location(deh), - deh_state(deh)); - else - return scnprintf(buf, size, "[NULL]"); - -} - -static int scnprintf_item_head(char *buf, size_t size, struct item_head *ih) -{ - if (ih) { - char *p = buf; - char * const end = buf + size; - - p += scnprintf(p, end - p, "%s", - (ih_version(ih) == KEY_FORMAT_3_6) ? 
- "*3.6* " : "*3.5*"); - - p += scnprintf_le_key(p, end - p, &ih->ih_key); - - p += scnprintf(p, end - p, - ", item_len %d, item_location %d, free_space(entry_count) %d", - ih_item_len(ih), ih_location(ih), - ih_free_space(ih)); - return p - buf; - } else - return scnprintf(buf, size, "[NULL]"); -} - -static int scnprintf_direntry(char *buf, size_t size, - struct reiserfs_dir_entry *de) -{ - char name[20]; - - memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); - name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0; - return scnprintf(buf, size, "\"%s\"==>[%d %d]", - name, de->de_dir_id, de->de_objectid); -} - -static int scnprintf_block_head(char *buf, size_t size, struct buffer_head *bh) -{ - return scnprintf(buf, size, - "level=%d, nr_items=%d, free_space=%d rdkey ", - B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); -} - -static int scnprintf_buffer_head(char *buf, size_t size, struct buffer_head *bh) -{ - return scnprintf(buf, size, - "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", - bh->b_bdev, bh->b_size, - (unsigned long long)bh->b_blocknr, - atomic_read(&(bh->b_count)), - bh->b_state, bh->b_page, - buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", - buffer_dirty(bh) ? "DIRTY" : "CLEAN", - buffer_locked(bh) ? "LOCKED" : "UNLOCKED"); -} - -static int scnprintf_disk_child(char *buf, size_t size, struct disk_child *dc) -{ - return scnprintf(buf, size, "[dc_number=%d, dc_size=%u]", - dc_block_number(dc), dc_size(dc)); -} - -static char *is_there_reiserfs_struct(char *fmt, int *what) -{ - char *k = fmt; - - while ((k = strchr(k, '%')) != NULL) { - if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' || - k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a') { - *what = k[1]; - break; - } - k++; - } - return k; -} - -/* - * debugging reiserfs we used to print out a lot of different - * variables, like keys, item headers, buffer heads etc. Values of - * most fields matter. So it took a long time just to write - * appropriative printk. With this reiserfs_warning you can use format - * specification for complex structures like you used to do with - * printfs for integers, doubles and pointers. 
For instance, to print - * out key structure you have to write just: - * reiserfs_warning ("bad key %k", key); - * instead of - * printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, - * key->k_offset, key->k_uniqueness); - */ -static DEFINE_SPINLOCK(error_lock); -static void prepare_error_buf(const char *fmt, va_list args) -{ - char *fmt1 = fmt_buf; - char *k; - char *p = error_buf; - char * const end = &error_buf[sizeof(error_buf)]; - int what; - - spin_lock(&error_lock); - - if (WARN_ON(strscpy(fmt_buf, fmt, sizeof(fmt_buf)) < 0)) { - strscpy(error_buf, "format string too long", end - error_buf); - goto out_unlock; - } - - while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) { - *k = 0; - - p += vscnprintf(p, end - p, fmt1, args); - - switch (what) { - case 'k': - p += scnprintf_le_key(p, end - p, - va_arg(args, struct reiserfs_key *)); - break; - case 'K': - p += scnprintf_cpu_key(p, end - p, - va_arg(args, struct cpu_key *)); - break; - case 'h': - p += scnprintf_item_head(p, end - p, - va_arg(args, struct item_head *)); - break; - case 't': - p += scnprintf_direntry(p, end - p, - va_arg(args, struct reiserfs_dir_entry *)); - break; - case 'y': - p += scnprintf_disk_child(p, end - p, - va_arg(args, struct disk_child *)); - break; - case 'z': - p += scnprintf_block_head(p, end - p, - va_arg(args, struct buffer_head *)); - break; - case 'b': - p += scnprintf_buffer_head(p, end - p, - va_arg(args, struct buffer_head *)); - break; - case 'a': - p += scnprintf_de_head(p, end - p, - va_arg(args, struct reiserfs_de_head *)); - break; - } - - fmt1 = k + 2; - } - p += vscnprintf(p, end - p, fmt1, args); -out_unlock: - spin_unlock(&error_lock); - -} - -/* - * in addition to usual conversion specifiers this accepts reiserfs - * specific conversion specifiers: - * %k to print little endian key, - * %K to print cpu key, - * %h to print item_head, - * %t to print directory entry - * %z to print block head (arg must be struct buffer_head * - * %b to print buffer_head - */ - -#define do_reiserfs_warning(fmt)\ -{\ - va_list args;\ - va_start( args, fmt );\ - prepare_error_buf( fmt, args );\ - va_end( args );\ -} - -void __reiserfs_warning(struct super_block *sb, const char *id, - const char *function, const char *fmt, ...) -{ - do_reiserfs_warning(fmt); - if (sb) - printk(KERN_WARNING "REISERFS warning (device %s): %s%s%s: " - "%s\n", sb->s_id, id ? id : "", id ? " " : "", - function, error_buf); - else - printk(KERN_WARNING "REISERFS warning: %s%s%s: %s\n", - id ? id : "", id ? " " : "", function, error_buf); -} - -/* No newline.. reiserfs_info calls can be followed by printk's */ -void reiserfs_info(struct super_block *sb, const char *fmt, ...) -{ - do_reiserfs_warning(fmt); - if (sb) - printk(KERN_NOTICE "REISERFS (device %s): %s", - sb->s_id, error_buf); - else - printk(KERN_NOTICE "REISERFS %s:", error_buf); -} - -/* No newline.. reiserfs_printk calls can be followed by printk's */ -static void reiserfs_printk(const char *fmt, ...) -{ - do_reiserfs_warning(fmt); - printk(error_buf); -} - -void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...) 
-{ -#ifdef CONFIG_REISERFS_CHECK - do_reiserfs_warning(fmt); - if (s) - printk(KERN_DEBUG "REISERFS debug (device %s): %s\n", - s->s_id, error_buf); - else - printk(KERN_DEBUG "REISERFS debug: %s\n", error_buf); -#endif -} - -/* - * The format: - * - * maintainer-errorid: [function-name:] message - * - * where errorid is unique to the maintainer and function-name is - * optional, is recommended, so that anyone can easily find the bug - * with a simple grep for the short to type string - * maintainer-errorid. Don't bother with reusing errorids, there are - * lots of numbers out there. - * - * Example: - * - * reiserfs_panic( - * p_sb, "reiser-29: reiserfs_new_blocknrs: " - * "one of search_start or rn(%d) is equal to MAX_B_NUM," - * "which means that we are optimizing location based on the " - * "bogus location of a temp buffer (%p).", - * rn, bh - * ); - * - * Regular panic()s sometimes clear the screen before the message can - * be read, thus the need for the while loop. - * - * Numbering scheme for panic used by Vladimir and Anatoly( Hans completely - * ignores this scheme, and considers it pointless complexity): - * - * panics in reiserfs_fs.h have numbers from 1000 to 1999 - * super.c 2000 to 2999 - * preserve.c (unused) 3000 to 3999 - * bitmap.c 4000 to 4999 - * stree.c 5000 to 5999 - * prints.c 6000 to 6999 - * namei.c 7000 to 7999 - * fix_nodes.c 8000 to 8999 - * dir.c 9000 to 9999 - * lbalance.c 10000 to 10999 - * ibalance.c 11000 to 11999 not ready - * do_balan.c 12000 to 12999 - * inode.c 13000 to 13999 - * file.c 14000 to 14999 - * objectid.c 15000 - 15999 - * buffer.c 16000 - 16999 - * symlink.c 17000 - 17999 - * - * . */ - -void __reiserfs_panic(struct super_block *sb, const char *id, - const char *function, const char *fmt, ...) -{ - do_reiserfs_warning(fmt); - -#ifdef CONFIG_REISERFS_CHECK - dump_stack(); -#endif - if (sb) - printk(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n", - sb->s_id, id ? id : "", id ? " " : "", - function, error_buf); - else - printk(KERN_WARNING "REISERFS panic: %s%s%s: %s\n", - id ? id : "", id ? " " : "", function, error_buf); - BUG(); -} - -void __reiserfs_error(struct super_block *sb, const char *id, - const char *function, const char *fmt, ...) -{ - do_reiserfs_warning(fmt); - - BUG_ON(sb == NULL); - - if (reiserfs_error_panic(sb)) - __reiserfs_panic(sb, id, function, error_buf); - - if (id && id[0]) - printk(KERN_CRIT "REISERFS error (device %s): %s %s: %s\n", - sb->s_id, id, function, error_buf); - else - printk(KERN_CRIT "REISERFS error (device %s): %s: %s\n", - sb->s_id, function, error_buf); - - if (sb_rdonly(sb)) - return; - - reiserfs_info(sb, "Remounting filesystem read-only\n"); - sb->s_flags |= SB_RDONLY; - reiserfs_abort_journal(sb, -EIO); -} - -void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) -{ - do_reiserfs_warning(fmt); - - if (reiserfs_error_panic(sb)) { - panic(KERN_CRIT "REISERFS panic (device %s): %s\n", sb->s_id, - error_buf); - } - - if (reiserfs_is_journal_aborted(SB_JOURNAL(sb))) - return; - - printk(KERN_CRIT "REISERFS abort (device %s): %s\n", sb->s_id, - error_buf); - - sb->s_flags |= SB_RDONLY; - reiserfs_abort_journal(sb, errno); -} - -/* - * this prints internal nodes (4 keys/items in line) (dc_number, - * dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, - * dc_size)... 
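The custom conversion specifiers listed above all work the same way: prepare_error_buf() splits the format string at each reiserfs-specific specifier, renders the corresponding structure argument into text, and lets the ordinary printf machinery handle the rest. A stripped-down, self-contained illustration of that splitting, supporting only a made-up %k plus literal text (so none of the va_list juggling needed for mixed standard specifiers), could look like this:

#include <stdarg.h>
#include <stdio.h>

struct demo_key { unsigned int dir_id, objectid; };

static void demo_warning(const char *fmt, ...)
{
	char buf[256];
	char *p = buf;
	char * const end = buf + sizeof(buf);
	va_list args;

	va_start(args, fmt);
	while (*fmt && p < end - 1) {
		if (fmt[0] == '%' && fmt[1] == 'k') {
			/* render the structure argument in place of %k */
			const struct demo_key *key = va_arg(args, const struct demo_key *);

			p += snprintf(p, end - p, "[%u %u]", key->dir_id, key->objectid);
			if (p > end - 1)
				p = end - 1;	/* snprintf() reports the untruncated length */
			fmt += 2;
		} else {
			*p++ = *fmt++;	/* literal text is copied through */
		}
	}
	*p = '\0';
	va_end(args);
	printf("REISERFS warning: %s\n", buf);
}

int main(void)
{
	struct demo_key key = { .dir_id = 1, .objectid = 2 };

	demo_warning("bad key %k", &key);
	return 0;
}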
- */ -static int print_internal(struct buffer_head *bh, int first, int last) -{ - struct reiserfs_key *key; - struct disk_child *dc; - int i; - int from, to; - - if (!B_IS_KEYS_LEVEL(bh)) - return 1; - - check_internal(bh); - - if (first == -1) { - from = 0; - to = B_NR_ITEMS(bh); - } else { - from = first; - to = min_t(int, last, B_NR_ITEMS(bh)); - } - - reiserfs_printk("INTERNAL NODE (%ld) contains %z\n", bh->b_blocknr, bh); - - dc = B_N_CHILD(bh, from); - reiserfs_printk("PTR %d: %y ", from, dc); - - for (i = from, key = internal_key(bh, from), dc++; i < to; - i++, key++, dc++) { - reiserfs_printk("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc); - if (i && i % 4 == 0) - printk("\n"); - } - printk("\n"); - return 0; -} - -static int print_leaf(struct buffer_head *bh, int print_mode, int first, - int last) -{ - struct block_head *blkh; - struct item_head *ih; - int i, nr; - int from, to; - - if (!B_IS_ITEMS_LEVEL(bh)) - return 1; - - check_leaf(bh); - - blkh = B_BLK_HEAD(bh); - ih = item_head(bh, 0); - nr = blkh_nr_item(blkh); - - printk - ("\n===================================================================\n"); - reiserfs_printk("LEAF NODE (%ld) contains %z\n", bh->b_blocknr, bh); - - if (!(print_mode & PRINT_LEAF_ITEMS)) { - reiserfs_printk("FIRST ITEM_KEY: %k, LAST ITEM KEY: %k\n", - &(ih->ih_key), &((ih + nr - 1)->ih_key)); - return 0; - } - - if (first < 0 || first > nr - 1) - from = 0; - else - from = first; - - if (last < 0 || last > nr) - to = nr; - else - to = last; - - ih += from; - printk - ("-------------------------------------------------------------------------------\n"); - printk - ("|##| type | key | ilen | free_space | version | loc |\n"); - for (i = from; i < to; i++, ih++) { - printk - ("-------------------------------------------------------------------------------\n"); - reiserfs_printk("|%2d| %h |\n", i, ih); - if (print_mode & PRINT_LEAF_ITEMS) - op_print_item(ih, ih_item_body(bh, ih)); - } - - printk - ("===================================================================\n"); - - return 0; -} - -char *reiserfs_hashname(int code) -{ - if (code == YURA_HASH) - return "rupasov"; - if (code == TEA_HASH) - return "tea"; - if (code == R5_HASH) - return "r5"; - - return "unknown"; -} - -/* return 1 if this is not super block */ -static int print_super_block(struct buffer_head *bh) -{ - struct reiserfs_super_block *rs = - (struct reiserfs_super_block *)(bh->b_data); - int skipped, data_blocks; - char *version; - - if (is_reiserfs_3_5(rs)) { - version = "3.5"; - } else if (is_reiserfs_3_6(rs)) { - version = "3.6"; - } else if (is_reiserfs_jr(rs)) { - version = ((sb_version(rs) == REISERFS_VERSION_2) ? - "3.6" : "3.5"); - } else { - return 1; - } - - printk("%pg\'s super block is in block %llu\n", bh->b_bdev, - (unsigned long long)bh->b_blocknr); - printk("Reiserfs version %s\n", version); - printk("Block count %u\n", sb_block_count(rs)); - printk("Blocksize %d\n", sb_blocksize(rs)); - printk("Free blocks %u\n", sb_free_blocks(rs)); - /* - * FIXME: this would be confusing if - * someone stores reiserfs super block in some data block ;) -// skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs); - */ - skipped = bh->b_blocknr; - data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) - - (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) + - 1 : sb_reserved_for_journal(rs)) - sb_free_blocks(rs); - printk - ("Busy blocks (skipped %d, bitmaps - %d, journal (or reserved) blocks - %d\n" - "1 super block, %d data blocks\n", skipped, sb_bmap_nr(rs), - (!is_reiserfs_jr(rs) ? 
(sb_jp_journal_size(rs) + 1) : - sb_reserved_for_journal(rs)), data_blocks); - printk("Root block %u\n", sb_root_block(rs)); - printk("Journal block (first) %d\n", sb_jp_journal_1st_block(rs)); - printk("Journal dev %d\n", sb_jp_journal_dev(rs)); - printk("Journal orig size %d\n", sb_jp_journal_size(rs)); - printk("FS state %d\n", sb_fs_state(rs)); - printk("Hash function \"%s\"\n", - reiserfs_hashname(sb_hash_function_code(rs))); - - printk("Tree height %d\n", sb_tree_height(rs)); - return 0; -} - -static int print_desc_block(struct buffer_head *bh) -{ - struct reiserfs_journal_desc *desc; - - if (memcmp(get_journal_desc_magic(bh), JOURNAL_DESC_MAGIC, 8)) - return 1; - - desc = (struct reiserfs_journal_desc *)(bh->b_data); - printk("Desc block %llu (j_trans_id %d, j_mount_id %d, j_len %d)", - (unsigned long long)bh->b_blocknr, get_desc_trans_id(desc), - get_desc_mount_id(desc), get_desc_trans_len(desc)); - - return 0; -} -/* ..., int print_mode, int first, int last) */ -void print_block(struct buffer_head *bh, ...) -{ - va_list args; - int mode, first, last; - - if (!bh) { - printk("print_block: buffer is NULL\n"); - return; - } - - va_start(args, bh); - - mode = va_arg(args, int); - first = va_arg(args, int); - last = va_arg(args, int); - if (print_leaf(bh, mode, first, last)) - if (print_internal(bh, first, last)) - if (print_super_block(bh)) - if (print_desc_block(bh)) - printk - ("Block %llu contains unformatted data\n", - (unsigned long long)bh->b_blocknr); - - va_end(args); -} - -static char print_tb_buf[2048]; - -/* this stores initial state of tree balance in the print_tb_buf */ -void store_print_tb(struct tree_balance *tb) -{ - int h = 0; - int i; - struct buffer_head *tbSh, *tbFh; - - if (!tb) - return; - - sprintf(print_tb_buf, "\n" - "BALANCING %d\n" - "MODE=%c, ITEM_POS=%d POS_IN_ITEM=%d\n" - "=====================================================================\n" - "* h * S * L * R * F * FL * FR * CFL * CFR *\n", - REISERFS_SB(tb->tb_sb)->s_do_balance, - tb->tb_mode, PATH_LAST_POSITION(tb->tb_path), - tb->tb_path->pos_in_item); - - for (h = 0; h < ARRAY_SIZE(tb->insert_size); h++) { - if (PATH_H_PATH_OFFSET(tb->tb_path, h) <= - tb->tb_path->path_length - && PATH_H_PATH_OFFSET(tb->tb_path, - h) > ILLEGAL_PATH_ELEMENT_OFFSET) { - tbSh = PATH_H_PBUFFER(tb->tb_path, h); - tbFh = PATH_H_PPARENT(tb->tb_path, h); - } else { - tbSh = NULL; - tbFh = NULL; - } - sprintf(print_tb_buf + strlen(print_tb_buf), - "* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n", - h, - (tbSh) ? (long long)(tbSh->b_blocknr) : (-1LL), - (tbSh) ? atomic_read(&tbSh->b_count) : -1, - (tb->L[h]) ? (long long)(tb->L[h]->b_blocknr) : (-1LL), - (tb->L[h]) ? atomic_read(&tb->L[h]->b_count) : -1, - (tb->R[h]) ? (long long)(tb->R[h]->b_blocknr) : (-1LL), - (tb->R[h]) ? atomic_read(&tb->R[h]->b_count) : -1, - (tbFh) ? (long long)(tbFh->b_blocknr) : (-1LL), - (tb->FL[h]) ? (long long)(tb->FL[h]-> - b_blocknr) : (-1LL), - (tb->FR[h]) ? (long long)(tb->FR[h]-> - b_blocknr) : (-1LL), - (tb->CFL[h]) ? (long long)(tb->CFL[h]-> - b_blocknr) : (-1LL), - (tb->CFR[h]) ? 
(long long)(tb->CFR[h]-> - b_blocknr) : (-1LL)); - } - - sprintf(print_tb_buf + strlen(print_tb_buf), - "=====================================================================\n" - "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n" - "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n", - tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0], - tb->rbytes, tb->blknum[0], tb->s0num, tb->snum[0], - tb->sbytes[0], tb->snum[1], tb->sbytes[1], - tb->cur_blknum, tb->lkey[0], tb->rkey[0]); - - /* this prints balance parameters for non-leaf levels */ - h = 0; - do { - h++; - sprintf(print_tb_buf + strlen(print_tb_buf), - "* %d * %4d * %2d * * %2d * * %2d *\n", - h, tb->insert_size[h], tb->lnum[h], tb->rnum[h], - tb->blknum[h]); - } while (tb->insert_size[h]); - - sprintf(print_tb_buf + strlen(print_tb_buf), - "=====================================================================\n" - "FEB list: "); - - /* print FEB list (list of buffers in form (bh (b_blocknr, b_count), that will be used for new nodes) */ - h = 0; - for (i = 0; i < ARRAY_SIZE(tb->FEB); i++) - sprintf(print_tb_buf + strlen(print_tb_buf), - "%p (%llu %d)%s", tb->FEB[i], - tb->FEB[i] ? (unsigned long long)tb->FEB[i]-> - b_blocknr : 0ULL, - tb->FEB[i] ? atomic_read(&tb->FEB[i]->b_count) : 0, - (i == ARRAY_SIZE(tb->FEB) - 1) ? "\n" : ", "); - - sprintf(print_tb_buf + strlen(print_tb_buf), - "======================== the end ====================================\n"); -} - -void print_cur_tb(char *mes) -{ - printk("%s\n%s", mes, print_tb_buf); -} - -static void check_leaf_block_head(struct buffer_head *bh) -{ - struct block_head *blkh; - int nr; - - blkh = B_BLK_HEAD(bh); - nr = blkh_nr_item(blkh); - if (nr > (bh->b_size - BLKH_SIZE) / IH_SIZE) - reiserfs_panic(NULL, "vs-6010", "invalid item number %z", - bh); - if (blkh_free_space(blkh) > bh->b_size - BLKH_SIZE - IH_SIZE * nr) - reiserfs_panic(NULL, "vs-6020", "invalid free space %z", - bh); - -} - -static void check_internal_block_head(struct buffer_head *bh) -{ - if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT)) - reiserfs_panic(NULL, "vs-6025", "invalid level %z", bh); - - if (B_NR_ITEMS(bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE) - reiserfs_panic(NULL, "vs-6030", "invalid item number %z", bh); - - if (B_FREE_SPACE(bh) != - bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS(bh) - - DC_SIZE * (B_NR_ITEMS(bh) + 1)) - reiserfs_panic(NULL, "vs-6040", "invalid free space %z", bh); - -} - -void check_leaf(struct buffer_head *bh) -{ - int i; - struct item_head *ih; - - if (!bh) - return; - check_leaf_block_head(bh); - for (i = 0, ih = item_head(bh, 0); i < B_NR_ITEMS(bh); i++, ih++) - op_check_item(ih, ih_item_body(bh, ih)); -} - -void check_internal(struct buffer_head *bh) -{ - if (!bh) - return; - check_internal_block_head(bh); -} - -void print_statistics(struct super_block *s) -{ - - /* - printk ("reiserfs_put_super: session statistics: balances %d, fix_nodes %d, \ - bmap with search %d, without %d, dir2ind %d, ind2dir %d\n", - REISERFS_SB(s)->s_do_balance, REISERFS_SB(s)->s_fix_nodes, - REISERFS_SB(s)->s_bmaps, REISERFS_SB(s)->s_bmaps_without_search, - REISERFS_SB(s)->s_direct2indirect, REISERFS_SB(s)->s_indirect2direct); - */ - -} diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c deleted file mode 100644 index 5c68a4a52d78..000000000000 --- a/fs/reiserfs/procfs.c +++ /dev/null @@ -1,490 +0,0 @@ -/* -*- linux-c -*- */ - -/* fs/reiserfs/procfs.c */ - -/* - * Copyright 2000 by Hans 
Reiser, licensing governed by reiserfs/README - */ - -/* proc info support a la one created by Sizif@Botik.RU for PGC */ - -#include -#include -#include -#include -#include "reiserfs.h" -#include -#include -#include - -/* - * LOCKING: - * - * These guys are evicted from procfs as the very first step in ->kill_sb(). - * - */ - -static int show_version(struct seq_file *m, void *unused) -{ - struct super_block *sb = m->private; - char *format; - - if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) { - format = "3.6"; - } else if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_5)) { - format = "3.5"; - } else { - format = "unknown"; - } - - seq_printf(m, "%s format\twith checks %s\n", format, -#if defined( CONFIG_REISERFS_CHECK ) - "on" -#else - "off" -#endif - ); - return 0; -} - -#define SF( x ) ( r -> x ) -#define SFP( x ) SF( s_proc_info_data.x ) -#define SFPL( x ) SFP( x[ level ] ) -#define SFPF( x ) SFP( scan_bitmap.x ) -#define SFPJ( x ) SFP( journal.x ) - -#define D2C( x ) le16_to_cpu( x ) -#define D4C( x ) le32_to_cpu( x ) -#define DF( x ) D2C( rs -> s_v1.x ) -#define DFL( x ) D4C( rs -> s_v1.x ) - -#define objectid_map( s, rs ) (old_format_only (s) ? \ - (__le32 *)((struct reiserfs_super_block_v1 *)rs + 1) : \ - (__le32 *)(rs + 1)) -#define MAP( i ) D4C( objectid_map( sb, rs )[ i ] ) - -#define DJF( x ) le32_to_cpu( rs -> x ) -#define DJP( x ) le32_to_cpu( jp -> x ) -#define JF( x ) ( r -> s_journal -> x ) - -static int show_super(struct seq_file *m, void *unused) -{ - struct super_block *sb = m->private; - struct reiserfs_sb_info *r = REISERFS_SB(sb); - - seq_printf(m, "state: \t%s\n" - "mount options: \t%s%s%s%s%s%s%s%s%s%s%s\n" - "gen. counter: \t%i\n" - "s_disk_reads: \t%i\n" - "s_disk_writes: \t%i\n" - "s_fix_nodes: \t%i\n" - "s_do_balance: \t%i\n" - "s_unneeded_left_neighbor: \t%i\n" - "s_good_search_by_key_reada: \t%i\n" - "s_bmaps: \t%i\n" - "s_bmaps_without_search: \t%i\n" - "s_direct2indirect: \t%i\n" - "s_indirect2direct: \t%i\n" - "\n" - "max_hash_collisions: \t%i\n" - "breads: \t%lu\n" - "bread_misses: \t%lu\n" - "search_by_key: \t%lu\n" - "search_by_key_fs_changed: \t%lu\n" - "search_by_key_restarted: \t%lu\n" - "insert_item_restarted: \t%lu\n" - "paste_into_item_restarted: \t%lu\n" - "cut_from_item_restarted: \t%lu\n" - "delete_solid_item_restarted: \t%lu\n" - "delete_item_restarted: \t%lu\n" - "leaked_oid: \t%lu\n" - "leaves_removable: \t%lu\n", - SF(s_mount_state) == REISERFS_VALID_FS ? - "REISERFS_VALID_FS" : "REISERFS_ERROR_FS", - reiserfs_r5_hash(sb) ? "FORCE_R5 " : "", - reiserfs_rupasov_hash(sb) ? "FORCE_RUPASOV " : "", - reiserfs_tea_hash(sb) ? "FORCE_TEA " : "", - reiserfs_hash_detect(sb) ? "DETECT_HASH " : "", - reiserfs_no_border(sb) ? "NO_BORDER " : "BORDER ", - reiserfs_no_unhashed_relocation(sb) ? - "NO_UNHASHED_RELOCATION " : "", - reiserfs_hashed_relocation(sb) ? "UNHASHED_RELOCATION " : "", - reiserfs_test4(sb) ? "TEST4 " : "", - have_large_tails(sb) ? "TAILS " : have_small_tails(sb) ? - "SMALL_TAILS " : "NO_TAILS ", - replay_only(sb) ? "REPLAY_ONLY " : "", - convert_reiserfs(sb) ? 
"CONV " : "", - atomic_read(&r->s_generation_counter), - SF(s_disk_reads), SF(s_disk_writes), SF(s_fix_nodes), - SF(s_do_balance), SF(s_unneeded_left_neighbor), - SF(s_good_search_by_key_reada), SF(s_bmaps), - SF(s_bmaps_without_search), SF(s_direct2indirect), - SF(s_indirect2direct), SFP(max_hash_collisions), SFP(breads), - SFP(bread_miss), SFP(search_by_key), - SFP(search_by_key_fs_changed), SFP(search_by_key_restarted), - SFP(insert_item_restarted), SFP(paste_into_item_restarted), - SFP(cut_from_item_restarted), - SFP(delete_solid_item_restarted), SFP(delete_item_restarted), - SFP(leaked_oid), SFP(leaves_removable)); - - return 0; -} - -static int show_per_level(struct seq_file *m, void *unused) -{ - struct super_block *sb = m->private; - struct reiserfs_sb_info *r = REISERFS_SB(sb); - int level; - - seq_printf(m, "level\t" - " balances" - " [sbk: reads" - " fs_changed" - " restarted]" - " free space" - " items" - " can_remove" - " lnum" - " rnum" - " lbytes" - " rbytes" - " get_neig" - " get_neig_res" " need_l_neig" " need_r_neig" "\n"); - - for (level = 0; level < MAX_HEIGHT; ++level) { - seq_printf(m, "%i\t" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12li" - " %12li" - " %12li" - " %12li" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - "\n", - level, - SFPL(balance_at), - SFPL(sbk_read_at), - SFPL(sbk_fs_changed), - SFPL(sbk_restarted), - SFPL(free_at), - SFPL(items_at), - SFPL(can_node_be_removed), - SFPL(lnum), - SFPL(rnum), - SFPL(lbytes), - SFPL(rbytes), - SFPL(get_neighbors), - SFPL(get_neighbors_restart), - SFPL(need_l_neighbor), SFPL(need_r_neighbor) - ); - } - return 0; -} - -static int show_bitmap(struct seq_file *m, void *unused) -{ - struct super_block *sb = m->private; - struct reiserfs_sb_info *r = REISERFS_SB(sb); - - seq_printf(m, "free_block: %lu\n" - " scan_bitmap:" - " wait" - " bmap" - " retry" - " stolen" - " journal_hint" - "journal_nohint" - "\n" - " %14lu" - " %14lu" - " %14lu" - " %14lu" - " %14lu" - " %14lu" - " %14lu" - "\n", - SFP(free_block), - SFPF(call), - SFPF(wait), - SFPF(bmap), - SFPF(retry), - SFPF(stolen), - SFPF(in_journal_hint), SFPF(in_journal_nohint)); - - return 0; -} - -static int show_on_disk_super(struct seq_file *m, void *unused) -{ - struct super_block *sb = m->private; - struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); - struct reiserfs_super_block *rs = sb_info->s_rs; - int hash_code = DFL(s_hash_function_code); - __u32 flags = DJF(s_flags); - - seq_printf(m, "block_count: \t%i\n" - "free_blocks: \t%i\n" - "root_block: \t%i\n" - "blocksize: \t%i\n" - "oid_maxsize: \t%i\n" - "oid_cursize: \t%i\n" - "umount_state: \t%i\n" - "magic: \t%10.10s\n" - "fs_state: \t%i\n" - "hash: \t%s\n" - "tree_height: \t%i\n" - "bmap_nr: \t%i\n" - "version: \t%i\n" - "flags: \t%x[%s]\n" - "reserved_for_journal: \t%i\n", - DFL(s_block_count), - DFL(s_free_blocks), - DFL(s_root_block), - DF(s_blocksize), - DF(s_oid_maxsize), - DF(s_oid_cursize), - DF(s_umount_state), - rs->s_v1.s_magic, - DF(s_fs_state), - hash_code == TEA_HASH ? "tea" : - (hash_code == YURA_HASH) ? "rupasov" : - (hash_code == R5_HASH) ? "r5" : - (hash_code == UNSET_HASH) ? "unset" : "unknown", - DF(s_tree_height), - DF(s_bmap_nr), - DF(s_version), flags, (flags & reiserfs_attrs_cleared) - ? 
"attrs_cleared" : "", DF(s_reserved_for_journal)); - - return 0; -} - -static int show_oidmap(struct seq_file *m, void *unused) -{ - struct super_block *sb = m->private; - struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); - struct reiserfs_super_block *rs = sb_info->s_rs; - unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize); - unsigned long total_used = 0; - int i; - - for (i = 0; i < mapsize; ++i) { - __u32 right; - - right = (i == mapsize - 1) ? MAX_KEY_OBJECTID : MAP(i + 1); - seq_printf(m, "%s: [ %x .. %x )\n", - (i & 1) ? "free" : "used", MAP(i), right); - if (!(i & 1)) { - total_used += right - MAP(i); - } - } -#if defined( REISERFS_USE_OIDMAPF ) - if (sb_info->oidmap.use_file && (sb_info->oidmap.mapf != NULL)) { - loff_t size = file_inode(sb_info->oidmap.mapf)->i_size; - total_used += size / sizeof(reiserfs_oidinterval_d_t); - } -#endif - seq_printf(m, "total: \t%i [%i/%i] used: %lu [exact]\n", - mapsize, - mapsize, le16_to_cpu(rs->s_v1.s_oid_maxsize), total_used); - return 0; -} - -static time64_t ktime_mono_to_real_seconds(time64_t mono) -{ - ktime_t kt = ktime_set(mono, NSEC_PER_SEC/2); - - return ktime_divns(ktime_mono_to_real(kt), NSEC_PER_SEC); -} - -static int show_journal(struct seq_file *m, void *unused) -{ - struct super_block *sb = m->private; - struct reiserfs_sb_info *r = REISERFS_SB(sb); - struct reiserfs_super_block *rs = r->s_rs; - struct journal_params *jp = &rs->s_v1.s_journal; - - seq_printf(m, /* on-disk fields */ - "jp_journal_1st_block: \t%i\n" - "jp_journal_dev: \t%pg[%x]\n" - "jp_journal_size: \t%i\n" - "jp_journal_trans_max: \t%i\n" - "jp_journal_magic: \t%i\n" - "jp_journal_max_batch: \t%i\n" - "jp_journal_max_commit_age: \t%i\n" - "jp_journal_max_trans_age: \t%i\n" - /* incore fields */ - "j_1st_reserved_block: \t%i\n" - "j_state: \t%li\n" - "j_trans_id: \t%u\n" - "j_mount_id: \t%lu\n" - "j_start: \t%lu\n" - "j_len: \t%lu\n" - "j_len_alloc: \t%lu\n" - "j_wcount: \t%i\n" - "j_bcount: \t%lu\n" - "j_first_unflushed_offset: \t%lu\n" - "j_last_flush_trans_id: \t%u\n" - "j_trans_start_time: \t%lli\n" - "j_list_bitmap_index: \t%i\n" - "j_must_wait: \t%i\n" - "j_next_full_flush: \t%i\n" - "j_next_async_flush: \t%i\n" - "j_cnode_used: \t%i\n" "j_cnode_free: \t%i\n" "\n" - /* reiserfs_proc_info_data_t.journal fields */ - "in_journal: \t%12lu\n" - "in_journal_bitmap: \t%12lu\n" - "in_journal_reusable: \t%12lu\n" - "lock_journal: \t%12lu\n" - "lock_journal_wait: \t%12lu\n" - "journal_begin: \t%12lu\n" - "journal_relock_writers: \t%12lu\n" - "journal_relock_wcount: \t%12lu\n" - "mark_dirty: \t%12lu\n" - "mark_dirty_already: \t%12lu\n" - "mark_dirty_notjournal: \t%12lu\n" - "restore_prepared: \t%12lu\n" - "prepare: \t%12lu\n" - "prepare_retry: \t%12lu\n", - DJP(jp_journal_1st_block), - file_bdev(SB_JOURNAL(sb)->j_bdev_file), - DJP(jp_journal_dev), - DJP(jp_journal_size), - DJP(jp_journal_trans_max), - DJP(jp_journal_magic), - DJP(jp_journal_max_batch), - SB_JOURNAL(sb)->j_max_commit_age, - DJP(jp_journal_max_trans_age), - JF(j_1st_reserved_block), - JF(j_state), - JF(j_trans_id), - JF(j_mount_id), - JF(j_start), - JF(j_len), - JF(j_len_alloc), - atomic_read(&r->s_journal->j_wcount), - JF(j_bcount), - JF(j_first_unflushed_offset), - JF(j_last_flush_trans_id), - ktime_mono_to_real_seconds(JF(j_trans_start_time)), - JF(j_list_bitmap_index), - JF(j_must_wait), - JF(j_next_full_flush), - JF(j_next_async_flush), - JF(j_cnode_used), - JF(j_cnode_free), - SFPJ(in_journal), - SFPJ(in_journal_bitmap), - SFPJ(in_journal_reusable), - SFPJ(lock_journal), - 
SFPJ(lock_journal_wait), - SFPJ(journal_being), - SFPJ(journal_relock_writers), - SFPJ(journal_relock_wcount), - SFPJ(mark_dirty), - SFPJ(mark_dirty_already), - SFPJ(mark_dirty_notjournal), - SFPJ(restore_prepared), SFPJ(prepare), SFPJ(prepare_retry) - ); - return 0; -} - -static struct proc_dir_entry *proc_info_root = NULL; -static const char proc_info_root_name[] = "fs/reiserfs"; - -static void add_file(struct super_block *sb, char *name, - int (*func) (struct seq_file *, void *)) -{ - proc_create_single_data(name, 0, REISERFS_SB(sb)->procdir, func, sb); -} - -int reiserfs_proc_info_init(struct super_block *sb) -{ - char b[BDEVNAME_SIZE]; - char *s; - - /* Some block devices use /'s */ - strscpy(b, sb->s_id, BDEVNAME_SIZE); - s = strchr(b, '/'); - if (s) - *s = '!'; - - spin_lock_init(&__PINFO(sb).lock); - REISERFS_SB(sb)->procdir = proc_mkdir_data(b, 0, proc_info_root, sb); - if (REISERFS_SB(sb)->procdir) { - add_file(sb, "version", show_version); - add_file(sb, "super", show_super); - add_file(sb, "per-level", show_per_level); - add_file(sb, "bitmap", show_bitmap); - add_file(sb, "on-disk-super", show_on_disk_super); - add_file(sb, "oidmap", show_oidmap); - add_file(sb, "journal", show_journal); - return 0; - } - reiserfs_warning(sb, "cannot create /proc/%s/%s", - proc_info_root_name, b); - return 1; -} - -int reiserfs_proc_info_done(struct super_block *sb) -{ - struct proc_dir_entry *de = REISERFS_SB(sb)->procdir; - if (de) { - char b[BDEVNAME_SIZE]; - char *s; - - /* Some block devices use /'s */ - strscpy(b, sb->s_id, BDEVNAME_SIZE); - s = strchr(b, '/'); - if (s) - *s = '!'; - - remove_proc_subtree(b, proc_info_root); - REISERFS_SB(sb)->procdir = NULL; - } - return 0; -} - -int reiserfs_proc_info_global_init(void) -{ - if (proc_info_root == NULL) { - proc_info_root = proc_mkdir(proc_info_root_name, NULL); - if (!proc_info_root) { - reiserfs_warning(NULL, "cannot create /proc/%s", - proc_info_root_name); - return 1; - } - } - return 0; -} - -int reiserfs_proc_info_global_done(void) -{ - if (proc_info_root != NULL) { - proc_info_root = NULL; - remove_proc_entry(proc_info_root_name, NULL); - } - return 0; -} -/* - * Revision 1.1.8.2 2001/07/15 17:08:42 god - * . use get_super() in procfs.c - * . remove remove_save_link() from reiserfs_do_truncate() - * - * I accept terms and conditions stated in the Legal Agreement - * (available at http://www.namesys.com/legalese.html) - * - * Revision 1.1.8.1 2001/07/11 16:48:50 god - * proc info support - * - * I accept terms and conditions stated in the Legal Agreement - * (available at http://www.namesys.com/legalese.html) - * - */ diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h deleted file mode 100644 index 12fc20af8e17..000000000000 --- a/fs/reiserfs/reiserfs.h +++ /dev/null @@ -1,3419 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for - * licensing and copyright details - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* the 32 bit compat definitions with int argument */ -#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int) -#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION -#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION - -struct reiserfs_journal_list; - -/* bitmasks for i_flags field in reiserfs-specific part of inode */ -typedef enum { - /* - * this says what format of key do all items (but stat data) of - * an object have. 
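The /proc files registered by add_file() above rely on the single-show seq_file helper: proc_create_single_data() stores the data pointer and the core hands it back to the show callback as m->private, which is why every show_*() routine starts by pulling the super block out of m->private. Reduced to its bare bones (illustrative names, not part of reiserfs), the pattern is:

#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* the 'data' pointer handed to proc_create_single_data() comes back as m->private */
static int demo_show(struct seq_file *m, void *unused)
{
	struct super_block *sb = m->private;

	seq_printf(m, "device: %s\n", sb->s_id);
	return 0;
}

static int demo_register(struct super_block *sb, struct proc_dir_entry *parent)
{
	if (!proc_create_single_data("demo", 0, parent, demo_show, sb))
		return -ENOMEM;
	return 0;
}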
If this is set, that format is 3.6 otherwise - 3.5 - */ - i_item_key_version_mask = 0x0001, - - /* - * If this is unset, object has 3.5 stat data, otherwise, - * it has 3.6 stat data with 64bit size, 32bit nlink etc. - */ - i_stat_data_version_mask = 0x0002, - - /* file might need tail packing on close */ - i_pack_on_close_mask = 0x0004, - - /* don't pack tail of file */ - i_nopack_mask = 0x0008, - - /* - * If either of these are set, "safe link" was created for this - * file during truncate or unlink. Safe link is used to avoid - * leakage of disk space on crash with some files open, but unlinked. - */ - i_link_saved_unlink_mask = 0x0010, - i_link_saved_truncate_mask = 0x0020, - - i_has_xattr_dir = 0x0040, - i_data_log = 0x0080, -} reiserfs_inode_flags; - -struct reiserfs_inode_info { - __u32 i_key[4]; /* key is still 4 32 bit integers */ - - /* - * transient inode flags that are never stored on disk. Bitmasks - * for this field are defined above. - */ - __u32 i_flags; - - /* offset of first byte stored in direct item. */ - __u32 i_first_direct_byte; - - /* copy of persistent inode flags read from sd_attrs. */ - __u32 i_attrs; - - /* first unused block of a sequence of unused blocks */ - int i_prealloc_block; - int i_prealloc_count; /* length of that sequence */ - - /* per-transaction list of inodes which have preallocated blocks */ - struct list_head i_prealloc_list; - - /* - * new_packing_locality is created; new blocks for the contents - * of this directory should be displaced - */ - unsigned new_packing_locality:1; - - /* - * we use these for fsync or O_SYNC to decide which transaction - * needs to be committed in order for this inode to be properly - * flushed - */ - unsigned int i_trans_id; - - struct reiserfs_journal_list *i_jl; - atomic_t openers; - struct mutex tailpack; -#ifdef CONFIG_REISERFS_FS_XATTR - struct rw_semaphore i_xattr_sem; -#endif -#ifdef CONFIG_QUOTA - struct dquot __rcu *i_dquot[MAXQUOTAS]; -#endif - - struct inode vfs_inode; -}; - -typedef enum { - reiserfs_attrs_cleared = 0x00000001, -} reiserfs_super_block_flags; - -/* - * struct reiserfs_super_block accessors/mutators since this is a disk - * structure, it will always be in little endian format. 
- */
-#define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count))
-#define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v))
-#define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks))
-#define set_sb_free_blocks(sbp,v) ((sbp)->s_v1.s_free_blocks = cpu_to_le32(v))
-#define sb_root_block(sbp) (le32_to_cpu((sbp)->s_v1.s_root_block))
-#define set_sb_root_block(sbp,v) ((sbp)->s_v1.s_root_block = cpu_to_le32(v))
-
-#define sb_jp_journal_1st_block(sbp) \
- (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_1st_block))
-#define set_sb_jp_journal_1st_block(sbp,v) \
- ((sbp)->s_v1.s_journal.jp_journal_1st_block = cpu_to_le32(v))
-#define sb_jp_journal_dev(sbp) \
- (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_dev))
-#define set_sb_jp_journal_dev(sbp,v) \
- ((sbp)->s_v1.s_journal.jp_journal_dev = cpu_to_le32(v))
-#define sb_jp_journal_size(sbp) \
- (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_size))
-#define set_sb_jp_journal_size(sbp,v) \
- ((sbp)->s_v1.s_journal.jp_journal_size = cpu_to_le32(v))
-#define sb_jp_journal_trans_max(sbp) \
- (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_trans_max))
-#define set_sb_jp_journal_trans_max(sbp,v) \
- ((sbp)->s_v1.s_journal.jp_journal_trans_max = cpu_to_le32(v))
-#define sb_jp_journal_magic(sbp) \
- (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_magic))
-#define set_sb_jp_journal_magic(sbp,v) \
- ((sbp)->s_v1.s_journal.jp_journal_magic = cpu_to_le32(v))
-#define sb_jp_journal_max_batch(sbp) \
- (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_batch))
-#define set_sb_jp_journal_max_batch(sbp,v) \
- ((sbp)->s_v1.s_journal.jp_journal_max_batch = cpu_to_le32(v))
-#define sb_jp_jourmal_max_commit_age(sbp) \
- (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_commit_age))
-#define set_sb_jp_journal_max_commit_age(sbp,v) \
- ((sbp)->s_v1.s_journal.jp_journal_max_commit_age = cpu_to_le32(v))
-
-#define sb_blocksize(sbp) (le16_to_cpu((sbp)->s_v1.s_blocksize))
-#define set_sb_blocksize(sbp,v) ((sbp)->s_v1.s_blocksize = cpu_to_le16(v))
-#define sb_oid_maxsize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_maxsize))
-#define set_sb_oid_maxsize(sbp,v) ((sbp)->s_v1.s_oid_maxsize = cpu_to_le16(v))
-#define sb_oid_cursize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_cursize))
-#define set_sb_oid_cursize(sbp,v) ((sbp)->s_v1.s_oid_cursize = cpu_to_le16(v))
-#define sb_umount_state(sbp) (le16_to_cpu((sbp)->s_v1.s_umount_state))
-#define set_sb_umount_state(sbp,v) ((sbp)->s_v1.s_umount_state = cpu_to_le16(v))
-#define sb_fs_state(sbp) (le16_to_cpu((sbp)->s_v1.s_fs_state))
-#define set_sb_fs_state(sbp,v) ((sbp)->s_v1.s_fs_state = cpu_to_le16(v))
-#define sb_hash_function_code(sbp) \
- (le32_to_cpu((sbp)->s_v1.s_hash_function_code))
-#define set_sb_hash_function_code(sbp,v) \
- ((sbp)->s_v1.s_hash_function_code = cpu_to_le32(v))
-#define sb_tree_height(sbp) (le16_to_cpu((sbp)->s_v1.s_tree_height))
-#define set_sb_tree_height(sbp,v) ((sbp)->s_v1.s_tree_height = cpu_to_le16(v))
-#define sb_bmap_nr(sbp) (le16_to_cpu((sbp)->s_v1.s_bmap_nr))
-#define set_sb_bmap_nr(sbp,v) ((sbp)->s_v1.s_bmap_nr = cpu_to_le16(v))
-#define sb_version(sbp) (le16_to_cpu((sbp)->s_v1.s_version))
-#define set_sb_version(sbp,v) ((sbp)->s_v1.s_version = cpu_to_le16(v))
-
-#define sb_mnt_count(sbp) (le16_to_cpu((sbp)->s_mnt_count))
-#define set_sb_mnt_count(sbp, v) ((sbp)->s_mnt_count = cpu_to_le16(v))
-
-#define sb_reserved_for_journal(sbp) \
- (le16_to_cpu((sbp)->s_v1.s_reserved_for_journal))
-#define set_sb_reserved_for_journal(sbp,v) \
- ((sbp)->s_v1.s_reserved_for_journal = cpu_to_le16(v))
- -/* LOGGING -- */ - -/* - * These all interelate for performance. - * - * If the journal block count is smaller than n transactions, you lose speed. - * I don't know what n is yet, I'm guessing 8-16. - * - * typical transaction size depends on the application, how often fsync is - * called, and how many metadata blocks you dirty in a 30 second period. - * The more small files (<16k) you use, the larger your transactions will - * be. - * - * If your journal fills faster than dirty buffers get flushed to disk, it - * must flush them before allowing the journal to wrap, which slows things - * down. If you need high speed meta data updates, the journal should be - * big enough to prevent wrapping before dirty meta blocks get to disk. - * - * If the batch max is smaller than the transaction max, you'll waste space - * at the end of the journal because journal_end sets the next transaction - * to start at 0 if the next transaction has any chance of wrapping. - * - * The large the batch max age, the better the speed, and the more meta - * data changes you'll lose after a crash. - */ - -/* don't mess with these for a while */ -/* we have a node size define somewhere in reiserfs_fs.h. -Hans */ -#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ -#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ -#define JOURNAL_HASH_SIZE 8192 - -/* number of copies of the bitmaps to have floating. Must be >= 2 */ -#define JOURNAL_NUM_BITMAPS 5 - -/* - * One of these for every block in every transaction - * Each one is in two hash tables. First, a hash of the current transaction, - * and after journal_end, a hash of all the in memory transactions. - * next and prev are used by the current transaction (journal_hash). - * hnext and hprev are used by journal_list_hash. If a block is in more - * than one transaction, the journal_list_hash links it in multiple times. - * This allows flush_journal_list to remove just the cnode belonging to a - * given transaction. - */ -struct reiserfs_journal_cnode { - struct buffer_head *bh; /* real buffer head */ - struct super_block *sb; /* dev of real buffer head */ - - /* block number of real buffer head, == 0 when buffer on disk */ - __u32 blocknr; - - unsigned long state; - - /* journal list this cnode lives in */ - struct reiserfs_journal_list *jlist; - - struct reiserfs_journal_cnode *next; /* next in transaction list */ - struct reiserfs_journal_cnode *prev; /* prev in transaction list */ - struct reiserfs_journal_cnode *hprev; /* prev in hash list */ - struct reiserfs_journal_cnode *hnext; /* next in hash list */ -}; - -struct reiserfs_bitmap_node { - int id; - char *data; - struct list_head list; -}; - -struct reiserfs_list_bitmap { - struct reiserfs_journal_list *journal_list; - struct reiserfs_bitmap_node **bitmaps; -}; - -/* - * one of these for each transaction. The most important part here is the - * j_realblock. 
this list of cnodes is used to hash all the blocks in all - * the commits, to mark all the real buffer heads dirty once all the commits - * hit the disk, and to make sure every real block in a transaction is on - * disk before allowing the log area to be overwritten - */ -struct reiserfs_journal_list { - unsigned long j_start; - unsigned long j_state; - unsigned long j_len; - atomic_t j_nonzerolen; - atomic_t j_commit_left; - - /* all commits older than this on disk */ - atomic_t j_older_commits_done; - - struct mutex j_commit_mutex; - unsigned int j_trans_id; - time64_t j_timestamp; /* write-only but useful for crash dump analysis */ - struct reiserfs_list_bitmap *j_list_bitmap; - struct buffer_head *j_commit_bh; /* commit buffer head */ - struct reiserfs_journal_cnode *j_realblock; - struct reiserfs_journal_cnode *j_freedlist; /* list of buffers that were freed during this trans. free each of these on flush */ - /* time ordered list of all active transactions */ - struct list_head j_list; - - /* - * time ordered list of all transactions we haven't tried - * to flush yet - */ - struct list_head j_working_list; - - /* list of tail conversion targets in need of flush before commit */ - struct list_head j_tail_bh_list; - - /* list of data=ordered buffers in need of flush before commit */ - struct list_head j_bh_list; - int j_refcount; -}; - -struct reiserfs_journal { - struct buffer_head **j_ap_blocks; /* journal blocks on disk */ - /* newest journal block */ - struct reiserfs_journal_cnode *j_last; - - /* oldest journal block. start here for traverse */ - struct reiserfs_journal_cnode *j_first; - - struct file *j_bdev_file; - - /* first block on s_dev of reserved area journal */ - int j_1st_reserved_block; - - unsigned long j_state; - unsigned int j_trans_id; - unsigned long j_mount_id; - - /* start of current waiting commit (index into j_ap_blocks) */ - unsigned long j_start; - unsigned long j_len; /* length of current waiting commit */ - - /* number of buffers requested by journal_begin() */ - unsigned long j_len_alloc; - - atomic_t j_wcount; /* count of writers for current commit */ - - /* batch count. allows turning X transactions into 1 */ - unsigned long j_bcount; - - /* first unflushed transactions offset */ - unsigned long j_first_unflushed_offset; - - /* last fully flushed journal timestamp */ - unsigned j_last_flush_trans_id; - - struct buffer_head *j_header_bh; - - time64_t j_trans_start_time; /* time this transaction started */ - struct mutex j_mutex; - struct mutex j_flush_mutex; - - /* wait for current transaction to finish before starting new one */ - wait_queue_head_t j_join_wait; - - atomic_t j_jlock; /* lock for j_join_wait */ - int j_list_bitmap_index; /* number of next list bitmap to use */ - - /* no more journal begins allowed. MUST sleep on j_join_wait */ - int j_must_wait; - - /* next journal_end will flush all journal list */ - int j_next_full_flush; - - /* next journal_end will flush all async commits */ - int j_next_async_flush; - - int j_cnode_used; /* number of cnodes on the used list */ - int j_cnode_free; /* number of cnodes on the free list */ - - /* max number of blocks in a transaction. 
*/ - unsigned int j_trans_max; - - /* max number of blocks to batch into a trans */ - unsigned int j_max_batch; - - /* in seconds, how old can an async commit be */ - unsigned int j_max_commit_age; - - /* in seconds, how old can a transaction be */ - unsigned int j_max_trans_age; - - /* the default for the max commit age */ - unsigned int j_default_max_commit_age; - - struct reiserfs_journal_cnode *j_cnode_free_list; - - /* orig pointer returned from vmalloc */ - struct reiserfs_journal_cnode *j_cnode_free_orig; - - struct reiserfs_journal_list *j_current_jl; - int j_free_bitmap_nodes; - int j_used_bitmap_nodes; - - int j_num_lists; /* total number of active transactions */ - int j_num_work_lists; /* number that need attention from kreiserfsd */ - - /* debugging to make sure things are flushed in order */ - unsigned int j_last_flush_id; - - /* debugging to make sure things are committed in order */ - unsigned int j_last_commit_id; - - struct list_head j_bitmap_nodes; - struct list_head j_dirty_buffers; - spinlock_t j_dirty_buffers_lock; /* protects j_dirty_buffers */ - - /* list of all active transactions */ - struct list_head j_journal_list; - - /* lists that haven't been touched by writeback attempts */ - struct list_head j_working_list; - - /* hash table for real buffer heads in current trans */ - struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; - - /* hash table for all the real buffer heads in all the transactions */ - struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; - - /* array of bitmaps to record the deleted blocks */ - struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; - - /* list of inodes which have preallocated blocks */ - struct list_head j_prealloc_list; - int j_persistent_trans; - unsigned long j_max_trans_size; - unsigned long j_max_batch_size; - - int j_errno; - - /* when flushing ordered buffers, throttle new ordered writers */ - struct delayed_work j_work; - struct super_block *j_work_sb; - atomic_t j_async_throttle; -}; - -enum journal_state_bits { - J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */ - J_WRITERS_QUEUED, /* set when log is full due to too many writers */ - J_ABORTED, /* set when log is aborted */ -}; - -/* ick. magic string to find desc blocks in the journal */ -#define JOURNAL_DESC_MAGIC "ReIsErLB" - -typedef __u32(*hashf_t) (const signed char *, int); - -struct reiserfs_bitmap_info { - __u32 free_count; -}; - -struct proc_dir_entry; - -#if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO ) -typedef unsigned long int stat_cnt_t; -typedef struct reiserfs_proc_info_data { - spinlock_t lock; - int exiting; - int max_hash_collisions; - - stat_cnt_t breads; - stat_cnt_t bread_miss; - stat_cnt_t search_by_key; - stat_cnt_t search_by_key_fs_changed; - stat_cnt_t search_by_key_restarted; - - stat_cnt_t insert_item_restarted; - stat_cnt_t paste_into_item_restarted; - stat_cnt_t cut_from_item_restarted; - stat_cnt_t delete_solid_item_restarted; - stat_cnt_t delete_item_restarted; - - stat_cnt_t leaked_oid; - stat_cnt_t leaves_removable; - - /* - * balances per level. - * Use explicit 5 as MAX_HEIGHT is not visible yet. 
- */ - stat_cnt_t balance_at[5]; /* XXX */ - /* sbk == search_by_key */ - stat_cnt_t sbk_read_at[5]; /* XXX */ - stat_cnt_t sbk_fs_changed[5]; - stat_cnt_t sbk_restarted[5]; - stat_cnt_t items_at[5]; /* XXX */ - stat_cnt_t free_at[5]; /* XXX */ - stat_cnt_t can_node_be_removed[5]; /* XXX */ - long int lnum[5]; /* XXX */ - long int rnum[5]; /* XXX */ - long int lbytes[5]; /* XXX */ - long int rbytes[5]; /* XXX */ - stat_cnt_t get_neighbors[5]; - stat_cnt_t get_neighbors_restart[5]; - stat_cnt_t need_l_neighbor[5]; - stat_cnt_t need_r_neighbor[5]; - - stat_cnt_t free_block; - struct __scan_bitmap_stats { - stat_cnt_t call; - stat_cnt_t wait; - stat_cnt_t bmap; - stat_cnt_t retry; - stat_cnt_t in_journal_hint; - stat_cnt_t in_journal_nohint; - stat_cnt_t stolen; - } scan_bitmap; - struct __journal_stats { - stat_cnt_t in_journal; - stat_cnt_t in_journal_bitmap; - stat_cnt_t in_journal_reusable; - stat_cnt_t lock_journal; - stat_cnt_t lock_journal_wait; - stat_cnt_t journal_being; - stat_cnt_t journal_relock_writers; - stat_cnt_t journal_relock_wcount; - stat_cnt_t mark_dirty; - stat_cnt_t mark_dirty_already; - stat_cnt_t mark_dirty_notjournal; - stat_cnt_t restore_prepared; - stat_cnt_t prepare; - stat_cnt_t prepare_retry; - } journal; -} reiserfs_proc_info_data_t; -#else -typedef struct reiserfs_proc_info_data { -} reiserfs_proc_info_data_t; -#endif - -/* Number of quota types we support */ -#define REISERFS_MAXQUOTAS 2 - -/* reiserfs union of in-core super block data */ -struct reiserfs_sb_info { - /* Buffer containing the super block */ - struct buffer_head *s_sbh; - - /* Pointer to the on-disk super block in the buffer */ - struct reiserfs_super_block *s_rs; - struct reiserfs_bitmap_info *s_ap_bitmap; - - /* pointer to journal information */ - struct reiserfs_journal *s_journal; - - unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ - - /* Serialize writers access, replace the old bkl */ - struct mutex lock; - - /* Owner of the lock (can be recursive) */ - struct task_struct *lock_owner; - - /* Depth of the lock, start from -1 like the bkl */ - int lock_depth; - - struct workqueue_struct *commit_wq; - - /* Comment? -Hans */ - void (*end_io_handler) (struct buffer_head *, int); - - /* - * pointer to function which is used to sort names in directory. - * Set on mount - */ - hashf_t s_hash_function; - - /* reiserfs's mount options are set here */ - unsigned long s_mount_opt; - - /* This is a structure that describes block allocator options */ - struct { - /* Bitfield for enable/disable kind of options */ - unsigned long bits; - - /* - * size started from which we consider file - * to be a large one (in blocks) - */ - unsigned long large_file_size; - - int border; /* percentage of disk, border takes */ - - /* - * Minimal file size (in blocks) starting - * from which we do preallocations - */ - int preallocmin; - - /* - * Number of blocks we try to prealloc when file - * reaches preallocmin size (in blocks) or prealloc_list - is empty. - */ - int preallocsize; - } s_alloc_options; - - /* Comment? -Hans */ - wait_queue_head_t s_wait; - /* increased by one every time the tree gets re-balanced */ - atomic_t s_generation_counter; - - /* File system properties. 
Currently holds on-disk FS format */ - unsigned long s_properties; - - /* session statistics */ - int s_disk_reads; - int s_disk_writes; - int s_fix_nodes; - int s_do_balance; - int s_unneeded_left_neighbor; - int s_good_search_by_key_reada; - int s_bmaps; - int s_bmaps_without_search; - int s_direct2indirect; - int s_indirect2direct; - - /* - * set up when it's ok for reiserfs_read_inode2() to read from - * disk inode with nlink==0. Currently this is only used during - * finish_unfinished() processing at mount time - */ - int s_is_unlinked_ok; - - reiserfs_proc_info_data_t s_proc_info_data; - struct proc_dir_entry *procdir; - - /* amount of blocks reserved for further allocations */ - int reserved_blocks; - - - /* this lock on now only used to protect reserved_blocks variable */ - spinlock_t bitmap_lock; - struct dentry *priv_root; /* root of /.reiserfs_priv */ - struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ - int j_errno; - - int work_queued; /* non-zero delayed work is queued */ - struct delayed_work old_work; /* old transactions flush delayed work */ - spinlock_t old_work_lock; /* protects old_work and work_queued */ - -#ifdef CONFIG_QUOTA - char *s_qf_names[REISERFS_MAXQUOTAS]; - int s_jquota_fmt; -#endif - char *s_jdev; /* Stored jdev for mount option showing */ -#ifdef CONFIG_REISERFS_CHECK - - /* - * Detects whether more than one copy of tb exists per superblock - * as a means of checking whether do_balance is executing - * concurrently against another tree reader/writer on a same - * mount point. - */ - struct tree_balance *cur_tb; -#endif -}; - -/* Definitions of reiserfs on-disk properties: */ -#define REISERFS_3_5 0 -#define REISERFS_3_6 1 -#define REISERFS_OLD_FORMAT 2 - -/* Mount options */ -enum reiserfs_mount_options { - /* large tails will be created in a session */ - REISERFS_LARGETAIL, - /* - * small (for files less than block size) tails will - * be created in a session - */ - REISERFS_SMALLTAIL, - - /* replay journal and return 0. Use by fsck */ - REPLAYONLY, - - /* - * -o conv: causes conversion of old format super block to the - * new format. If not specified - old partition will be dealt - * with in a manner of 3.5.x - */ - REISERFS_CONVERT, - - /* - * -o hash={tea, rupasov, r5, detect} is meant for properly mounting - * reiserfs disks from 3.5.19 or earlier. 99% of the time, this - * option is not required. If the normal autodection code can't - * determine which hash to use (because both hashes had the same - * value for a file) use this option to force a specific hash. - * It won't allow you to override the existing hash on the FS, so - * if you have a tea hash disk, and mount with -o hash=rupasov, - * the mount will fail. 
- */ - FORCE_TEA_HASH, /* try to force tea hash on mount */ - FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ - FORCE_R5_HASH, /* try to force rupasov hash on mount */ - FORCE_HASH_DETECT, /* try to detect hash function on mount */ - - REISERFS_DATA_LOG, - REISERFS_DATA_ORDERED, - REISERFS_DATA_WRITEBACK, - - /* - * used for testing experimental features, makes benchmarking new - * features with and without more convenient, should never be used by - * users in any code shipped to users (ideally) - */ - - REISERFS_NO_BORDER, - REISERFS_NO_UNHASHED_RELOCATION, - REISERFS_HASHED_RELOCATION, - REISERFS_ATTRS, - REISERFS_XATTRS_USER, - REISERFS_POSIXACL, - REISERFS_EXPOSE_PRIVROOT, - REISERFS_BARRIER_NONE, - REISERFS_BARRIER_FLUSH, - - /* Actions on error */ - REISERFS_ERROR_PANIC, - REISERFS_ERROR_RO, - REISERFS_ERROR_CONTINUE, - - REISERFS_USRQUOTA, /* User quota option specified */ - REISERFS_GRPQUOTA, /* Group quota option specified */ - - REISERFS_TEST1, - REISERFS_TEST2, - REISERFS_TEST3, - REISERFS_TEST4, - REISERFS_UNSUPPORTED_OPT, -}; - -#define reiserfs_r5_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_R5_HASH)) -#define reiserfs_rupasov_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_RUPASOV_HASH)) -#define reiserfs_tea_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_TEA_HASH)) -#define reiserfs_hash_detect(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_HASH_DETECT)) -#define reiserfs_no_border(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_BORDER)) -#define reiserfs_no_unhashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION)) -#define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION)) -#define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4)) - -#define have_large_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_LARGETAIL)) -#define have_small_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_SMALLTAIL)) -#define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY)) -#define reiserfs_attrs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ATTRS)) -#define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5)) -#define convert_reiserfs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_CONVERT)) -#define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG)) -#define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED)) -#define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) -#define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER)) -#define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL)) -#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT)) -#define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) -#define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE)) -#define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH)) - -#define reiserfs_error_panic(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_PANIC)) -#define reiserfs_error_ro(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_RO)) - -void reiserfs_file_buffer(struct buffer_head *bh, int list); -extern struct file_system_type reiserfs_fs_type; -int reiserfs_resize(struct super_block *, unsigned long); - -#define CARRY_ON 0 -#define SCHEDULE_OCCURRED 1 - 
-#define SB_BUFFER_WITH_SB(s) (REISERFS_SB(s)->s_sbh) -#define SB_JOURNAL(s) (REISERFS_SB(s)->s_journal) -#define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block) -#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) -#define SB_AP_BITMAP(s) (REISERFS_SB(s)->s_ap_bitmap) - -#define SB_DISK_JOURNAL_HEAD(s) (SB_JOURNAL(s)->j_header_bh->) - -#define reiserfs_is_journal_aborted(journal) (unlikely (__reiserfs_is_journal_aborted (journal))) -static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal - *journal) -{ - return test_bit(J_ABORTED, &journal->j_state); -} - -/* - * Locking primitives. The write lock is a per superblock - * special mutex that has properties close to the Big Kernel Lock - * which was used in the previous locking scheme. - */ -void reiserfs_write_lock(struct super_block *s); -void reiserfs_write_unlock(struct super_block *s); -int __must_check reiserfs_write_unlock_nested(struct super_block *s); -void reiserfs_write_lock_nested(struct super_block *s, int depth); - -#ifdef CONFIG_REISERFS_CHECK -void reiserfs_lock_check_recursive(struct super_block *s); -#else -static inline void reiserfs_lock_check_recursive(struct super_block *s) { } -#endif - -/* - * Several mutexes depend on the write lock. - * However sometimes we want to relax the write lock while we hold - * these mutexes, according to the release/reacquire on schedule() - * properties of the Bkl that were used. - * Reiserfs performances and locking were based on this scheme. - * Now that the write lock is a mutex and not the bkl anymore, doing so - * may result in a deadlock: - * - * A acquire write_lock - * A acquire j_commit_mutex - * A release write_lock and wait for something - * B acquire write_lock - * B can't acquire j_commit_mutex and sleep - * A can't acquire write lock anymore - * deadlock - * - * What we do here is avoiding such deadlock by playing the same game - * than the Bkl: if we can't acquire a mutex that depends on the write lock, - * we release the write lock, wait a bit and then retry. - * - * The mutexes concerned by this hack are: - * - The commit mutex of a journal list - * - The flush mutex - * - The journal lock - * - The inode mutex - */ -static inline void reiserfs_mutex_lock_safe(struct mutex *m, - struct super_block *s) -{ - int depth; - - depth = reiserfs_write_unlock_nested(s); - mutex_lock(m); - reiserfs_write_lock_nested(s, depth); -} - -static inline void -reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, - struct super_block *s) -{ - int depth; - - depth = reiserfs_write_unlock_nested(s); - mutex_lock_nested(m, subclass); - reiserfs_write_lock_nested(s, depth); -} - -static inline void -reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) -{ - int depth; - depth = reiserfs_write_unlock_nested(s); - down_read(sem); - reiserfs_write_lock_nested(s, depth); -} - -/* - * When we schedule, we usually want to also release the write lock, - * according to the previous bkl based locking scheme of reiserfs. 
- */ -static inline void reiserfs_cond_resched(struct super_block *s) -{ - if (need_resched()) { - int depth; - - depth = reiserfs_write_unlock_nested(s); - schedule(); - reiserfs_write_lock_nested(s, depth); - } -} - -struct fid; - -/* - * in reading the #defines, it may help to understand that they employ - * the following abbreviations: - * - * B = Buffer - * I = Item header - * H = Height within the tree (should be changed to LEV) - * N = Number of the item in the node - * STAT = stat data - * DEH = Directory Entry Header - * EC = Entry Count - * E = Entry number - * UL = Unsigned Long - * BLKH = BLocK Header - * UNFM = UNForMatted node - * DC = Disk Child - * P = Path - * - * These #defines are named by concatenating these abbreviations, - * where first comes the arguments, and last comes the return value, - * of the macro. - */ - -#define USE_INODE_GENERATION_COUNTER - -#define REISERFS_PREALLOCATE -#define DISPLACE_NEW_PACKING_LOCALITIES -#define PREALLOCATION_SIZE 9 - -/* n must be power of 2 */ -#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) - -/* - * to be ok for alpha and others we have to align structures to 8 byte - * boundary. - * FIXME: do not change 4 by anything else: there is code which relies on that - */ -#define ROUND_UP(x) _ROUND_UP(x,8LL) - -/* - * debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug - * messages. - */ -#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ - -void __reiserfs_warning(struct super_block *s, const char *id, - const char *func, const char *fmt, ...); -#define reiserfs_warning(s, id, fmt, args...) \ - __reiserfs_warning(s, id, __func__, fmt, ##args) -/* assertions handling */ - -/* always check a condition and panic if it's false. */ -#define __RASSERT(cond, scond, format, args...) \ -do { \ - if (!(cond)) \ - reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \ - __FILE__ ":%i:%s: " format "\n", \ - __LINE__, __func__ , ##args); \ -} while (0) - -#define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args) - -#if defined( CONFIG_REISERFS_CHECK ) -#define RFALSE(cond, format, args...) __RASSERT(!(cond), "!(" #cond ")", format, ##args) -#else -#define RFALSE( cond, format, args... ) do {;} while( 0 ) -#endif - -#define CONSTF __attribute_const__ -/* - * Disk Data Structures - */ - -/*************************************************************************** - * SUPER BLOCK * - ***************************************************************************/ - -/* - * Structure of super block on disk, a version of which in RAM is often - * accessed as REISERFS_SB(s)->s_rs. The version in RAM is part of a larger - * structure containing fields never written to disk. - */ -#define UNSET_HASH 0 /* Detect hash on disk */ -#define TEA_HASH 1 -#define YURA_HASH 2 -#define R5_HASH 3 -#define DEFAULT_HASH R5_HASH - -struct journal_params { - /* where does journal start from on its * device */ - __le32 jp_journal_1st_block; - - /* journal device st_rdev */ - __le32 jp_journal_dev; - - /* size of the journal */ - __le32 jp_journal_size; - - /* max number of blocks in a transaction. 
*/ - __le32 jp_journal_trans_max; - - /* - * random value made on fs creation - * (this was sb_journal_block_count) - */ - __le32 jp_journal_magic; - - /* max number of blocks to batch into a trans */ - __le32 jp_journal_max_batch; - - /* in seconds, how old can an async commit be */ - __le32 jp_journal_max_commit_age; - - /* in seconds, how old can a transaction be */ - __le32 jp_journal_max_trans_age; -}; - -/* this is the super from 3.5.X, where X >= 10 */ -struct reiserfs_super_block_v1 { - __le32 s_block_count; /* blocks count */ - __le32 s_free_blocks; /* free blocks count */ - __le32 s_root_block; /* root block number */ - struct journal_params s_journal; - __le16 s_blocksize; /* block size */ - - /* max size of object id array, see get_objectid() commentary */ - __le16 s_oid_maxsize; - __le16 s_oid_cursize; /* current size of object id array */ - - /* this is set to 1 when filesystem was umounted, to 2 - when not */ - __le16 s_umount_state; - - /* - * reiserfs magic string indicates that file system is reiserfs: - * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" - */ - char s_magic[10]; - - /* - * it is set to used by fsck to mark which - * phase of rebuilding is done - */ - __le16 s_fs_state; - /* - * indicate, what hash function is being use - * to sort names in a directory - */ - __le32 s_hash_function_code; - __le16 s_tree_height; /* height of disk tree */ - - /* - * amount of bitmap blocks needed to address - * each block of file system - */ - __le16 s_bmap_nr; - - /* - * this field is only reliable on filesystem with non-standard journal - */ - __le16 s_version; - - /* - * size in blocks of journal area on main device, we need to - * keep after making fs with non-standard journal - */ - __le16 s_reserved_for_journal; -} __attribute__ ((__packed__)); - -#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) - -/* this is the on disk super block */ -struct reiserfs_super_block { - struct reiserfs_super_block_v1 s_v1; - __le32 s_inode_generation; - - /* Right now used only by inode-attributes, if enabled */ - __le32 s_flags; - - unsigned char s_uuid[16]; /* filesystem unique identifier */ - unsigned char s_label[16]; /* filesystem volume label */ - __le16 s_mnt_count; /* Count of mounts since last fsck */ - __le16 s_max_mnt_count; /* Maximum mounts before check */ - __le32 s_lastcheck; /* Timestamp of last fsck */ - __le32 s_check_interval; /* Interval between checks */ - - /* - * zero filled by mkreiserfs and reiserfs_convert_objectid_map_v1() - * so any additions must be updated there as well. 
*/ - char s_unused[76]; -} __attribute__ ((__packed__)); - -#define SB_SIZE (sizeof(struct reiserfs_super_block)) - -#define REISERFS_VERSION_1 0 -#define REISERFS_VERSION_2 2 - -/* on-disk super block fields converted to cpu form */ -#define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs) -#define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1)) -#define SB_BLOCKSIZE(s) \ - le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize)) -#define SB_BLOCK_COUNT(s) \ - le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count)) -#define SB_FREE_BLOCKS(s) \ - le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks)) -#define SB_REISERFS_MAGIC(s) \ - (SB_V1_DISK_SUPER_BLOCK(s)->s_magic) -#define SB_ROOT_BLOCK(s) \ - le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_root_block)) -#define SB_TREE_HEIGHT(s) \ - le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height)) -#define SB_REISERFS_STATE(s) \ - le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state)) -#define SB_VERSION(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_version)) -#define SB_BMAP_NR(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr)) - -#define PUT_SB_BLOCK_COUNT(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0) -#define PUT_SB_FREE_BLOCKS(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0) -#define PUT_SB_ROOT_BLOCK(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0) -#define PUT_SB_TREE_HEIGHT(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0) -#define PUT_SB_REISERFS_STATE(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state = cpu_to_le16(val); } while (0) -#define PUT_SB_VERSION(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0) -#define PUT_SB_BMAP_NR(s, val) \ - do { SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0) - -#define SB_ONDISK_JP(s) (&SB_V1_DISK_SUPER_BLOCK(s)->s_journal) -#define SB_ONDISK_JOURNAL_SIZE(s) \ - le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_size)) -#define SB_ONDISK_JOURNAL_1st_BLOCK(s) \ - le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_1st_block)) -#define SB_ONDISK_JOURNAL_DEVICE(s) \ - le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_dev)) -#define SB_ONDISK_RESERVED_FOR_JOURNAL(s) \ - le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_reserved_for_journal)) - -#define is_block_in_log_or_reserved_area(s, block) \ - block >= SB_JOURNAL_1st_RESERVED_BLOCK(s) \ - && block < SB_JOURNAL_1st_RESERVED_BLOCK(s) + \ - ((!is_reiserfs_jr(SB_DISK_SUPER_BLOCK(s)) ? \ - SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s))) - -int is_reiserfs_3_5(struct reiserfs_super_block *rs); -int is_reiserfs_3_6(struct reiserfs_super_block *rs); -int is_reiserfs_jr(struct reiserfs_super_block *rs); - -/* - * ReiserFS leaves the first 64k unused, so that partition labels have - * enough space. If someone wants to write a fancy bootloader that - * needs more than 64k, let us know, and this will be increased in size. - * This number must be larger than the largest block size on any - * platform, or code will break. 
-Hans - */ -#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) -#define REISERFS_FIRST_BLOCK unused_define -#define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES - -/* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */ -#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024) - -/* reiserfs internal error code (used by search_by_key and fix_nodes)) */ -#define CARRY_ON 0 -#define REPEAT_SEARCH -1 -#define IO_ERROR -2 -#define NO_DISK_SPACE -3 -#define NO_BALANCING_NEEDED (-4) -#define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5) -#define QUOTA_EXCEEDED -6 - -typedef __u32 b_blocknr_t; -typedef __le32 unp_t; - -struct unfm_nodeinfo { - unp_t unfm_nodenum; - unsigned short unfm_freespace; -}; - -/* there are two formats of keys: 3.5 and 3.6 */ -#define KEY_FORMAT_3_5 0 -#define KEY_FORMAT_3_6 1 - -/* there are two stat datas */ -#define STAT_DATA_V1 0 -#define STAT_DATA_V2 1 - -static inline struct reiserfs_inode_info *REISERFS_I(const struct inode *inode) -{ - return container_of(inode, struct reiserfs_inode_info, vfs_inode); -} - -static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) -{ - return sb->s_fs_info; -} - -/* - * Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16 - * which overflows on large file systems. - */ -static inline __u32 reiserfs_bmap_count(struct super_block *sb) -{ - return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1; -} - -static inline int bmap_would_wrap(unsigned bmap_nr) -{ - return bmap_nr > ((1LL << 16) - 1); -} - -extern const struct xattr_handler * const reiserfs_xattr_handlers[]; - -/* - * this says about version of key of all items (but stat data) the - * object consists of - */ -#define get_inode_item_key_version( inode ) \ - ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5) - -#define set_inode_item_key_version( inode, version ) \ - ({ if((version)==KEY_FORMAT_3_6) \ - REISERFS_I(inode)->i_flags |= i_item_key_version_mask; \ - else \ - REISERFS_I(inode)->i_flags &= ~i_item_key_version_mask; }) - -#define get_inode_sd_version(inode) \ - ((REISERFS_I(inode)->i_flags & i_stat_data_version_mask) ? STAT_DATA_V2 : STAT_DATA_V1) - -#define set_inode_sd_version(inode, version) \ - ({ if((version)==STAT_DATA_V2) \ - REISERFS_I(inode)->i_flags |= i_stat_data_version_mask; \ - else \ - REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; }) - -/* - * This is an aggressive tail suppression policy, I am hoping it - * improves our benchmarks. The principle behind it is that percentage - * space saving is what matters, not absolute space saving. This is - * non-intuitive, but it helps to understand it if you consider that the - * cost to access 4 blocks is not much more than the cost to access 1 - * block, if you have to do a seek and rotate. A tail risks a - * non-linear disk access that is significant as a percentage of total - * time cost for a 4 block file and saves an amount of space that is - * less significant as a percentage of space, or so goes the hypothesis. 
- * -Hans - */ -#define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \ -(\ - (!(n_tail_size)) || \ - (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \ - ( (n_file_size) >= (n_block_size) * 4 ) || \ - ( ( (n_file_size) >= (n_block_size) * 3 ) && \ - ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/4) ) || \ - ( ( (n_file_size) >= (n_block_size) * 2 ) && \ - ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/2) ) || \ - ( ( (n_file_size) >= (n_block_size) ) && \ - ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ -) - -/* - * Another strategy for tails, this one means only create a tail if all the - * file would fit into one DIRECT item. - * Primary intention for this one is to increase performance by decreasing - * seeking. -*/ -#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \ -(\ - (!(n_tail_size)) || \ - (((n_file_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) ) \ -) - -/* - * values for s_umount_state field - */ -#define REISERFS_VALID_FS 1 -#define REISERFS_ERROR_FS 2 - -/* - * there are 5 item types currently - */ -#define TYPE_STAT_DATA 0 -#define TYPE_INDIRECT 1 -#define TYPE_DIRECT 2 -#define TYPE_DIRENTRY 3 -#define TYPE_MAXTYPE 3 -#define TYPE_ANY 15 /* FIXME: comment is required */ - -/*************************************************************************** - * KEY & ITEM HEAD * - ***************************************************************************/ - -/* * directories use this key as well as old files */ -struct offset_v1 { - __le32 k_offset; - __le32 k_uniqueness; -} __attribute__ ((__packed__)); - -struct offset_v2 { - __le64 v; -} __attribute__ ((__packed__)); - -static inline __u16 offset_v2_k_type(const struct offset_v2 *v2) -{ - __u8 type = le64_to_cpu(v2->v) >> 60; - return (type <= TYPE_MAXTYPE) ? type : TYPE_ANY; -} - -static inline void set_offset_v2_k_type(struct offset_v2 *v2, int type) -{ - v2->v = - (v2->v & cpu_to_le64(~0ULL >> 4)) | cpu_to_le64((__u64) type << 60); -} - -static inline loff_t offset_v2_k_offset(const struct offset_v2 *v2) -{ - return le64_to_cpu(v2->v) & (~0ULL >> 4); -} - -static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset) -{ - offset &= (~0ULL >> 4); - v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset); -} - -/* - * Key of an item determines its location in the S+tree, and - * is composed of 4 components - */ -struct reiserfs_key { - /* packing locality: by default parent directory object id */ - __le32 k_dir_id; - - __le32 k_objectid; /* object identifier */ - union { - struct offset_v1 k_offset_v1; - struct offset_v2 k_offset_v2; - } __attribute__ ((__packed__)) u; -} __attribute__ ((__packed__)); - -struct in_core_key { - /* packing locality: by default parent directory object id */ - __u32 k_dir_id; - __u32 k_objectid; /* object identifier */ - __u64 k_offset; - __u8 k_type; -}; - -struct cpu_key { - struct in_core_key on_disk_key; - int version; - /* 3 in all cases but direct2indirect and indirect2direct conversion */ - int key_length; -}; - -/* - * Our function for comparing keys can compare keys of different - * lengths. It takes as a parameter the length of the keys it is to - * compare. These defines are used in determining what is to be passed - * to it as that parameter. 
- */ -#define REISERFS_FULL_KEY_LEN 4 -#define REISERFS_SHORT_KEY_LEN 2 - -/* The result of the key compare */ -#define FIRST_GREATER 1 -#define SECOND_GREATER -1 -#define KEYS_IDENTICAL 0 -#define KEY_FOUND 1 -#define KEY_NOT_FOUND 0 - -#define KEY_SIZE (sizeof(struct reiserfs_key)) - -/* return values for search_by_key and clones */ -#define ITEM_FOUND 1 -#define ITEM_NOT_FOUND 0 -#define ENTRY_FOUND 1 -#define ENTRY_NOT_FOUND 0 -#define DIRECTORY_NOT_FOUND -1 -#define REGULAR_FILE_FOUND -2 -#define DIRECTORY_FOUND -3 -#define BYTE_FOUND 1 -#define BYTE_NOT_FOUND 0 -#define FILE_NOT_FOUND -1 - -#define POSITION_FOUND 1 -#define POSITION_NOT_FOUND 0 - -/* return values for reiserfs_find_entry and search_by_entry_key */ -#define NAME_FOUND 1 -#define NAME_NOT_FOUND 0 -#define GOTO_PREVIOUS_ITEM 2 -#define NAME_FOUND_INVISIBLE 3 - -/* - * Everything in the filesystem is stored as a set of items. The - * item head contains the key of the item, its free space (for - * indirect items) and specifies the location of the item itself - * within the block. - */ - -struct item_head { - /* - * Everything in the tree is found by searching for it based on - * its key. - */ - struct reiserfs_key ih_key; - union { - /* - * The free space in the last unformatted node of an - * indirect item if this is an indirect item. This - * equals 0xFFFF iff this is a direct item or stat data - * item. Note that the key, not this field, is used to - * determine the item type, and thus which field this - * union contains. - */ - __le16 ih_free_space_reserved; - - /* - * Iff this is a directory item, this field equals the - * number of directory entries in the directory item. - */ - __le16 ih_entry_count; - } __attribute__ ((__packed__)) u; - __le16 ih_item_len; /* total size of the item body */ - - /* an offset to the item body within the block */ - __le16 ih_item_location; - - /* - * 0 for all old items, 2 for new ones. Highest bit is set by fsck - * temporary, cleaned after all done - */ - __le16 ih_version; -} __attribute__ ((__packed__)); -/* size of item header */ -#define IH_SIZE (sizeof(struct item_head)) - -#define ih_free_space(ih) le16_to_cpu((ih)->u.ih_free_space_reserved) -#define ih_version(ih) le16_to_cpu((ih)->ih_version) -#define ih_entry_count(ih) le16_to_cpu((ih)->u.ih_entry_count) -#define ih_location(ih) le16_to_cpu((ih)->ih_item_location) -#define ih_item_len(ih) le16_to_cpu((ih)->ih_item_len) - -#define put_ih_free_space(ih, val) do { (ih)->u.ih_free_space_reserved = cpu_to_le16(val); } while(0) -#define put_ih_version(ih, val) do { (ih)->ih_version = cpu_to_le16(val); } while (0) -#define put_ih_entry_count(ih, val) do { (ih)->u.ih_entry_count = cpu_to_le16(val); } while (0) -#define put_ih_location(ih, val) do { (ih)->ih_item_location = cpu_to_le16(val); } while (0) -#define put_ih_item_len(ih, val) do { (ih)->ih_item_len = cpu_to_le16(val); } while (0) - -#define unreachable_item(ih) (ih_version(ih) & (1 << 15)) - -#define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih)) -#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val))) - -/* - * these operate on indirect items, where you've got an array of ints - * at a possibly unaligned location. These are a noop on ia32 - * - * p is the array of __u32, i is the index into the array, v is the value - * to store there. 
- */ -#define get_block_num(p, i) get_unaligned_le32((p) + (i)) -#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i)) - -/* * in old version uniqueness field shows key type */ -#define V1_SD_UNIQUENESS 0 -#define V1_INDIRECT_UNIQUENESS 0xfffffffe -#define V1_DIRECT_UNIQUENESS 0xffffffff -#define V1_DIRENTRY_UNIQUENESS 500 -#define V1_ANY_UNIQUENESS 555 /* FIXME: comment is required */ - -/* here are conversion routines */ -static inline int uniqueness2type(__u32 uniqueness) CONSTF; -static inline int uniqueness2type(__u32 uniqueness) -{ - switch ((int)uniqueness) { - case V1_SD_UNIQUENESS: - return TYPE_STAT_DATA; - case V1_INDIRECT_UNIQUENESS: - return TYPE_INDIRECT; - case V1_DIRECT_UNIQUENESS: - return TYPE_DIRECT; - case V1_DIRENTRY_UNIQUENESS: - return TYPE_DIRENTRY; - case V1_ANY_UNIQUENESS: - default: - return TYPE_ANY; - } -} - -static inline __u32 type2uniqueness(int type) CONSTF; -static inline __u32 type2uniqueness(int type) -{ - switch (type) { - case TYPE_STAT_DATA: - return V1_SD_UNIQUENESS; - case TYPE_INDIRECT: - return V1_INDIRECT_UNIQUENESS; - case TYPE_DIRECT: - return V1_DIRECT_UNIQUENESS; - case TYPE_DIRENTRY: - return V1_DIRENTRY_UNIQUENESS; - case TYPE_ANY: - default: - return V1_ANY_UNIQUENESS; - } -} - -/* - * key is pointer to on disk key which is stored in le, result is cpu, - * there is no way to get version of object from key, so, provide - * version to these defines - */ -static inline loff_t le_key_k_offset(int version, - const struct reiserfs_key *key) -{ - return (version == KEY_FORMAT_3_5) ? - le32_to_cpu(key->u.k_offset_v1.k_offset) : - offset_v2_k_offset(&(key->u.k_offset_v2)); -} - -static inline loff_t le_ih_k_offset(const struct item_head *ih) -{ - return le_key_k_offset(ih_version(ih), &(ih->ih_key)); -} - -static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key) -{ - if (version == KEY_FORMAT_3_5) { - loff_t val = le32_to_cpu(key->u.k_offset_v1.k_uniqueness); - return uniqueness2type(val); - } else - return offset_v2_k_type(&(key->u.k_offset_v2)); -} - -static inline loff_t le_ih_k_type(const struct item_head *ih) -{ - return le_key_k_type(ih_version(ih), &(ih->ih_key)); -} - -static inline void set_le_key_k_offset(int version, struct reiserfs_key *key, - loff_t offset) -{ - if (version == KEY_FORMAT_3_5) - key->u.k_offset_v1.k_offset = cpu_to_le32(offset); - else - set_offset_v2_k_offset(&key->u.k_offset_v2, offset); -} - -static inline void add_le_key_k_offset(int version, struct reiserfs_key *key, - loff_t offset) -{ - set_le_key_k_offset(version, key, - le_key_k_offset(version, key) + offset); -} - -static inline void add_le_ih_k_offset(struct item_head *ih, loff_t offset) -{ - add_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset); -} - -static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset) -{ - set_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset); -} - -static inline void set_le_key_k_type(int version, struct reiserfs_key *key, - int type) -{ - if (version == KEY_FORMAT_3_5) { - type = type2uniqueness(type); - key->u.k_offset_v1.k_uniqueness = cpu_to_le32(type); - } else - set_offset_v2_k_type(&key->u.k_offset_v2, type); -} - -static inline void set_le_ih_k_type(struct item_head *ih, int type) -{ - set_le_key_k_type(ih_version(ih), &(ih->ih_key), type); -} - -static inline int is_direntry_le_key(int version, struct reiserfs_key *key) -{ - return le_key_k_type(version, key) == TYPE_DIRENTRY; -} - -static inline int is_direct_le_key(int version, struct reiserfs_key *key) -{ - 
return le_key_k_type(version, key) == TYPE_DIRECT; -} - -static inline int is_indirect_le_key(int version, struct reiserfs_key *key) -{ - return le_key_k_type(version, key) == TYPE_INDIRECT; -} - -static inline int is_statdata_le_key(int version, struct reiserfs_key *key) -{ - return le_key_k_type(version, key) == TYPE_STAT_DATA; -} - -/* item header has version. */ -static inline int is_direntry_le_ih(struct item_head *ih) -{ - return is_direntry_le_key(ih_version(ih), &ih->ih_key); -} - -static inline int is_direct_le_ih(struct item_head *ih) -{ - return is_direct_le_key(ih_version(ih), &ih->ih_key); -} - -static inline int is_indirect_le_ih(struct item_head *ih) -{ - return is_indirect_le_key(ih_version(ih), &ih->ih_key); -} - -static inline int is_statdata_le_ih(struct item_head *ih) -{ - return is_statdata_le_key(ih_version(ih), &ih->ih_key); -} - -/* key is pointer to cpu key, result is cpu */ -static inline loff_t cpu_key_k_offset(const struct cpu_key *key) -{ - return key->on_disk_key.k_offset; -} - -static inline loff_t cpu_key_k_type(const struct cpu_key *key) -{ - return key->on_disk_key.k_type; -} - -static inline void set_cpu_key_k_offset(struct cpu_key *key, loff_t offset) -{ - key->on_disk_key.k_offset = offset; -} - -static inline void set_cpu_key_k_type(struct cpu_key *key, int type) -{ - key->on_disk_key.k_type = type; -} - -static inline void cpu_key_k_offset_dec(struct cpu_key *key) -{ - key->on_disk_key.k_offset--; -} - -#define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY) -#define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT) -#define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT) -#define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA) - -/* are these used ? */ -#define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key))) -#define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key))) -#define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key))) -#define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key))) - -#define I_K_KEY_IN_ITEM(ih, key, n_blocksize) \ - (!COMP_SHORT_KEYS(ih, key) && \ - I_OFF_BYTE_IN_ITEM(ih, k_offset(key), n_blocksize)) - -/* maximal length of item */ -#define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE) -#define MIN_ITEM_LEN 1 - -/* object identifier for root dir */ -#define REISERFS_ROOT_OBJECTID 2 -#define REISERFS_ROOT_PARENT_OBJECTID 1 - -extern struct reiserfs_key root_key; - -/* - * Picture represents a leaf of the S+tree - * ______________________________________________________ - * | | Array of | | | - * |Block | Object-Item | F r e e | Objects- | - * | head | Headers | S p a c e | Items | - * |______|_______________|___________________|___________| - */ - -/* - * Header of a disk block. More precisely, header of a formatted leaf - * or internal node, and not the header of an unformatted node. - */ -struct block_head { - __le16 blk_level; /* Level of a block in the tree. */ - __le16 blk_nr_item; /* Number of keys/items in a block. */ - __le16 blk_free_space; /* Block free space in bytes. 
*/ - __le16 blk_reserved; - /* dump this in v4/planA */ - - /* kept only for compatibility */ - struct reiserfs_key blk_right_delim_key; -}; - -#define BLKH_SIZE (sizeof(struct block_head)) -#define blkh_level(p_blkh) (le16_to_cpu((p_blkh)->blk_level)) -#define blkh_nr_item(p_blkh) (le16_to_cpu((p_blkh)->blk_nr_item)) -#define blkh_free_space(p_blkh) (le16_to_cpu((p_blkh)->blk_free_space)) -#define blkh_reserved(p_blkh) (le16_to_cpu((p_blkh)->blk_reserved)) -#define set_blkh_level(p_blkh,val) ((p_blkh)->blk_level = cpu_to_le16(val)) -#define set_blkh_nr_item(p_blkh,val) ((p_blkh)->blk_nr_item = cpu_to_le16(val)) -#define set_blkh_free_space(p_blkh,val) ((p_blkh)->blk_free_space = cpu_to_le16(val)) -#define set_blkh_reserved(p_blkh,val) ((p_blkh)->blk_reserved = cpu_to_le16(val)) -#define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key) -#define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val) - -/* values for blk_level field of the struct block_head */ - -/* - * When node gets removed from the tree its blk_level is set to FREE_LEVEL. - * It is then used to see whether the node is still in the tree - */ -#define FREE_LEVEL 0 - -#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ - -/* - * Given the buffer head of a formatted node, resolve to the - * block head of that node. - */ -#define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data)) -/* Number of items that are in buffer. */ -#define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh))) -#define B_LEVEL(bh) (blkh_level(B_BLK_HEAD(bh))) -#define B_FREE_SPACE(bh) (blkh_free_space(B_BLK_HEAD(bh))) - -#define PUT_B_NR_ITEMS(bh, val) do { set_blkh_nr_item(B_BLK_HEAD(bh), val); } while (0) -#define PUT_B_LEVEL(bh, val) do { set_blkh_level(B_BLK_HEAD(bh), val); } while (0) -#define PUT_B_FREE_SPACE(bh, val) do { set_blkh_free_space(B_BLK_HEAD(bh), val); } while (0) - -/* Get right delimiting key. -- little endian */ -#define B_PRIGHT_DELIM_KEY(bh) (&(blk_right_delim_key(B_BLK_HEAD(bh)))) - -/* Does the buffer contain a disk leaf. */ -#define B_IS_ITEMS_LEVEL(bh) (B_LEVEL(bh) == DISK_LEAF_NODE_LEVEL) - -/* Does the buffer contain a disk internal node */ -#define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \ - && B_LEVEL(bh) <= MAX_HEIGHT) - -/*************************************************************************** - * STAT DATA * - ***************************************************************************/ - -/* - * old stat data is 32 bytes long. We are going to distinguish new one by - * different size -*/ -struct stat_data_v1 { - __le16 sd_mode; /* file type, permissions */ - __le16 sd_nlink; /* number of hard links */ - __le16 sd_uid; /* owner */ - __le16 sd_gid; /* group */ - __le32 sd_size; /* file size */ - __le32 sd_atime; /* time of last access */ - __le32 sd_mtime; /* time file was last modified */ - - /* - * time inode (stat data) was last changed - * (except changes to sd_atime and sd_mtime) - */ - __le32 sd_ctime; - union { - __le32 sd_rdev; - __le32 sd_blocks; /* number of blocks file uses */ - } __attribute__ ((__packed__)) u; - - /* - * first byte of file which is stored in a direct item: except that if - * it equals 1 it is a symlink and if it equals ~(__u32)0 there is no - * direct item. The existence of this field really grates on me. - * Let's replace it with a macro based on sd_size and our tail - * suppression policy. Someday. 
-Hans - */ - __le32 sd_first_direct_byte; -} __attribute__ ((__packed__)); - -#define SD_V1_SIZE (sizeof(struct stat_data_v1)) -#define stat_data_v1(ih) (ih_version (ih) == KEY_FORMAT_3_5) -#define sd_v1_mode(sdp) (le16_to_cpu((sdp)->sd_mode)) -#define set_sd_v1_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v)) -#define sd_v1_nlink(sdp) (le16_to_cpu((sdp)->sd_nlink)) -#define set_sd_v1_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le16(v)) -#define sd_v1_uid(sdp) (le16_to_cpu((sdp)->sd_uid)) -#define set_sd_v1_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le16(v)) -#define sd_v1_gid(sdp) (le16_to_cpu((sdp)->sd_gid)) -#define set_sd_v1_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le16(v)) -#define sd_v1_size(sdp) (le32_to_cpu((sdp)->sd_size)) -#define set_sd_v1_size(sdp,v) ((sdp)->sd_size = cpu_to_le32(v)) -#define sd_v1_atime(sdp) (le32_to_cpu((sdp)->sd_atime)) -#define set_sd_v1_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v)) -#define sd_v1_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime)) -#define set_sd_v1_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v)) -#define sd_v1_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime)) -#define set_sd_v1_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v)) -#define sd_v1_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev)) -#define set_sd_v1_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v)) -#define sd_v1_blocks(sdp) (le32_to_cpu((sdp)->u.sd_blocks)) -#define set_sd_v1_blocks(sdp,v) ((sdp)->u.sd_blocks = cpu_to_le32(v)) -#define sd_v1_first_direct_byte(sdp) \ - (le32_to_cpu((sdp)->sd_first_direct_byte)) -#define set_sd_v1_first_direct_byte(sdp,v) \ - ((sdp)->sd_first_direct_byte = cpu_to_le32(v)) - -/* inode flags stored in sd_attrs (nee sd_reserved) */ - -/* - * we want common flags to have the same values as in ext2, - * so chattr(1) will work without problems - */ -#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL -#define REISERFS_APPEND_FL FS_APPEND_FL -#define REISERFS_SYNC_FL FS_SYNC_FL -#define REISERFS_NOATIME_FL FS_NOATIME_FL -#define REISERFS_NODUMP_FL FS_NODUMP_FL -#define REISERFS_SECRM_FL FS_SECRM_FL -#define REISERFS_UNRM_FL FS_UNRM_FL -#define REISERFS_COMPR_FL FS_COMPR_FL -#define REISERFS_NOTAIL_FL FS_NOTAIL_FL - -/* persistent flags that file inherits from the parent directory */ -#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ - REISERFS_SYNC_FL | \ - REISERFS_NOATIME_FL | \ - REISERFS_NODUMP_FL | \ - REISERFS_SECRM_FL | \ - REISERFS_COMPR_FL | \ - REISERFS_NOTAIL_FL ) - -/* - * Stat Data on disk (reiserfs version of UFS disk inode minus the - * address blocks) - */ -struct stat_data { - __le16 sd_mode; /* file type, permissions */ - __le16 sd_attrs; /* persistent inode flags */ - __le32 sd_nlink; /* number of hard links */ - __le64 sd_size; /* file size */ - __le32 sd_uid; /* owner */ - __le32 sd_gid; /* group */ - __le32 sd_atime; /* time of last access */ - __le32 sd_mtime; /* time file was last modified */ - - /* - * time inode (stat data) was last changed - * (except changes to sd_atime and sd_mtime) - */ - __le32 sd_ctime; - __le32 sd_blocks; - union { - __le32 sd_rdev; - __le32 sd_generation; - } __attribute__ ((__packed__)) u; -} __attribute__ ((__packed__)); - -/* this is 44 bytes long */ -#define SD_SIZE (sizeof(struct stat_data)) -#define SD_V2_SIZE SD_SIZE -#define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6) -#define sd_v2_mode(sdp) (le16_to_cpu((sdp)->sd_mode)) -#define set_sd_v2_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v)) -/* sd_reserved */ -/* set_sd_reserved */ -#define sd_v2_nlink(sdp) (le32_to_cpu((sdp)->sd_nlink)) -#define set_sd_v2_nlink(sdp,v) ((sdp)->sd_nlink = 
cpu_to_le32(v)) -#define sd_v2_size(sdp) (le64_to_cpu((sdp)->sd_size)) -#define set_sd_v2_size(sdp,v) ((sdp)->sd_size = cpu_to_le64(v)) -#define sd_v2_uid(sdp) (le32_to_cpu((sdp)->sd_uid)) -#define set_sd_v2_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le32(v)) -#define sd_v2_gid(sdp) (le32_to_cpu((sdp)->sd_gid)) -#define set_sd_v2_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le32(v)) -#define sd_v2_atime(sdp) (le32_to_cpu((sdp)->sd_atime)) -#define set_sd_v2_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v)) -#define sd_v2_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime)) -#define set_sd_v2_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v)) -#define sd_v2_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime)) -#define set_sd_v2_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v)) -#define sd_v2_blocks(sdp) (le32_to_cpu((sdp)->sd_blocks)) -#define set_sd_v2_blocks(sdp,v) ((sdp)->sd_blocks = cpu_to_le32(v)) -#define sd_v2_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev)) -#define set_sd_v2_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v)) -#define sd_v2_generation(sdp) (le32_to_cpu((sdp)->u.sd_generation)) -#define set_sd_v2_generation(sdp,v) ((sdp)->u.sd_generation = cpu_to_le32(v)) -#define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs)) -#define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v)) - -/*************************************************************************** - * DIRECTORY STRUCTURE * - ***************************************************************************/ -/* - * Picture represents the structure of directory items - * ________________________________________________ - * | Array of | | | | | | - * | directory |N-1| N-2 | .... | 1st |0th| - * | entry headers | | | | | | - * |_______________|___|_____|________|_______|___| - * <---- directory entries ------> - * - * First directory item has k_offset component 1. We store "." and ".." - * in one item, always, we never split "." and ".." into differing - * items. This makes, among other things, the code for removing - * directories simpler. - */ -#define SD_OFFSET 0 -#define SD_UNIQUENESS 0 -#define DOT_OFFSET 1 -#define DOT_DOT_OFFSET 2 -#define DIRENTRY_UNIQUENESS 500 - -#define FIRST_ITEM_OFFSET 1 - -/* - * Q: How to get key of object pointed to by entry from entry? - * - * A: Each directory entry has its header. 
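Every one of the sd_v1_*/sd_v2_* accessors defined above follows the same pattern: the on-disk field is stored little-endian, and each read or write goes through le*_to_cpu()/cpu_to_le*() so the same code works on big-endian hosts. A minimal user-space sketch of that pattern (the demo_* names and the two-field record are illustrative only, not part of reiserfs):

#include <stdint.h>
#include <stdio.h>

/* hypothetical on-disk record: fields stored little-endian, packed */
struct demo_sd {
	uint16_t sd_mode;
} __attribute__ ((__packed__));

/* trivial helpers standing in for le16_to_cpu()/cpu_to_le16() */
static uint16_t demo_le16_to_cpu(uint16_t v)
{
	const uint8_t *p = (const uint8_t *)&v;
	return (uint16_t)(p[0] | (p[1] << 8));
}

static uint16_t demo_cpu_to_le16(uint16_t v)
{
	uint16_t out;
	uint8_t *p = (uint8_t *)&out;
	p[0] = v & 0xff;
	p[1] = v >> 8;
	return out;
}

/* accessor pair, mirroring the sd_v2_mode()/set_sd_v2_mode() shape */
#define demo_sd_mode(sdp)       (demo_le16_to_cpu((sdp)->sd_mode))
#define set_demo_sd_mode(sdp,v) ((sdp)->sd_mode = demo_cpu_to_le16(v))

int main(void)
{
	struct demo_sd sd;

	set_demo_sd_mode(&sd, 0755);
	printf("mode 0%o\n", demo_sd_mode(&sd));
	return 0;
}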
This header has deh_dir_id - * and deh_objectid fields, those are key of object, entry points to - */ - -/* - * NOT IMPLEMENTED: - * Directory will someday contain stat data of object - */ - -struct reiserfs_de_head { - __le32 deh_offset; /* third component of the directory entry key */ - - /* - * objectid of the parent directory of the object, that is referenced - * by directory entry - */ - __le32 deh_dir_id; - - /* objectid of the object, that is referenced by directory entry */ - __le32 deh_objectid; - __le16 deh_location; /* offset of name in the whole item */ - - /* - * whether 1) entry contains stat data (for future), and - * 2) whether entry is hidden (unlinked) - */ - __le16 deh_state; -} __attribute__ ((__packed__)); -#define DEH_SIZE sizeof(struct reiserfs_de_head) -#define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset)) -#define deh_dir_id(p_deh) (le32_to_cpu((p_deh)->deh_dir_id)) -#define deh_objectid(p_deh) (le32_to_cpu((p_deh)->deh_objectid)) -#define deh_location(p_deh) (le16_to_cpu((p_deh)->deh_location)) -#define deh_state(p_deh) (le16_to_cpu((p_deh)->deh_state)) - -#define put_deh_offset(p_deh,v) ((p_deh)->deh_offset = cpu_to_le32((v))) -#define put_deh_dir_id(p_deh,v) ((p_deh)->deh_dir_id = cpu_to_le32((v))) -#define put_deh_objectid(p_deh,v) ((p_deh)->deh_objectid = cpu_to_le32((v))) -#define put_deh_location(p_deh,v) ((p_deh)->deh_location = cpu_to_le16((v))) -#define put_deh_state(p_deh,v) ((p_deh)->deh_state = cpu_to_le16((v))) - -/* empty directory contains two entries "." and ".." and their headers */ -#define EMPTY_DIR_SIZE \ -(DEH_SIZE * 2 + ROUND_UP (sizeof(".") - 1) + ROUND_UP (sizeof("..") - 1)) - -/* old format directories have this size when empty */ -#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3) - -#define DEH_Statdata 0 /* not used now */ -#define DEH_Visible 2 - -/* 64 bit systems (and the S/390) need to be aligned explicitly -jdm */ -#if BITS_PER_LONG == 64 || defined(__s390__) || defined(__hppa__) -# define ADDR_UNALIGNED_BITS (3) -#endif - -/* - * These are only used to manipulate deh_state. 
- * Because of this, we'll use the ext2_ bit routines, - * since they are little endian - */ -#ifdef ADDR_UNALIGNED_BITS - -# define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) -# define unaligned_offset(addr) (((int)((long)(addr) & ((1 << ADDR_UNALIGNED_BITS) - 1))) << 3) - -# define set_bit_unaligned(nr, addr) \ - __test_and_set_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) -# define clear_bit_unaligned(nr, addr) \ - __test_and_clear_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) -# define test_bit_unaligned(nr, addr) \ - test_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) - -#else - -# define set_bit_unaligned(nr, addr) __test_and_set_bit_le(nr, addr) -# define clear_bit_unaligned(nr, addr) __test_and_clear_bit_le(nr, addr) -# define test_bit_unaligned(nr, addr) test_bit_le(nr, addr) - -#endif - -#define mark_de_with_sd(deh) set_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) -#define mark_de_without_sd(deh) clear_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) -#define mark_de_visible(deh) set_bit_unaligned (DEH_Visible, &((deh)->deh_state)) -#define mark_de_hidden(deh) clear_bit_unaligned (DEH_Visible, &((deh)->deh_state)) - -#define de_with_sd(deh) test_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) -#define de_visible(deh) test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) -#define de_hidden(deh) !test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) - -extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, - __le32 par_dirid, __le32 par_objid); -extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, - __le32 par_dirid, __le32 par_objid); - -/* two entries per block (at least) */ -#define REISERFS_MAX_NAME(block_size) 255 - -/* - * this structure is used for operations on directory entries. It is - * not a disk structure. - * - * When reiserfs_find_entry or search_by_entry_key find directory - * entry, they return filled reiserfs_dir_entry structure - */ -struct reiserfs_dir_entry { - struct buffer_head *de_bh; - int de_item_num; - struct item_head *de_ih; - int de_entry_num; - struct reiserfs_de_head *de_deh; - int de_entrylen; - int de_namelen; - char *de_name; - unsigned long *de_gen_number_bit_string; - - __u32 de_dir_id; - __u32 de_objectid; - - struct cpu_key de_entry_key; -}; - -/* - * these defines are useful when a particular member of - * a reiserfs_dir_entry is needed - */ - -/* pointer to file name, stored in entry */ -#define B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh) \ - (ih_item_body(bh, ih) + deh_location(deh)) - -/* length of name */ -#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \ -(I_DEH_N_ENTRY_LENGTH (ih, deh, entry_num) - (de_with_sd (deh) ? 
SD_SIZE : 0)) - -/* hash value occupies bits from 7 up to 30 */ -#define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL) -/* generation number occupies 7 bits starting from 0 up to 6 */ -#define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL) -#define MAX_GENERATION_NUMBER 127 - -#define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number)) - -/* - * Picture represents an internal node of the reiserfs tree - * ______________________________________________________ - * | | Array of | Array of | Free | - * |block | keys | pointers | space | - * | head | N | N+1 | | - * |______|_______________|___________________|___________| - */ - -/*************************************************************************** - * DISK CHILD * - ***************************************************************************/ -/* - * Disk child pointer: - * The pointer from an internal node of the tree to a node that is on disk. - */ -struct disk_child { - __le32 dc_block_number; /* Disk child's block number. */ - __le16 dc_size; /* Disk child's used space. */ - __le16 dc_reserved; -}; - -#define DC_SIZE (sizeof(struct disk_child)) -#define dc_block_number(dc_p) (le32_to_cpu((dc_p)->dc_block_number)) -#define dc_size(dc_p) (le16_to_cpu((dc_p)->dc_size)) -#define put_dc_block_number(dc_p, val) do { (dc_p)->dc_block_number = cpu_to_le32(val); } while(0) -#define put_dc_size(dc_p, val) do { (dc_p)->dc_size = cpu_to_le16(val); } while(0) - -/* Get disk child by buffer header and position in the tree node. */ -#define B_N_CHILD(bh, n_pos) ((struct disk_child *)\ -((bh)->b_data + BLKH_SIZE + B_NR_ITEMS(bh) * KEY_SIZE + DC_SIZE * (n_pos))) - -/* Get disk child number by buffer header and position in the tree node. */ -#define B_N_CHILD_NUM(bh, n_pos) (dc_block_number(B_N_CHILD(bh, n_pos))) -#define PUT_B_N_CHILD_NUM(bh, n_pos, val) \ - (put_dc_block_number(B_N_CHILD(bh, n_pos), val)) - - /* maximal value of field child_size in structure disk_child */ - /* child size is the combined size of all items and their headers */ -#define MAX_CHILD_SIZE(bh) ((int)( (bh)->b_size - BLKH_SIZE )) - -/* amount of used space in buffer (not including block head) */ -#define B_CHILD_SIZE(cur) (MAX_CHILD_SIZE(cur)-(B_FREE_SPACE(cur))) - -/* max and min number of keys in internal node */ -#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) -#define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) - -/*************************************************************************** - * PATH STRUCTURES AND DEFINES * - ***************************************************************************/ - -/* - * search_by_key fills up the path from the root to the leaf as it descends - * the tree looking for the key. It uses reiserfs_bread to try to find - * buffers in the cache given their block number. If it does not find - * them in the cache it reads them from disk. For each node search_by_key - * finds using reiserfs_bread it then uses bin_search to look through that - * node. bin_search will find the position of the block_number of the next - * node if it is looking through an internal node. If it is looking through - * a leaf node bin_search will find the position of the item which has key - * either equal to given key, or which is the maximal key less than the - * given key. - */ - -struct path_element { - /* Pointer to the buffer at the path in the tree. */ - struct buffer_head *pe_buffer; - /* Position in the tree node which is placed in the buffer above. 
*/ - int pe_position; -}; - -/* - * maximal height of a tree. don't change this without - * changing JOURNAL_PER_BALANCE_CNT - */ -#define MAX_HEIGHT 5 - -/* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ -#define EXTENDED_MAX_HEIGHT 7 - -/* Must be equal to at least 2. */ -#define FIRST_PATH_ELEMENT_OFFSET 2 - -/* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ -#define ILLEGAL_PATH_ELEMENT_OFFSET 1 - -/* this MUST be MAX_HEIGHT + 1. See about FEB below */ -#define MAX_FEB_SIZE 6 - -/* - * We need to keep track of who the ancestors of nodes are. When we - * perform a search we record which nodes were visited while - * descending the tree looking for the node we searched for. This list - * of nodes is called the path. This information is used while - * performing balancing. Note that this path information may become - * invalid, and this means we must check it when using it to see if it - * is still valid. You'll need to read search_by_key and the comments - * in it, especially about decrement_counters_in_path(), to understand - * this structure. - * - * Paths make the code so much harder to work with and debug.... An - * enormous number of bugs are due to them, and trying to write or modify - * code that uses them just makes my head hurt. They are based on an - * excessive effort to avoid disturbing the precious VFS code.:-( The - * gods only know how we are going to SMP the code that uses them. - * znodes are the way! - */ - -#define PATH_READA 0x1 /* do read ahead */ -#define PATH_READA_BACK 0x2 /* read backwards */ - -struct treepath { - int path_length; /* Length of the array above. */ - int reada; - /* Array of the path elements. */ - struct path_element path_elements[EXTENDED_MAX_HEIGHT]; - int pos_in_item; -}; - -#define pos_in_item(path) ((path)->pos_in_item) - -#define INITIALIZE_PATH(var) \ -struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} - -/* Get path element by path and path position. */ -#define PATH_OFFSET_PELEMENT(path, n_offset) ((path)->path_elements + (n_offset)) - -/* Get buffer header at the path by path and path position. */ -#define PATH_OFFSET_PBUFFER(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_buffer) - -/* Get position in the element at the path by path and path position. */ -#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position) - -#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length)) - -/* - * you know, to the person who didn't write this the macro name does not - * at first suggest what it does. Maybe POSITION_FROM_PATH_END? Or - * maybe we should just focus on dumping paths... -Hans - */ -#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length)) - -/* - * in do_balance leaf has h == 0 in contrast with path structure, - * where root has level == 0. 
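The PATH_OFFSET_*/PATH_LAST_POSITION macros above simply index a fixed array of (buffer, position) pairs by path_length. A stripped-down user-space sketch of that indexing, using hypothetical demo_* stand-ins rather than the real treepath types:

#include <stdio.h>

#define DEMO_MAX_HEIGHT      7
#define DEMO_ILLEGAL_OFFSET  1

/* cut-down stand-ins for path_element/treepath (illustration only) */
struct demo_pe   { int pe_position; };
struct demo_path {
	int path_length;
	struct demo_pe path_elements[DEMO_MAX_HEIGHT];
};

#define DEMO_OFFSET_PELEMENT(p, n) ((p)->path_elements + (n))
#define DEMO_OFFSET_POSITION(p, n) (DEMO_OFFSET_PELEMENT(p, n)->pe_position)
#define DEMO_LAST_POSITION(p)      (DEMO_OFFSET_POSITION((p), (p)->path_length))

int main(void)
{
	struct demo_path path = { .path_length = DEMO_ILLEGAL_OFFSET };

	/* pretend a search descended through one internal node to a leaf */
	path.path_length = 3;
	DEMO_OFFSET_POSITION(&path, 2) = 5;   /* internal node: child slot 5 */
	DEMO_OFFSET_POSITION(&path, 3) = 12;  /* leaf: item number 12 */

	printf("last position in path: %d\n", DEMO_LAST_POSITION(&path));
	return 0;
}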
That is why we need these defines - */ - -/* tb->S[h] */ -#define PATH_H_PBUFFER(path, h) \ - PATH_OFFSET_PBUFFER(path, path->path_length - (h)) - -/* tb->F[h] or tb->S[0]->b_parent */ -#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER(path, (h) + 1) - -#define PATH_H_POSITION(path, h) \ - PATH_OFFSET_POSITION(path, path->path_length - (h)) - -/* tb->S[h]->b_item_order */ -#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) - -#define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h)) - -static inline void *reiserfs_node_data(const struct buffer_head *bh) -{ - return bh->b_data + sizeof(struct block_head); -} - -/* get key from internal node */ -static inline struct reiserfs_key *internal_key(struct buffer_head *bh, - int item_num) -{ - struct reiserfs_key *key = reiserfs_node_data(bh); - - return &key[item_num]; -} - -/* get the item header from leaf node */ -static inline struct item_head *item_head(const struct buffer_head *bh, - int item_num) -{ - struct item_head *ih = reiserfs_node_data(bh); - - return &ih[item_num]; -} - -/* get the key from leaf node */ -static inline struct reiserfs_key *leaf_key(const struct buffer_head *bh, - int item_num) -{ - return &item_head(bh, item_num)->ih_key; -} - -static inline void *ih_item_body(const struct buffer_head *bh, - const struct item_head *ih) -{ - return bh->b_data + ih_location(ih); -} - -/* get item body from leaf node */ -static inline void *item_body(const struct buffer_head *bh, int item_num) -{ - return ih_item_body(bh, item_head(bh, item_num)); -} - -static inline struct item_head *tp_item_head(const struct treepath *path) -{ - return item_head(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path)); -} - -static inline void *tp_item_body(const struct treepath *path) -{ - return item_body(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path)); -} - -#define get_last_bh(path) PATH_PLAST_BUFFER(path) -#define get_item_pos(path) PATH_LAST_POSITION(path) -#define item_moved(ih,path) comp_items(ih, path) -#define path_changed(ih,path) comp_items (ih, path) - -/* array of the entry headers */ - /* get item body */ -#define B_I_DEH(bh, ih) ((struct reiserfs_de_head *)(ih_item_body(bh, ih))) - -/* - * length of the directory entry in directory item. This define - * calculates length of i-th directory entry using directory entry - * locations from dir entry head. When it calculates length of 0-th - * directory entry, it uses length of whole item in place of entry - * location of the non-existent following entry in the calculation. - * See picture above. - */ -static inline int entry_length(const struct buffer_head *bh, - const struct item_head *ih, int pos_in_item) -{ - struct reiserfs_de_head *deh; - - deh = B_I_DEH(bh, ih) + pos_in_item; - if (pos_in_item) - return deh_location(deh - 1) - deh_location(deh); - - return ih_item_len(ih) - deh_location(deh); -} - -/*************************************************************************** - * MISC * - ***************************************************************************/ - -/* Size of pointer to the unformatted node. */ -#define UNFM_P_SIZE (sizeof(unp_t)) -#define UNFM_P_SHIFT 2 - -/* in in-core inode key is stored on le form */ -#define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key)) - -#define MAX_UL_INT 0xffffffff -#define MAX_INT 0x7ffffff -#define MAX_US_INT 0xffff - -// reiserfs version 2 has max offset 60 bits. 
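entry_length() above works because directory entry bodies are packed back-to-front inside the item: entry i ends where entry i-1 begins, and entry 0 runs to the end of the item. A small stand-alone illustration of that arithmetic, with made-up item length and locations:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t item_len = 100;                    /* ih_item_len(ih) */
	uint16_t deh_location[3] = { 80, 60, 30 }; /* entries 0..2 */

	for (int i = 0; i < 3; i++) {
		int len = i ? deh_location[i - 1] - deh_location[i]
			    : item_len - deh_location[i];
		printf("entry %d: length %d\n", i, len);
	}
	return 0;
}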
Version 1 - 32 bit offset -static inline loff_t max_reiserfs_offset(struct inode *inode) -{ - if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5) - return (loff_t) U32_MAX; - - return (loff_t) ((~(__u64) 0) >> 4); -} - -#define MAX_KEY_OBJECTID MAX_UL_INT - -#define MAX_B_NUM MAX_UL_INT -#define MAX_FC_NUM MAX_US_INT - -/* the purpose is to detect overflow of an unsigned short */ -#define REISERFS_LINK_MAX (MAX_US_INT - 1000) - -/* - * The following defines are used in reiserfs_insert_item - * and reiserfs_append_item - */ -#define REISERFS_KERNEL_MEM 0 /* kernel memory mode */ -#define REISERFS_USER_MEM 1 /* user memory mode */ - -#define fs_generation(s) (REISERFS_SB(s)->s_generation_counter) -#define get_generation(s) atomic_read (&fs_generation(s)) -#define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) -#define __fs_changed(gen,s) (gen != get_generation (s)) -#define fs_changed(gen,s) \ -({ \ - reiserfs_cond_resched(s); \ - __fs_changed(gen, s); \ -}) - -/*************************************************************************** - * FIXATE NODES * - ***************************************************************************/ - -#define VI_TYPE_LEFT_MERGEABLE 1 -#define VI_TYPE_RIGHT_MERGEABLE 2 - -/* - * To make any changes in the tree we always first find node, that - * contains item to be changed/deleted or place to insert a new - * item. We call this node S. To do balancing we need to decide what - * we will shift to left/right neighbor, or to a new node, where new - * item will be etc. To make this analysis simpler we build virtual - * node. Virtual node is an array of items, that will replace items of - * node S. (For instance if we are going to delete an item, virtual - * node does not contain it). Virtual node keeps information about - * item sizes and types, mergeability of first and last items, sizes - * of all entries in directory item. We use this array of items when - * calculating what we can shift to neighbors and how many nodes we - * have to have if we do not any shiftings, if we shift to left/right - * neighbor or to both. 
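The __fs_changed()/fs_changed() macros above implement a plain generation-counter check: snapshot the per-superblock counter before an operation that may block, and re-validate the cached path afterwards. A user-space sketch of that pattern (demo_* names are illustrative only):

#include <stdio.h>

/* stand-in for the per-superblock s_generation_counter */
static int demo_generation;

static int demo_get_generation(void) { return demo_generation; }
static int demo_fs_changed(int gen)  { return gen != demo_get_generation(); }

/* something that balances the tree and bumps the counter */
static void demo_balance(void)       { demo_generation++; }

int main(void)
{
	int gen = demo_get_generation();  /* snapshot before blocking */

	demo_balance();                   /* tree changed underneath us */

	if (demo_fs_changed(gen))
		printf("path is stale, repeat the search\n");
	return 0;
}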
- */ -struct virtual_item { - int vi_index; /* index in the array of item operations */ - unsigned short vi_type; /* left/right mergeability */ - - /* length of item that it will have after balancing */ - unsigned short vi_item_len; - - struct item_head *vi_ih; - const char *vi_item; /* body of item (old or new) */ - const void *vi_new_data; /* 0 always but paste mode */ - void *vi_uarea; /* item specific area */ -}; - -struct virtual_node { - /* this is a pointer to the free space in the buffer */ - char *vn_free_ptr; - - unsigned short vn_nr_item; /* number of items in virtual node */ - - /* - * size of node , that node would have if it has - * unlimited size and no balancing is performed - */ - short vn_size; - - /* mode of balancing (paste, insert, delete, cut) */ - short vn_mode; - - short vn_affected_item_num; - short vn_pos_in_item; - - /* item header of inserted item, 0 for other modes */ - struct item_head *vn_ins_ih; - const void *vn_data; - - /* array of items (including a new one, excluding item to be deleted) */ - struct virtual_item *vn_vi; -}; - -/* used by directory items when creating virtual nodes */ -struct direntry_uarea { - int flags; - __u16 entry_count; - __u16 entry_sizes[]; -} __attribute__ ((__packed__)); - -/*************************************************************************** - * TREE BALANCE * - ***************************************************************************/ - -/* - * This temporary structure is used in tree balance algorithms, and - * constructed as we go to the extent that its various parts are - * needed. It contains arrays of nodes that can potentially be - * involved in the balancing of node S, and parameters that define how - * each of the nodes must be balanced. Note that in these algorithms - * for balancing the worst case is to need to balance the current node - * S and the left and right neighbors and all of their parents plus - * create a new node. We implement S1 balancing for the leaf nodes - * and S0 balancing for the internal nodes (S1 and S0 are defined in - * our papers.) - */ - -/* size of the array of buffers to free at end of do_balance */ -#define MAX_FREE_BLOCK 7 - -/* maximum number of FEB blocknrs on a single level */ -#define MAX_AMOUNT_NEEDED 2 - -/* someday somebody will prefix every field in this struct with tb_ */ -struct tree_balance { - int tb_mode; - int need_balance_dirty; - struct super_block *tb_sb; - struct reiserfs_transaction_handle *transaction_handle; - struct treepath *tb_path; - - /* array of left neighbors of nodes in the path */ - struct buffer_head *L[MAX_HEIGHT]; - - /* array of right neighbors of nodes in the path */ - struct buffer_head *R[MAX_HEIGHT]; - - /* array of fathers of the left neighbors */ - struct buffer_head *FL[MAX_HEIGHT]; - - /* array of fathers of the right neighbors */ - struct buffer_head *FR[MAX_HEIGHT]; - /* array of common parents of center node and its left neighbor */ - struct buffer_head *CFL[MAX_HEIGHT]; - - /* array of common parents of center node and its right neighbor */ - struct buffer_head *CFR[MAX_HEIGHT]; - - /* - * array of empty buffers. Number of buffers in array equals - * cur_blknum. 
- */ - struct buffer_head *FEB[MAX_FEB_SIZE]; - struct buffer_head *used[MAX_FEB_SIZE]; - struct buffer_head *thrown[MAX_FEB_SIZE]; - - /* - * array of number of items which must be shifted to the left in - * order to balance the current node; for leaves includes item that - * will be partially shifted; for internal nodes, it is the number - * of child pointers rather than items. It includes the new item - * being created. The code sometimes subtracts one to get the - * number of wholly shifted items for other purposes. - */ - int lnum[MAX_HEIGHT]; - - /* substitute right for left in comment above */ - int rnum[MAX_HEIGHT]; - - /* - * array indexed by height h mapping the key delimiting L[h] and - * S[h] to its item number within the node CFL[h] - */ - int lkey[MAX_HEIGHT]; - - /* substitute r for l in comment above */ - int rkey[MAX_HEIGHT]; - - /* - * the number of bytes by we are trying to add or remove from - * S[h]. A negative value means removing. - */ - int insert_size[MAX_HEIGHT]; - - /* - * number of nodes that will replace node S[h] after balancing - * on the level h of the tree. If 0 then S is being deleted, - * if 1 then S is remaining and no new nodes are being created, - * if 2 or 3 then 1 or 2 new nodes is being created - */ - int blknum[MAX_HEIGHT]; - - /* fields that are used only for balancing leaves of the tree */ - - /* number of empty blocks having been already allocated */ - int cur_blknum; - - /* number of items that fall into left most node when S[0] splits */ - int s0num; - - /* - * number of bytes which can flow to the left neighbor from the left - * most liquid item that cannot be shifted from S[0] entirely - * if -1 then nothing will be partially shifted - */ - int lbytes; - - /* - * number of bytes which will flow to the right neighbor from the right - * most liquid item that cannot be shifted from S[0] entirely - * if -1 then nothing will be partially shifted - */ - int rbytes; - - - /* - * index into the array of item headers in - * S[0] of the affected item - */ - int item_pos; - - /* new nodes allocated to hold what could not fit into S */ - struct buffer_head *S_new[2]; - - /* - * number of items that will be placed into nodes in S_new - * when S[0] splits - */ - int snum[2]; - - /* - * number of bytes which flow to nodes in S_new when S[0] splits - * note: if S[0] splits into 3 nodes, then items do not need to be cut - */ - int sbytes[2]; - - int pos_in_item; - int zeroes_num; - - /* - * buffers which are to be freed after do_balance finishes - * by unfix_nodes - */ - struct buffer_head *buf_to_free[MAX_FREE_BLOCK]; - - /* - * kmalloced memory. Used to create virtual node and keep - * map of dirtied bitmap blocks - */ - char *vn_buf; - - int vn_buf_size; /* size of the vn_buf */ - - /* VN starts after bitmap of bitmap blocks */ - struct virtual_node *tb_vn; - - /* - * saved value of `reiserfs_generation' counter see - * FILESYSTEM_CHANGED() macro in reiserfs_fs.h - */ - int fs_gen; - -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - /* - * key pointer, to pass to block allocator or - * another low-level subsystem - */ - struct in_core_key key; -#endif -}; - -/* These are modes of balancing */ - -/* When inserting an item. */ -#define M_INSERT 'i' -/* - * When inserting into (directories only) or appending onto an already - * existent item. - */ -#define M_PASTE 'p' -/* When deleting an item. */ -#define M_DELETE 'd' -/* When truncating an item or removing an entry from a (directory) item. 
*/ -#define M_CUT 'c' - -/* used when balancing on leaf level skipped (in reiserfsck) */ -#define M_INTERNAL 'n' - -/* - * When further balancing is not needed, then do_balance does not need - * to be called. - */ -#define M_SKIP_BALANCING 's' -#define M_CONVERT 'v' - -/* modes of leaf_move_items */ -#define LEAF_FROM_S_TO_L 0 -#define LEAF_FROM_S_TO_R 1 -#define LEAF_FROM_R_TO_L 2 -#define LEAF_FROM_L_TO_R 3 -#define LEAF_FROM_S_TO_SNEW 4 - -#define FIRST_TO_LAST 0 -#define LAST_TO_FIRST 1 - -/* - * used in do_balance for passing parent of node information that has - * been gotten from tb struct - */ -struct buffer_info { - struct tree_balance *tb; - struct buffer_head *bi_bh; - struct buffer_head *bi_parent; - int bi_position; -}; - -static inline struct super_block *sb_from_tb(struct tree_balance *tb) -{ - return tb ? tb->tb_sb : NULL; -} - -static inline struct super_block *sb_from_bi(struct buffer_info *bi) -{ - return bi ? sb_from_tb(bi->tb) : NULL; -} - -/* - * there are 4 types of items: stat data, directory item, indirect, direct. - * +-------------------+------------+--------------+------------+ - * | | k_offset | k_uniqueness | mergeable? | - * +-------------------+------------+--------------+------------+ - * | stat data | 0 | 0 | no | - * +-------------------+------------+--------------+------------+ - * | 1st directory item| DOT_OFFSET | DIRENTRY_ .. | no | - * | non 1st directory | hash value | UNIQUENESS | yes | - * | item | | | | - * +-------------------+------------+--------------+------------+ - * | indirect item | offset + 1 |TYPE_INDIRECT | [1] | - * +-------------------+------------+--------------+------------+ - * | direct item | offset + 1 |TYPE_DIRECT | [2] | - * +-------------------+------------+--------------+------------+ - * - * [1] if this is not the first indirect item of the object - * [2] if this is not the first direct item of the object -*/ - -struct item_operations { - int (*bytes_number) (struct item_head * ih, int block_size); - void (*decrement_key) (struct cpu_key *); - int (*is_left_mergeable) (struct reiserfs_key * ih, - unsigned long bsize); - void (*print_item) (struct item_head *, char *item); - void (*check_item) (struct item_head *, char *item); - - int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi, - int is_affected, int insert_size); - int (*check_left) (struct virtual_item * vi, int free, - int start_skip, int end_skip); - int (*check_right) (struct virtual_item * vi, int free); - int (*part_size) (struct virtual_item * vi, int from, int to); - int (*unit_num) (struct virtual_item * vi); - void (*print_vi) (struct virtual_item * vi); -}; - -extern struct item_operations *item_ops[TYPE_ANY + 1]; - -#define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize) -#define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize) -#define op_print_item(ih,item) item_ops[le_ih_k_type (ih)]->print_item (ih, item) -#define op_check_item(ih,item) item_ops[le_ih_k_type (ih)]->check_item (ih, item) -#define op_create_vi(vn,vi,is_affected,insert_size) item_ops[le_ih_k_type ((vi)->vi_ih)]->create_vi (vn,vi,is_affected,insert_size) -#define op_check_left(vi,free,start_skip,end_skip) item_ops[(vi)->vi_index]->check_left (vi, free, start_skip, end_skip) -#define op_check_right(vi,free) item_ops[(vi)->vi_index]->check_right (vi, free) -#define op_part_size(vi,from,to) item_ops[(vi)->vi_index]->part_size (vi, from, to) -#define op_unit_num(vi) 
item_ops[(vi)->vi_index]->unit_num (vi) -#define op_print_vi(vi) item_ops[(vi)->vi_index]->print_vi (vi) - -#define COMP_SHORT_KEYS comp_short_keys - -/* number of blocks pointed to by the indirect item */ -#define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE) - -/* - * the used space within the unformatted node corresponding - * to pos within the item pointed to by ih - */ -#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size)) - -/* - * number of bytes contained by the direct item or the - * unformatted nodes the indirect item points to - */ - -/* following defines use reiserfs buffer header and item header */ - -/* get stat-data */ -#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) ) - -/* this is 3976 for size==4096 */ -#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) - -/* - * indirect items consist of entries which contain blocknrs, pos - * indicates which entry, and B_I_POS_UNFM_POINTER resolves to the - * blocknr contained by the entry pos points to - */ -#define B_I_POS_UNFM_POINTER(bh, ih, pos) \ - le32_to_cpu(*(((unp_t *)ih_item_body(bh, ih)) + (pos))) -#define PUT_B_I_POS_UNFM_POINTER(bh, ih, pos, val) \ - (*(((unp_t *)ih_item_body(bh, ih)) + (pos)) = cpu_to_le32(val)) - -struct reiserfs_iget_args { - __u32 objectid; - __u32 dirid; -}; - -/*************************************************************************** - * FUNCTION DECLARATIONS * - ***************************************************************************/ - -#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) - -#define journal_trans_half(blocksize) \ - ((blocksize - sizeof(struct reiserfs_journal_desc) - 12) / sizeof(__u32)) - -/* journal.c see journal.c for all the comments here */ - -/* first block written in a commit. */ -struct reiserfs_journal_desc { - __le32 j_trans_id; /* id of commit */ - - /* length of commit. len +1 is the commit block */ - __le32 j_len; - - __le32 j_mount_id; /* mount id of this trans */ - __le32 j_realblock[]; /* real locations for each block */ -}; - -#define get_desc_trans_id(d) le32_to_cpu((d)->j_trans_id) -#define get_desc_trans_len(d) le32_to_cpu((d)->j_len) -#define get_desc_mount_id(d) le32_to_cpu((d)->j_mount_id) - -#define set_desc_trans_id(d,val) do { (d)->j_trans_id = cpu_to_le32 (val); } while (0) -#define set_desc_trans_len(d,val) do { (d)->j_len = cpu_to_le32 (val); } while (0) -#define set_desc_mount_id(d,val) do { (d)->j_mount_id = cpu_to_le32 (val); } while (0) - -/* last block written in a commit */ -struct reiserfs_journal_commit { - __le32 j_trans_id; /* must match j_trans_id from the desc block */ - __le32 j_len; /* ditto */ - __le32 j_realblock[]; /* real locations for each block */ -}; - -#define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id) -#define get_commit_trans_len(c) le32_to_cpu((c)->j_len) -#define get_commit_mount_id(c) le32_to_cpu((c)->j_mount_id) - -#define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0) -#define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0) - -/* - * this header block gets written whenever a transaction is considered - * fully flushed, and is more recent than the last fully flushed transaction. - * fully flushed means all the log blocks and all the real blocks are on - * disk, and this transaction does not need to be replayed. 
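journal_trans_half() above computes how many block numbers fit into a single descriptor or commit block: the block size minus the fixed header minus the 12-byte magic kept at the end of the block, divided by 4. A stand-alone sketch of that calculation, assuming the same three fixed 32-bit header fields shown above:

#include <stdio.h>
#include <stdint.h>

/* same shape as the descriptor header: three 32-bit fields + blocknr array */
struct demo_journal_desc {
	uint32_t j_trans_id;
	uint32_t j_len;
	uint32_t j_mount_id;
	uint32_t j_realblock[];
};

/* mirrors journal_trans_half(): space left for block numbers in one block,
 * after the fixed header and the 12-byte magic at the end of the block */
#define demo_trans_half(bs) \
	(((bs) - sizeof(struct demo_journal_desc) - 12) / sizeof(uint32_t))

int main(void)
{
	printf("4k block: %zu blocknrs per desc/commit block\n",
	       demo_trans_half(4096));
	return 0;
}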
- */ -struct reiserfs_journal_header { - /* id of last fully flushed transaction */ - __le32 j_last_flush_trans_id; - - /* offset in the log of where to start replay after a crash */ - __le32 j_first_unflushed_offset; - - __le32 j_mount_id; - /* 12 */ struct journal_params jh_journal; -}; - -/* biggest tunable defines are right here */ -#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ - -/* biggest possible single transaction, don't change for now (8/3/99) */ -#define JOURNAL_TRANS_MAX_DEFAULT 1024 -#define JOURNAL_TRANS_MIN_DEFAULT 256 - -/* - * max blocks to batch into one transaction, - * don't make this any bigger than 900 - */ -#define JOURNAL_MAX_BATCH_DEFAULT 900 -#define JOURNAL_MIN_RATIO 2 -#define JOURNAL_MAX_COMMIT_AGE 30 -#define JOURNAL_MAX_TRANS_AGE 30 -#define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9) -#define JOURNAL_BLOCKS_PER_OBJECT(sb) (JOURNAL_PER_BALANCE_CNT * 3 + \ - 2 * (REISERFS_QUOTA_INIT_BLOCKS(sb) + \ - REISERFS_QUOTA_TRANS_BLOCKS(sb))) - -#ifdef CONFIG_QUOTA -#define REISERFS_QUOTA_OPTS ((1 << REISERFS_USRQUOTA) | (1 << REISERFS_GRPQUOTA)) -/* We need to update data and inode (atime) */ -#define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? 2 : 0) -/* 1 balancing, 1 bitmap, 1 data per write + stat data update */ -#define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \ -(DQUOT_INIT_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_INIT_REWRITE+1) : 0) -/* same as with INIT */ -#define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \ -(DQUOT_DEL_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_DEL_REWRITE+1) : 0) -#else -#define REISERFS_QUOTA_TRANS_BLOCKS(s) 0 -#define REISERFS_QUOTA_INIT_BLOCKS(s) 0 -#define REISERFS_QUOTA_DEL_BLOCKS(s) 0 -#endif - -/* - * both of these can be as low as 1, or as high as you want. The min is the - * number of 4k bitmap nodes preallocated on mount. New nodes are allocated - * as needed, and released when transactions are committed. On release, if - * the current number of nodes is > max, the node is freed, otherwise, - * it is put on a free list for faster use later. -*/ -#define REISERFS_MIN_BITMAP_NODES 10 -#define REISERFS_MAX_BITMAP_NODES 100 - -/* these are based on journal hash size of 8192 */ -#define JBH_HASH_SHIFT 13 -#define JBH_HASH_MASK 8191 - -#define _jhashfn(sb,block) \ - (((unsigned long)sb>>L1_CACHE_SHIFT) ^ \ - (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) -#define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK]) - -/* We need these to make journal.c code more readable */ -#define journal_find_get_block(s, block) __find_get_block(\ - file_bdev(SB_JOURNAL(s)->j_bdev_file), block, s->s_blocksize) -#define journal_getblk(s, block) __getblk(file_bdev(SB_JOURNAL(s)->j_bdev_file),\ - block, s->s_blocksize) -#define journal_bread(s, block) __bread(file_bdev(SB_JOURNAL(s)->j_bdev_file),\ - block, s->s_blocksize) - -enum reiserfs_bh_state_bits { - BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */ - BH_JDirty_wait, - /* - * disk block was taken off free list before being in a - * finished transaction, or written to disk. Can be reused immed. 
- */ - BH_JNew, - BH_JPrepared, - BH_JRestore_dirty, - BH_JTest, /* debugging only will go away */ -}; - -BUFFER_FNS(JDirty, journaled); -TAS_BUFFER_FNS(JDirty, journaled); -BUFFER_FNS(JDirty_wait, journal_dirty); -TAS_BUFFER_FNS(JDirty_wait, journal_dirty); -BUFFER_FNS(JNew, journal_new); -TAS_BUFFER_FNS(JNew, journal_new); -BUFFER_FNS(JPrepared, journal_prepared); -TAS_BUFFER_FNS(JPrepared, journal_prepared); -BUFFER_FNS(JRestore_dirty, journal_restore_dirty); -TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty); -BUFFER_FNS(JTest, journal_test); -TAS_BUFFER_FNS(JTest, journal_test); - -/* transaction handle which is passed around for all journal calls */ -struct reiserfs_transaction_handle { - /* - * super for this FS when journal_begin was called. saves calls to - * reiserfs_get_super also used by nested transactions to make - * sure they are nesting on the right FS _must_ be first - * in the handle - */ - struct super_block *t_super; - - int t_refcount; - int t_blocks_logged; /* number of blocks this writer has logged */ - int t_blocks_allocated; /* number of blocks this writer allocated */ - - /* sanity check, equals the current trans id */ - unsigned int t_trans_id; - - void *t_handle_save; /* save existing current->journal_info */ - - /* - * if new block allocation occurres, that block - * should be displaced from others - */ - unsigned displace_new_blocks:1; - - struct list_head t_list; -}; - -/* - * used to keep track of ordered and tail writes, attached to the buffer - * head through b_journal_head. - */ -struct reiserfs_jh { - struct reiserfs_journal_list *jl; - struct buffer_head *bh; - struct list_head list; -}; - -void reiserfs_free_jh(struct buffer_head *bh); -int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh); -int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh); -int journal_mark_dirty(struct reiserfs_transaction_handle *, - struct buffer_head *bh); - -static inline int reiserfs_file_data_log(struct inode *inode) -{ - if (reiserfs_data_log(inode->i_sb) || - (REISERFS_I(inode)->i_flags & i_data_log)) - return 1; - return 0; -} - -static inline int reiserfs_transaction_running(struct super_block *s) -{ - struct reiserfs_transaction_handle *th = current->journal_info; - if (th && th->t_super == s) - return 1; - if (th && th->t_super == NULL) - BUG(); - return 0; -} - -static inline int reiserfs_transaction_free_space(struct reiserfs_transaction_handle *th) -{ - return th->t_blocks_allocated - th->t_blocks_logged; -} - -struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct - super_block - *, - int count); -int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *); -void reiserfs_vfs_truncate_file(struct inode *inode); -int reiserfs_commit_page(struct inode *inode, struct page *page, - unsigned from, unsigned to); -void reiserfs_flush_old_commits(struct super_block *); -int reiserfs_commit_for_inode(struct inode *); -int reiserfs_inode_needs_commit(struct inode *); -void reiserfs_update_inode_transaction(struct inode *); -void reiserfs_wait_on_write_block(struct super_block *s); -void reiserfs_block_writes(struct reiserfs_transaction_handle *th); -void reiserfs_allow_writes(struct super_block *s); -void reiserfs_check_lock_depth(struct super_block *s, char *caller); -int reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh, - int wait); -void reiserfs_restore_prepared_buffer(struct super_block *, - struct buffer_head *bh); -int journal_init(struct super_block *, const 
char *j_dev_name, int old_format, - unsigned int); -int journal_release(struct reiserfs_transaction_handle *, struct super_block *); -int journal_release_error(struct reiserfs_transaction_handle *, - struct super_block *); -int journal_end(struct reiserfs_transaction_handle *); -int journal_end_sync(struct reiserfs_transaction_handle *); -int journal_mark_freed(struct reiserfs_transaction_handle *, - struct super_block *, b_blocknr_t blocknr); -int journal_transaction_should_end(struct reiserfs_transaction_handle *, int); -int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr, - int bit_nr, int searchall, b_blocknr_t *next); -int journal_begin(struct reiserfs_transaction_handle *, - struct super_block *sb, unsigned long); -int journal_join_abort(struct reiserfs_transaction_handle *, - struct super_block *sb); -void reiserfs_abort_journal(struct super_block *sb, int errno); -void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); -int reiserfs_allocate_list_bitmaps(struct super_block *s, - struct reiserfs_list_bitmap *, unsigned int); - -void reiserfs_schedule_old_flush(struct super_block *s); -void reiserfs_cancel_old_flush(struct super_block *s); -void add_save_link(struct reiserfs_transaction_handle *th, - struct inode *inode, int truncate); -int remove_save_link(struct inode *inode, int truncate); - -/* objectid.c */ -__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th); -void reiserfs_release_objectid(struct reiserfs_transaction_handle *th, - __u32 objectid_to_release); -int reiserfs_convert_objectid_map_v1(struct super_block *); - -/* stree.c */ -int B_IS_IN_TREE(const struct buffer_head *); -extern void copy_item_head(struct item_head *to, - const struct item_head *from); - -/* first key is in cpu form, second - le */ -extern int comp_short_keys(const struct reiserfs_key *le_key, - const struct cpu_key *cpu_key); -extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from); - -/* both are in le form */ -extern int comp_le_keys(const struct reiserfs_key *, - const struct reiserfs_key *); -extern int comp_short_le_keys(const struct reiserfs_key *, - const struct reiserfs_key *); - -/* * get key version from on disk key - kludge */ -static inline int le_key_version(const struct reiserfs_key *key) -{ - int type; - - type = offset_v2_k_type(&(key->u.k_offset_v2)); - if (type != TYPE_DIRECT && type != TYPE_INDIRECT - && type != TYPE_DIRENTRY) - return KEY_FORMAT_3_5; - - return KEY_FORMAT_3_6; - -} - -static inline void copy_key(struct reiserfs_key *to, - const struct reiserfs_key *from) -{ - memcpy(to, from, KEY_SIZE); -} - -int comp_items(const struct item_head *stored_ih, const struct treepath *path); -const struct reiserfs_key *get_rkey(const struct treepath *chk_path, - const struct super_block *sb); -int search_by_key(struct super_block *, const struct cpu_key *, - struct treepath *, int); -#define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL) -int search_for_position_by_key(struct super_block *sb, - const struct cpu_key *cpu_key, - struct treepath *search_path); -extern void decrement_bcount(struct buffer_head *bh); -void decrement_counters_in_path(struct treepath *search_path); -void pathrelse(struct treepath *search_path); -int reiserfs_check_path(struct treepath *p); -void pathrelse_and_restore(struct super_block *s, struct treepath *search_path); - -int reiserfs_insert_item(struct reiserfs_transaction_handle *th, - struct treepath *path, - const struct cpu_key *key, - 
struct item_head *ih, - struct inode *inode, const char *body); - -int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, - struct treepath *path, - const struct cpu_key *key, - struct inode *inode, - const char *body, int paste_size); - -int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, - struct treepath *path, - struct cpu_key *key, - struct inode *inode, - struct page *page, loff_t new_file_size); - -int reiserfs_delete_item(struct reiserfs_transaction_handle *th, - struct treepath *path, - const struct cpu_key *key, - struct inode *inode, struct buffer_head *un_bh); - -void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, - struct inode *inode, struct reiserfs_key *key); -int reiserfs_delete_object(struct reiserfs_transaction_handle *th, - struct inode *inode); -int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, - struct inode *inode, struct page *, - int update_timestamps); - -#define i_block_size(inode) ((inode)->i_sb->s_blocksize) -#define file_size(inode) ((inode)->i_size) -#define tail_size(inode) (file_size (inode) & (i_block_size (inode) - 1)) - -#define tail_has_to_be_packed(inode) (have_large_tails ((inode)->i_sb)?\ -!STORE_TAIL_IN_UNFM_S1(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):have_small_tails ((inode)->i_sb)?!STORE_TAIL_IN_UNFM_S2(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):0 ) - -void padd_item(char *item, int total_length, int length); - -/* inode.c */ -/* args for the create parameter of reiserfs_get_block */ -#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ -#define GET_BLOCK_CREATE 1 /* add anything you need to find block */ -#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ -#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ -#define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */ -#define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */ - -void reiserfs_read_locked_inode(struct inode *inode, - struct reiserfs_iget_args *args); -int reiserfs_find_actor(struct inode *inode, void *p); -int reiserfs_init_locked_inode(struct inode *inode, void *p); -void reiserfs_evict_inode(struct inode *inode); -int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc); -int reiserfs_get_block(struct inode *inode, sector_t block, - struct buffer_head *bh_result, int create); -struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type); -struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type); -int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, - struct inode *parent); - -int reiserfs_truncate_file(struct inode *, int update_timestamps); -void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset, - int type, int key_length); -void make_le_item_head(struct item_head *ih, const struct cpu_key *key, - int version, - loff_t offset, int type, int length, int entry_count); -struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key); - -struct reiserfs_security_handle; -int reiserfs_new_inode(struct reiserfs_transaction_handle *th, - struct inode *dir, umode_t mode, - const char *symname, loff_t i_size, - struct dentry *dentry, struct inode *inode, - struct reiserfs_security_handle *security); - -void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, - struct inode *inode, loff_t size); - -static inline void 
reiserfs_update_sd(struct reiserfs_transaction_handle *th, - struct inode *inode) -{ - reiserfs_update_sd_size(th, inode, inode->i_size); -} - -void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); -int reiserfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, - struct iattr *attr); - -int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len); - -/* namei.c */ -void reiserfs_init_priv_inode(struct inode *inode); -void set_de_name_and_namelen(struct reiserfs_dir_entry *de); -int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, - struct treepath *path, struct reiserfs_dir_entry *de); -struct dentry *reiserfs_get_parent(struct dentry *); - -#ifdef CONFIG_REISERFS_PROC_INFO -int reiserfs_proc_info_init(struct super_block *sb); -int reiserfs_proc_info_done(struct super_block *sb); -int reiserfs_proc_info_global_init(void); -int reiserfs_proc_info_global_done(void); - -#define PROC_EXP( e ) e - -#define __PINFO( sb ) REISERFS_SB(sb) -> s_proc_info_data -#define PROC_INFO_MAX( sb, field, value ) \ - __PINFO( sb ).field = \ - max( REISERFS_SB( sb ) -> s_proc_info_data.field, value ) -#define PROC_INFO_INC( sb, field ) ( ++ ( __PINFO( sb ).field ) ) -#define PROC_INFO_ADD( sb, field, val ) ( __PINFO( sb ).field += ( val ) ) -#define PROC_INFO_BH_STAT( sb, bh, level ) \ - PROC_INFO_INC( sb, sbk_read_at[ ( level ) ] ); \ - PROC_INFO_ADD( sb, free_at[ ( level ) ], B_FREE_SPACE( bh ) ); \ - PROC_INFO_ADD( sb, items_at[ ( level ) ], B_NR_ITEMS( bh ) ) -#else -static inline int reiserfs_proc_info_init(struct super_block *sb) -{ - return 0; -} - -static inline int reiserfs_proc_info_done(struct super_block *sb) -{ - return 0; -} - -static inline int reiserfs_proc_info_global_init(void) -{ - return 0; -} - -static inline int reiserfs_proc_info_global_done(void) -{ - return 0; -} - -#define PROC_EXP( e ) -#define VOID_V ( ( void ) 0 ) -#define PROC_INFO_MAX( sb, field, value ) VOID_V -#define PROC_INFO_INC( sb, field ) VOID_V -#define PROC_INFO_ADD( sb, field, val ) VOID_V -#define PROC_INFO_BH_STAT(sb, bh, n_node_level) VOID_V -#endif - -/* dir.c */ -extern const struct inode_operations reiserfs_dir_inode_operations; -extern const struct inode_operations reiserfs_symlink_inode_operations; -extern const struct inode_operations reiserfs_special_inode_operations; -extern const struct file_operations reiserfs_dir_operations; -int reiserfs_readdir_inode(struct inode *, struct dir_context *); - -/* tail_conversion.c */ -int direct2indirect(struct reiserfs_transaction_handle *, struct inode *, - struct treepath *, struct buffer_head *, loff_t); -int indirect2direct(struct reiserfs_transaction_handle *, struct inode *, - struct page *, struct treepath *, const struct cpu_key *, - loff_t, char *); -void reiserfs_unmap_buffer(struct buffer_head *); - -/* file.c */ -extern const struct inode_operations reiserfs_file_inode_operations; -extern const struct inode_operations reiserfs_priv_file_inode_operations; -extern const struct file_operations reiserfs_file_operations; -extern const struct address_space_operations reiserfs_address_space_operations; - -/* fix_nodes.c */ - -int fix_nodes(int n_op_mode, struct tree_balance *tb, - struct item_head *ins_ih, const void *); -void unfix_nodes(struct tree_balance *); - -/* prints.c */ -void __reiserfs_panic(struct super_block *s, const char *id, - const char *function, const char *fmt, ...) - __attribute__ ((noreturn)); -#define reiserfs_panic(s, id, fmt, args...) 
\ - __reiserfs_panic(s, id, __func__, fmt, ##args) -void __reiserfs_error(struct super_block *s, const char *id, - const char *function, const char *fmt, ...); -#define reiserfs_error(s, id, fmt, args...) \ - __reiserfs_error(s, id, __func__, fmt, ##args) -void reiserfs_info(struct super_block *s, const char *fmt, ...); -void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...); -void print_indirect_item(struct buffer_head *bh, int item_num); -void store_print_tb(struct tree_balance *tb); -void print_cur_tb(char *mes); -void print_de(struct reiserfs_dir_entry *de); -void print_bi(struct buffer_info *bi, char *mes); -#define PRINT_LEAF_ITEMS 1 /* print all items */ -#define PRINT_DIRECTORY_ITEMS 2 /* print directory items */ -#define PRINT_DIRECT_ITEMS 4 /* print contents of direct items */ -void print_block(struct buffer_head *bh, ...); -void print_bmap(struct super_block *s, int silent); -void print_bmap_block(int i, char *data, int size, int silent); -/*void print_super_block (struct super_block * s, char * mes);*/ -void print_objectid_map(struct super_block *s); -void print_block_head(struct buffer_head *bh, char *mes); -void check_leaf(struct buffer_head *bh); -void check_internal(struct buffer_head *bh); -void print_statistics(struct super_block *s); -char *reiserfs_hashname(int code); - -/* lbalance.c */ -int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, - int mov_bytes, struct buffer_head *Snew); -int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes); -int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes); -void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first, - int del_num, int del_bytes); -void leaf_insert_into_buf(struct buffer_info *bi, int before, - struct item_head * const inserted_item_ih, - const char * const inserted_item_body, - int zeros_number); -void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num, - int pos_in_item, int paste_size, - const char * const body, int zeros_number); -void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, - int pos_in_item, int cut_size); -void leaf_paste_entries(struct buffer_info *bi, int item_num, int before, - int new_entry_count, struct reiserfs_de_head *new_dehs, - const char *records, int paste_size); -/* ibalance.c */ -int balance_internal(struct tree_balance *, int, int, struct item_head *, - struct buffer_head **); - -/* do_balance.c */ -void do_balance_mark_leaf_dirty(struct tree_balance *tb, - struct buffer_head *bh, int flag); -#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty -#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty - -void do_balance(struct tree_balance *tb, struct item_head *ih, - const char *body, int flag); -void reiserfs_invalidate_buffer(struct tree_balance *tb, - struct buffer_head *bh); - -int get_left_neighbor_position(struct tree_balance *tb, int h); -int get_right_neighbor_position(struct tree_balance *tb, int h); -void replace_key(struct tree_balance *tb, struct buffer_head *, int, - struct buffer_head *, int); -void make_empty_node(struct buffer_info *); -struct buffer_head *get_FEB(struct tree_balance *); - -/* bitmap.c */ - -/* - * structure contains hints for block allocator, and it is a container for - * arguments, such as node, search path, transaction_handle, etc. - */ -struct __reiserfs_blocknr_hint { - /* inode passed to allocator, if we allocate unf. 
nodes */ - struct inode *inode; - - sector_t block; /* file offset, in blocks */ - struct in_core_key key; - - /* - * search path, used by allocator to deternine search_start by - * various ways - */ - struct treepath *path; - - /* - * transaction handle is needed to log super blocks - * and bitmap blocks changes - */ - struct reiserfs_transaction_handle *th; - - b_blocknr_t beg, end; - - /* - * a field used to transfer search start value (block number) - * between different block allocator procedures - * (determine_search_start() and others) - */ - b_blocknr_t search_start; - - /* - * is set in determine_prealloc_size() function, - * used by underlayed function that do actual allocation - */ - int prealloc_size; - - /* - * the allocator uses different polices for getting disk - * space for formatted/unformatted blocks with/without preallocation - */ - unsigned formatted_node:1; - unsigned preallocate:1; -}; - -typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t; - -int reiserfs_parse_alloc_options(struct super_block *, char *); -void reiserfs_init_alloc_options(struct super_block *s); - -/* - * given a directory, this will tell you what packing locality - * to use for a new object underneat it. The locality is returned - * in disk byte order (le). - */ -__le32 reiserfs_choose_packing(struct inode *dir); - -void show_alloc_options(struct seq_file *seq, struct super_block *s); -int reiserfs_init_bitmap_cache(struct super_block *sb); -void reiserfs_free_bitmap_cache(struct super_block *sb); -void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info); -struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, unsigned int bitmap); -int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); -void reiserfs_free_block(struct reiserfs_transaction_handle *th, struct inode *, - b_blocknr_t, int for_unformatted); -int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t *, int, - int); -static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb, - b_blocknr_t * new_blocknrs, - int amount_needed) -{ - reiserfs_blocknr_hint_t hint = { - .th = tb->transaction_handle, - .path = tb->tb_path, - .inode = NULL, - .key = tb->key, - .block = 0, - .formatted_node = 1 - }; - return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed, - 0); -} - -static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle - *th, struct inode *inode, - b_blocknr_t * new_blocknrs, - struct treepath *path, - sector_t block) -{ - reiserfs_blocknr_hint_t hint = { - .th = th, - .path = path, - .inode = inode, - .block = block, - .formatted_node = 0, - .preallocate = 0 - }; - return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); -} - -#ifdef REISERFS_PREALLOCATE -static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle - *th, struct inode *inode, - b_blocknr_t * new_blocknrs, - struct treepath *path, - sector_t block) -{ - reiserfs_blocknr_hint_t hint = { - .th = th, - .path = path, - .inode = inode, - .block = block, - .formatted_node = 0, - .preallocate = 1 - }; - return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); -} - -void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th, - struct inode *inode); -void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th); -#endif - -/* hashes.c */ -__u32 keyed_hash(const signed char *msg, int len); -__u32 yura_hash(const signed char *msg, int len); -__u32 r5_hash(const signed 
char *msg, int len); - -#define reiserfs_set_le_bit __set_bit_le -#define reiserfs_test_and_set_le_bit __test_and_set_bit_le -#define reiserfs_clear_le_bit __clear_bit_le -#define reiserfs_test_and_clear_le_bit __test_and_clear_bit_le -#define reiserfs_test_le_bit test_bit_le -#define reiserfs_find_next_zero_le_bit find_next_zero_bit_le - -/* - * sometimes reiserfs_truncate may require to allocate few new blocks - * to perform indirect2direct conversion. People probably used to - * think, that truncate should work without problems on a filesystem - * without free disk space. They may complain that they can not - * truncate due to lack of free disk space. This spare space allows us - * to not worry about it. 500 is probably too much, but it should be - * absolutely safe - */ -#define SPARE_SPACE 500 - -/* prototypes from ioctl.c */ -int reiserfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); -int reiserfs_fileattr_set(struct mnt_idmap *idmap, - struct dentry *dentry, struct fileattr *fa); -long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -long reiserfs_compat_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg); -int reiserfs_unpack(struct inode *inode); diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c deleted file mode 100644 index 7b498a0d060b..000000000000 --- a/fs/reiserfs/resize.c +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -/* - * Written by Alexander Zarochentcev. - * - * The kernel part of the (on-line) reiserfs resizer. - */ - -#include -#include -#include -#include -#include -#include "reiserfs.h" -#include - -int reiserfs_resize(struct super_block *s, unsigned long block_count_new) -{ - int err = 0; - struct reiserfs_super_block *sb; - struct reiserfs_bitmap_info *bitmap; - struct reiserfs_bitmap_info *info; - struct reiserfs_bitmap_info *old_bitmap = SB_AP_BITMAP(s); - struct buffer_head *bh; - struct reiserfs_transaction_handle th; - unsigned int bmap_nr_new, bmap_nr; - unsigned int block_r_new, block_r; - - struct reiserfs_list_bitmap *jb; - struct reiserfs_list_bitmap jbitmap[JOURNAL_NUM_BITMAPS]; - - unsigned long int block_count, free_blocks; - int i; - int copy_size; - int depth; - - sb = SB_DISK_SUPER_BLOCK(s); - - if (SB_BLOCK_COUNT(s) >= block_count_new) { - printk("can\'t shrink filesystem on-line\n"); - return -EINVAL; - } - - /* check the device size */ - depth = reiserfs_write_unlock_nested(s); - bh = sb_bread(s, block_count_new - 1); - reiserfs_write_lock_nested(s, depth); - if (!bh) { - printk("reiserfs_resize: can\'t read last block\n"); - return -EINVAL; - } - bforget(bh); - - /* - * old disk layout detection; those partitions can be mounted, but - * cannot be resized - */ - if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size - != REISERFS_DISK_OFFSET_IN_BYTES) { - printk - ("reiserfs_resize: unable to resize a reiserfs without distributed bitmap (fs version < 3.5.12)\n"); - return -ENOTSUPP; - } - - /* count used bits in last bitmap block */ - block_r = SB_BLOCK_COUNT(s) - - (reiserfs_bmap_count(s) - 1) * s->s_blocksize * 8; - - /* count bitmap blocks in new fs */ - bmap_nr_new = block_count_new / (s->s_blocksize * 8); - block_r_new = block_count_new - bmap_nr_new * s->s_blocksize * 8; - if (block_r_new) - bmap_nr_new++; - else - block_r_new = s->s_blocksize * 8; - - /* save old values */ - block_count = SB_BLOCK_COUNT(s); - bmap_nr = reiserfs_bmap_count(s); - - /* resizing of reiserfs bitmaps (journal and real), if 
needed */ - if (bmap_nr_new > bmap_nr) { - /* reallocate journal bitmaps */ - if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) { - printk - ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); - return -ENOMEM; - } - /* - * the new journal bitmaps are zero filled, now we copy i - * the bitmap node pointers from the old journal bitmap - * structs, and then transfer the new data structures - * into the journal struct. - * - * using the copy_size var below allows this code to work for - * both shrinking and expanding the FS. - */ - copy_size = min(bmap_nr_new, bmap_nr); - copy_size = - copy_size * sizeof(struct reiserfs_list_bitmap_node *); - for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { - struct reiserfs_bitmap_node **node_tmp; - jb = SB_JOURNAL(s)->j_list_bitmap + i; - memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size); - - /* - * just in case vfree schedules on us, copy the new - * pointer into the journal struct before freeing the - * old one - */ - node_tmp = jb->bitmaps; - jb->bitmaps = jbitmap[i].bitmaps; - vfree(node_tmp); - } - - /* - * allocate additional bitmap blocks, reallocate - * array of bitmap block pointers - */ - bitmap = - vzalloc(array_size(bmap_nr_new, - sizeof(struct reiserfs_bitmap_info))); - if (!bitmap) { - /* - * Journal bitmaps are still supersized, but the - * memory isn't leaked, so I guess it's ok - */ - printk("reiserfs_resize: unable to allocate memory.\n"); - return -ENOMEM; - } - for (i = 0; i < bmap_nr; i++) - bitmap[i] = old_bitmap[i]; - - /* - * This doesn't go through the journal, but it doesn't have to. - * The changes are still atomic: We're synced up when the - * journal transaction begins, and the new bitmaps don't - * matter if the transaction fails. - */ - for (i = bmap_nr; i < bmap_nr_new; i++) { - int depth; - /* - * don't use read_bitmap_block since it will cache - * the uninitialized bitmap - */ - depth = reiserfs_write_unlock_nested(s); - bh = sb_bread(s, i * s->s_blocksize * 8); - reiserfs_write_lock_nested(s, depth); - if (!bh) { - vfree(bitmap); - return -EIO; - } - memset(bh->b_data, 0, sb_blocksize(sb)); - reiserfs_set_le_bit(0, bh->b_data); - reiserfs_cache_bitmap_metadata(s, bh, bitmap + i); - - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - depth = reiserfs_write_unlock_nested(s); - sync_dirty_buffer(bh); - reiserfs_write_lock_nested(s, depth); - /* update bitmap_info stuff */ - bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; - brelse(bh); - } - /* free old bitmap blocks array */ - SB_AP_BITMAP(s) = bitmap; - vfree(old_bitmap); - } - - /* - * begin transaction, if there was an error, it's fine. Yes, we have - * incorrect bitmaps now, but none of it is ever going to touch the - * disk anyway. 
- */ - err = journal_begin(&th, s, 10); - if (err) - return err; - - /* Extend old last bitmap block - new blocks have been made available */ - info = SB_AP_BITMAP(s) + bmap_nr - 1; - bh = reiserfs_read_bitmap_block(s, bmap_nr - 1); - if (!bh) { - int jerr = journal_end(&th); - if (jerr) - return jerr; - return -EIO; - } - - reiserfs_prepare_for_journal(s, bh, 1); - for (i = block_r; i < s->s_blocksize * 8; i++) - reiserfs_clear_le_bit(i, bh->b_data); - info->free_count += s->s_blocksize * 8 - block_r; - - journal_mark_dirty(&th, bh); - brelse(bh); - - /* Correct new last bitmap block - It may not be full */ - info = SB_AP_BITMAP(s) + bmap_nr_new - 1; - bh = reiserfs_read_bitmap_block(s, bmap_nr_new - 1); - if (!bh) { - int jerr = journal_end(&th); - if (jerr) - return jerr; - return -EIO; - } - - reiserfs_prepare_for_journal(s, bh, 1); - for (i = block_r_new; i < s->s_blocksize * 8; i++) - reiserfs_set_le_bit(i, bh->b_data); - journal_mark_dirty(&th, bh); - brelse(bh); - - info->free_count -= s->s_blocksize * 8 - block_r_new; - /* update super */ - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - free_blocks = SB_FREE_BLOCKS(s); - PUT_SB_FREE_BLOCKS(s, - free_blocks + (block_count_new - block_count - - (bmap_nr_new - bmap_nr))); - PUT_SB_BLOCK_COUNT(s, block_count_new); - PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new); - - journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); - - SB_JOURNAL(s)->j_must_wait = 1; - return journal_end(&th); -} diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c deleted file mode 100644 index 5faf702f8d15..000000000000 --- a/fs/reiserfs/stree.c +++ /dev/null @@ -1,2280 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - */ - -/* - * Written by Anatoly P. Pinchuk pap@namesys.botik.ru - * Programm System Institute - * Pereslavl-Zalessky Russia - */ - -#include -#include -#include -#include -#include "reiserfs.h" -#include -#include - -/* Does the buffer contain a disk block which is in the tree. */ -inline int B_IS_IN_TREE(const struct buffer_head *bh) -{ - - RFALSE(B_LEVEL(bh) > MAX_HEIGHT, - "PAP-1010: block (%b) has too big level (%z)", bh, bh); - - return (B_LEVEL(bh) != FREE_LEVEL); -} - -/* to get item head in le form */ -inline void copy_item_head(struct item_head *to, - const struct item_head *from) -{ - memcpy(to, from, IH_SIZE); -} - -/* - * k1 is pointer to on-disk structure which is stored in little-endian - * form. k2 is pointer to cpu variable. For key of items of the same - * object this returns 0. - * Returns: -1 if key1 < key2 - * 0 if key1 == key2 - * 1 if key1 > key2 - */ -inline int comp_short_keys(const struct reiserfs_key *le_key, - const struct cpu_key *cpu_key) -{ - __u32 n; - n = le32_to_cpu(le_key->k_dir_id); - if (n < cpu_key->on_disk_key.k_dir_id) - return -1; - if (n > cpu_key->on_disk_key.k_dir_id) - return 1; - n = le32_to_cpu(le_key->k_objectid); - if (n < cpu_key->on_disk_key.k_objectid) - return -1; - if (n > cpu_key->on_disk_key.k_objectid) - return 1; - return 0; -} - -/* - * k1 is pointer to on-disk structure which is stored in little-endian - * form. k2 is pointer to cpu variable. - * Compare keys using all 4 key fields. 
- * Returns: -1 if key1 < key2 0 - * if key1 = key2 1 if key1 > key2 - */ -static inline int comp_keys(const struct reiserfs_key *le_key, - const struct cpu_key *cpu_key) -{ - int retval; - - retval = comp_short_keys(le_key, cpu_key); - if (retval) - return retval; - if (le_key_k_offset(le_key_version(le_key), le_key) < - cpu_key_k_offset(cpu_key)) - return -1; - if (le_key_k_offset(le_key_version(le_key), le_key) > - cpu_key_k_offset(cpu_key)) - return 1; - - if (cpu_key->key_length == 3) - return 0; - - /* this part is needed only when tail conversion is in progress */ - if (le_key_k_type(le_key_version(le_key), le_key) < - cpu_key_k_type(cpu_key)) - return -1; - - if (le_key_k_type(le_key_version(le_key), le_key) > - cpu_key_k_type(cpu_key)) - return 1; - - return 0; -} - -inline int comp_short_le_keys(const struct reiserfs_key *key1, - const struct reiserfs_key *key2) -{ - __u32 *k1_u32, *k2_u32; - int key_length = REISERFS_SHORT_KEY_LEN; - - k1_u32 = (__u32 *) key1; - k2_u32 = (__u32 *) key2; - for (; key_length--; ++k1_u32, ++k2_u32) { - if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32)) - return -1; - if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32)) - return 1; - } - return 0; -} - -inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from) -{ - int version; - to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id); - to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid); - - /* find out version of the key */ - version = le_key_version(from); - to->version = version; - to->on_disk_key.k_offset = le_key_k_offset(version, from); - to->on_disk_key.k_type = le_key_k_type(version, from); -} - -/* - * this does not say which one is bigger, it only returns 1 if keys - * are not equal, 0 otherwise - */ -inline int comp_le_keys(const struct reiserfs_key *k1, - const struct reiserfs_key *k2) -{ - return memcmp(k1, k2, sizeof(struct reiserfs_key)); -} - -/************************************************************************** - * Binary search toolkit function * - * Search for an item in the array by the item key * - * Returns: 1 if found, 0 if not found; * - * *pos = number of the searched element if found, else the * - * number of the first element that is larger than key. * - **************************************************************************/ -/* - * For those not familiar with binary search: lbound is the leftmost item - * that it could be, rbound the rightmost item that it could be. We examine - * the item halfway between lbound and rbound, and that tells us either - * that we can increase lbound, or decrease rbound, or that we have found it, - * or if lbound <= rbound that there are no possible items, and we have not - * found it. With each examination we cut the number of possible items it - * could be by one more than half rounded down, or we find it. - */ -static inline int bin_search(const void *key, /* Key to search for. */ - const void *base, /* First item in the array. */ - int num, /* Number of items in the array. */ - /* - * Item size in the array. searched. Lest the - * reader be confused, note that this is crafted - * as a general function, and when it is applied - * specifically to the array of item headers in a - * node, width is actually the item header size - * not the item size. - */ - int width, - int *pos /* Number of the searched for element. 
*/ - ) -{ - int rbound, lbound, j; - - for (j = ((rbound = num - 1) + (lbound = 0)) / 2; - lbound <= rbound; j = (rbound + lbound) / 2) - switch (comp_keys - ((struct reiserfs_key *)((char *)base + j * width), - (struct cpu_key *)key)) { - case -1: - lbound = j + 1; - continue; - case 1: - rbound = j - 1; - continue; - case 0: - *pos = j; - return ITEM_FOUND; /* Key found in the array. */ - } - - /* - * bin_search did not find given key, it returns position of key, - * that is minimal and greater than the given one. - */ - *pos = lbound; - return ITEM_NOT_FOUND; -} - - -/* Minimal possible key. It is never in the tree. */ -const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} }; - -/* Maximal possible key. It is never in the tree. */ -static const struct reiserfs_key MAX_KEY = { - cpu_to_le32(0xffffffff), - cpu_to_le32(0xffffffff), - {{cpu_to_le32(0xffffffff), - cpu_to_le32(0xffffffff)},} -}; - -/* - * Get delimiting key of the buffer by looking for it in the buffers in the - * path, starting from the bottom of the path, and going upwards. We must - * check the path's validity at each step. If the key is not in the path, - * there is no delimiting key in the tree (buffer is first or last buffer - * in tree), and in this case we return a special key, either MIN_KEY or - * MAX_KEY. - */ -static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path, - const struct super_block *sb) -{ - int position, path_offset = chk_path->path_length; - struct buffer_head *parent; - - RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET, - "PAP-5010: invalid offset in the path"); - - /* While not higher in path than first element. */ - while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { - - RFALSE(!buffer_uptodate - (PATH_OFFSET_PBUFFER(chk_path, path_offset)), - "PAP-5020: parent is not uptodate"); - - /* Parent at the path is not in the tree now. */ - if (!B_IS_IN_TREE - (parent = - PATH_OFFSET_PBUFFER(chk_path, path_offset))) - return &MAX_KEY; - /* Check whether position in the parent is correct. */ - if ((position = - PATH_OFFSET_POSITION(chk_path, - path_offset)) > - B_NR_ITEMS(parent)) - return &MAX_KEY; - /* Check whether parent at the path really points to the child. */ - if (B_N_CHILD_NUM(parent, position) != - PATH_OFFSET_PBUFFER(chk_path, - path_offset + 1)->b_blocknr) - return &MAX_KEY; - /* - * Return delimiting key if position in the parent - * is not equal to zero. - */ - if (position) - return internal_key(parent, position - 1); - } - /* Return MIN_KEY if we are in the root of the buffer tree. */ - if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> - b_blocknr == SB_ROOT_BLOCK(sb)) - return &MIN_KEY; - return &MAX_KEY; -} - -/* Get delimiting key of the buffer at the path and its right neighbor. */ -inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path, - const struct super_block *sb) -{ - int position, path_offset = chk_path->path_length; - struct buffer_head *parent; - - RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET, - "PAP-5030: invalid offset in the path"); - - while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { - - RFALSE(!buffer_uptodate - (PATH_OFFSET_PBUFFER(chk_path, path_offset)), - "PAP-5040: parent is not uptodate"); - - /* Parent at the path is not in the tree now. */ - if (!B_IS_IN_TREE - (parent = - PATH_OFFSET_PBUFFER(chk_path, path_offset))) - return &MIN_KEY; - /* Check whether position in the parent is correct. 
*/ - if ((position = - PATH_OFFSET_POSITION(chk_path, - path_offset)) > - B_NR_ITEMS(parent)) - return &MIN_KEY; - /* - * Check whether parent at the path really points - * to the child. - */ - if (B_N_CHILD_NUM(parent, position) != - PATH_OFFSET_PBUFFER(chk_path, - path_offset + 1)->b_blocknr) - return &MIN_KEY; - - /* - * Return delimiting key if position in the parent - * is not the last one. - */ - if (position != B_NR_ITEMS(parent)) - return internal_key(parent, position); - } - - /* Return MAX_KEY if we are in the root of the buffer tree. */ - if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> - b_blocknr == SB_ROOT_BLOCK(sb)) - return &MAX_KEY; - return &MIN_KEY; -} - -/* - * Check whether a key is contained in the tree rooted from a buffer at a path. - * This works by looking at the left and right delimiting keys for the buffer - * in the last path_element in the path. These delimiting keys are stored - * at least one level above that buffer in the tree. If the buffer is the - * first or last node in the tree order then one of the delimiting keys may - * be absent, and in this case get_lkey and get_rkey return a special key - * which is MIN_KEY or MAX_KEY. - */ -static inline int key_in_buffer( - /* Path which should be checked. */ - struct treepath *chk_path, - /* Key which should be checked. */ - const struct cpu_key *key, - struct super_block *sb - ) -{ - - RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET - || chk_path->path_length > MAX_HEIGHT, - "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)", - key, chk_path->path_length); - RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev, - "PAP-5060: device must not be NODEV"); - - if (comp_keys(get_lkey(chk_path, sb), key) == 1) - /* left delimiting key is bigger, that the key we look for */ - return 0; - /* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */ - if (comp_keys(get_rkey(chk_path, sb), key) != 1) - /* key must be less than right delimitiing key */ - return 0; - return 1; -} - -int reiserfs_check_path(struct treepath *p) -{ - RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET, - "path not properly relsed"); - return 0; -} - -/* - * Drop the reference to each buffer in a path and restore - * dirty bits clean when preparing the buffer for the log. 
- * This version should only be called from fix_nodes() - */ -void pathrelse_and_restore(struct super_block *sb, - struct treepath *search_path) -{ - int path_offset = search_path->path_length; - - RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, - "clm-4000: invalid path offset"); - - while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) { - struct buffer_head *bh; - bh = PATH_OFFSET_PBUFFER(search_path, path_offset--); - reiserfs_restore_prepared_buffer(sb, bh); - brelse(bh); - } - search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; -} - -/* Drop the reference to each buffer in a path */ -void pathrelse(struct treepath *search_path) -{ - int path_offset = search_path->path_length; - - RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, - "PAP-5090: invalid path offset"); - - while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) - brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--)); - - search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; -} - -static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih) -{ - struct reiserfs_de_head *deh; - int i; - - deh = B_I_DEH(bh, ih); - for (i = 0; i < ih_entry_count(ih); i++) { - if (deh_location(&deh[i]) > ih_item_len(ih)) { - reiserfs_warning(NULL, "reiserfs-5094", - "directory entry location seems wrong %h", - &deh[i]); - return 0; - } - } - - return 1; -} - -static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) -{ - struct block_head *blkh; - struct item_head *ih; - int used_space; - int prev_location; - int i; - int nr; - - blkh = (struct block_head *)buf; - if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) { - reiserfs_warning(NULL, "reiserfs-5080", - "this should be caught earlier"); - return 0; - } - - nr = blkh_nr_item(blkh); - if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { - /* item number is too big or too small */ - reiserfs_warning(NULL, "reiserfs-5081", - "nr_item seems wrong: %z", bh); - return 0; - } - ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; - used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); - - /* free space does not match to calculated amount of use space */ - if (used_space != blocksize - blkh_free_space(blkh)) { - reiserfs_warning(NULL, "reiserfs-5082", - "free space seems wrong: %z", bh); - return 0; - } - /* - * FIXME: it is_leaf will hit performance too much - we may have - * return 1 here - */ - - /* check tables of item heads */ - ih = (struct item_head *)(buf + BLKH_SIZE); - prev_location = blocksize; - for (i = 0; i < nr; i++, ih++) { - if (le_ih_k_type(ih) == TYPE_ANY) { - reiserfs_warning(NULL, "reiserfs-5083", - "wrong item type for item %h", - ih); - return 0; - } - if (ih_location(ih) >= blocksize - || ih_location(ih) < IH_SIZE * nr) { - reiserfs_warning(NULL, "reiserfs-5084", - "item location seems wrong: %h", - ih); - return 0; - } - if (ih_item_len(ih) < 1 - || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) { - reiserfs_warning(NULL, "reiserfs-5085", - "item length seems wrong: %h", - ih); - return 0; - } - if (prev_location - ih_location(ih) != ih_item_len(ih)) { - reiserfs_warning(NULL, "reiserfs-5086", - "item location seems wrong " - "(second one): %h", ih); - return 0; - } - if (is_direntry_le_ih(ih)) { - if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) { - reiserfs_warning(NULL, "reiserfs-5093", - "item entry count seems wrong %h", - ih); - return 0; - } - return has_valid_deh_location(bh, ih); - } - prev_location = ih_location(ih); - } - - /* one may imagine many more checks */ - return 1; -} - -/* returns 1 if 
buf looks like an internal node, 0 otherwise */ -static int is_internal(char *buf, int blocksize, struct buffer_head *bh) -{ - struct block_head *blkh; - int nr; - int used_space; - - blkh = (struct block_head *)buf; - nr = blkh_level(blkh); - if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) { - /* this level is not possible for internal nodes */ - reiserfs_warning(NULL, "reiserfs-5087", - "this should be caught earlier"); - return 0; - } - - nr = blkh_nr_item(blkh); - /* for internal which is not root we might check min number of keys */ - if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { - reiserfs_warning(NULL, "reiserfs-5088", - "number of key seems wrong: %z", bh); - return 0; - } - - used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); - if (used_space != blocksize - blkh_free_space(blkh)) { - reiserfs_warning(NULL, "reiserfs-5089", - "free space seems wrong: %z", bh); - return 0; - } - - /* one may imagine many more checks */ - return 1; -} - -/* - * make sure that bh contains formatted node of reiserfs tree of - * 'level'-th level - */ -static int is_tree_node(struct buffer_head *bh, int level) -{ - if (B_LEVEL(bh) != level) { - reiserfs_warning(NULL, "reiserfs-5090", "node level %d does " - "not match to the expected one %d", - B_LEVEL(bh), level); - return 0; - } - if (level == DISK_LEAF_NODE_LEVEL) - return is_leaf(bh->b_data, bh->b_size, bh); - - return is_internal(bh->b_data, bh->b_size, bh); -} - -#define SEARCH_BY_KEY_READA 16 - -/* - * The function is NOT SCHEDULE-SAFE! - * It might unlock the write lock if we needed to wait for a block - * to be read. Note that in this case it won't recover the lock to avoid - * high contention resulting from too much lock requests, especially - * the caller (search_by_key) will perform other schedule-unsafe - * operations just after calling this function. - * - * @return depth of lock to be restored after read completes - */ -static int search_by_key_reada(struct super_block *s, - struct buffer_head **bh, - b_blocknr_t *b, int num) -{ - int i, j; - int depth = -1; - - for (i = 0; i < num; i++) { - bh[i] = sb_getblk(s, b[i]); - } - /* - * We are going to read some blocks on which we - * have a reference. It's safe, though we might be - * reading blocks concurrently changed if we release - * the lock. But it's still fine because we check later - * if the tree changed - */ - for (j = 0; j < i; j++) { - /* - * note, this needs attention if we are getting rid of the BKL - * you have to make sure the prepared bit isn't set on this - * buffer - */ - if (!buffer_uptodate(bh[j])) { - if (depth == -1) - depth = reiserfs_write_unlock_nested(s); - bh_readahead(bh[j], REQ_RAHEAD); - } - brelse(bh[j]); - } - return depth; -} - -/* - * This function fills up the path from the root to the leaf as it - * descends the tree looking for the key. It uses reiserfs_bread to - * try to find buffers in the cache given their block number. If it - * does not find them in the cache it reads them from disk. For each - * node search_by_key finds using reiserfs_bread it then uses - * bin_search to look through that node. bin_search will find the - * position of the block_number of the next node if it is looking - * through an internal node. If it is looking through a leaf node - * bin_search will find the position of the item which has key either - * equal to given key, or which is the maximal key less than the given - * key. 
search_by_key returns a path that must be checked for the - * correctness of the top of the path but need not be checked for the - * correctness of the bottom of the path - */ -/* - * search_by_key - search for key (and item) in stree - * @sb: superblock - * @key: pointer to key to search for - * @search_path: Allocated and initialized struct treepath; Returned filled - * on success. - * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to - * stop at leaf level. - * - * The function is NOT SCHEDULE-SAFE! - */ -int search_by_key(struct super_block *sb, const struct cpu_key *key, - struct treepath *search_path, int stop_level) -{ - b_blocknr_t block_number; - int expected_level; - struct buffer_head *bh; - struct path_element *last_element; - int node_level, retval; - int fs_gen; - struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; - b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA]; - int reada_count = 0; - -#ifdef CONFIG_REISERFS_CHECK - int repeat_counter = 0; -#endif - - PROC_INFO_INC(sb, search_by_key); - - /* - * As we add each node to a path we increase its count. This means - * that we must be careful to release all nodes in a path before we - * either discard the path struct or re-use the path struct, as we - * do here. - */ - - pathrelse(search_path); - - /* - * With each iteration of this loop we search through the items in the - * current node, and calculate the next current node(next path element) - * for the next iteration of this loop.. - */ - block_number = SB_ROOT_BLOCK(sb); - expected_level = -1; - while (1) { - -#ifdef CONFIG_REISERFS_CHECK - if (!(++repeat_counter % 50000)) - reiserfs_warning(sb, "PAP-5100", - "%s: there were %d iterations of " - "while loop looking for key %K", - current->comm, repeat_counter, - key); -#endif - - /* prep path to have another element added to it. */ - last_element = - PATH_OFFSET_PELEMENT(search_path, - ++search_path->path_length); - fs_gen = get_generation(sb); - - /* - * Read the next tree node, and set the last element - * in the path to have a pointer to it. - */ - if ((bh = last_element->pe_buffer = - sb_getblk(sb, block_number))) { - - /* - * We'll need to drop the lock if we encounter any - * buffers that need to be read. If all of them are - * already up to date, we don't need to drop the lock. - */ - int depth = -1; - - if (!buffer_uptodate(bh) && reada_count > 1) - depth = search_by_key_reada(sb, reada_bh, - reada_blocks, reada_count); - - if (!buffer_uptodate(bh) && depth == -1) - depth = reiserfs_write_unlock_nested(sb); - - bh_read_nowait(bh, 0); - wait_on_buffer(bh); - - if (depth != -1) - reiserfs_write_lock_nested(sb, depth); - if (!buffer_uptodate(bh)) - goto io_error; - } else { -io_error: - search_path->path_length--; - pathrelse(search_path); - return IO_ERROR; - } - reada_count = 0; - if (expected_level == -1) - expected_level = SB_TREE_HEIGHT(sb); - expected_level--; - - /* - * It is possible that schedule occurred. We must check - * whether the key to search is still in the tree rooted - * from the current buffer. If not then repeat search - * from the root. - */ - if (fs_changed(fs_gen, sb) && - (!B_IS_IN_TREE(bh) || - B_LEVEL(bh) != expected_level || - !key_in_buffer(search_path, key, sb))) { - PROC_INFO_INC(sb, search_by_key_fs_changed); - PROC_INFO_INC(sb, search_by_key_restarted); - PROC_INFO_INC(sb, - sbk_restarted[expected_level - 1]); - pathrelse(search_path); - - /* - * Get the root block number so that we can - * repeat the search starting from the root. 
- */ - block_number = SB_ROOT_BLOCK(sb); - expected_level = -1; - - /* repeat search from the root */ - continue; - } - - /* - * only check that the key is in the buffer if key is not - * equal to the MAX_KEY. Latter case is only possible in - * "finish_unfinished()" processing during mount. - */ - RFALSE(comp_keys(&MAX_KEY, key) && - !key_in_buffer(search_path, key, sb), - "PAP-5130: key is not in the buffer"); -#ifdef CONFIG_REISERFS_CHECK - if (REISERFS_SB(sb)->cur_tb) { - print_cur_tb("5140"); - reiserfs_panic(sb, "PAP-5140", - "schedule occurred in do_balance!"); - } -#endif - - /* - * make sure, that the node contents look like a node of - * certain level - */ - if (!is_tree_node(bh, expected_level)) { - reiserfs_error(sb, "vs-5150", - "invalid format found in block %ld. " - "Fsck?", bh->b_blocknr); - pathrelse(search_path); - return IO_ERROR; - } - - /* ok, we have acquired next formatted node in the tree */ - node_level = B_LEVEL(bh); - - PROC_INFO_BH_STAT(sb, bh, node_level - 1); - - RFALSE(node_level < stop_level, - "vs-5152: tree level (%d) is less than stop level (%d)", - node_level, stop_level); - - retval = bin_search(key, item_head(bh, 0), - B_NR_ITEMS(bh), - (node_level == - DISK_LEAF_NODE_LEVEL) ? IH_SIZE : - KEY_SIZE, - &last_element->pe_position); - if (node_level == stop_level) { - return retval; - } - - /* we are not in the stop level */ - /* - * item has been found, so we choose the pointer which - * is to the right of the found one - */ - if (retval == ITEM_FOUND) - last_element->pe_position++; - - /* - * if item was not found we choose the position which is to - * the left of the found item. This requires no code, - * bin_search did it already. - */ - - /* - * So we have chosen a position in the current node which is - * an internal node. Now we calculate child block number by - * position in the node. - */ - block_number = - B_N_CHILD_NUM(bh, last_element->pe_position); - - /* - * if we are going to read leaf nodes, try for read - * ahead as well - */ - if ((search_path->reada & PATH_READA) && - node_level == DISK_LEAF_NODE_LEVEL + 1) { - int pos = last_element->pe_position; - int limit = B_NR_ITEMS(bh); - struct reiserfs_key *le_key; - - if (search_path->reada & PATH_READA_BACK) - limit = 0; - while (reada_count < SEARCH_BY_KEY_READA) { - if (pos == limit) - break; - reada_blocks[reada_count++] = - B_N_CHILD_NUM(bh, pos); - if (search_path->reada & PATH_READA_BACK) - pos--; - else - pos++; - - /* - * check to make sure we're in the same object - */ - le_key = internal_key(bh, pos); - if (le32_to_cpu(le_key->k_objectid) != - key->on_disk_key.k_objectid) { - break; - } - } - } - } -} - -/* - * Form the path to an item and position in this item which contains - * file byte defined by key. If there is no such item - * corresponding to the key, we point the path to the item with - * maximal key less than key, and *pos_in_item is set to one - * past the last entry/byte in the item. If searching for entry in a - * directory item, and it is not found, *pos_in_item is set to one - * entry more than the entry with maximal key which is less than the - * sought key. - * - * Note that if there is no entry in this same node which is one more, - * then we point to an imaginary entry. for direct items, the - * position is in units of bytes, for indirect items the position is - * in units of blocknr entries, for directory items the position is in - * units of directory entries. - */ -/* The function is NOT SCHEDULE-SAFE! 
*/ -int search_for_position_by_key(struct super_block *sb, - /* Key to search (cpu variable) */ - const struct cpu_key *p_cpu_key, - /* Filled up by this function. */ - struct treepath *search_path) -{ - struct item_head *p_le_ih; /* pointer to on-disk structure */ - int blk_size; - loff_t item_offset, offset; - struct reiserfs_dir_entry de; - int retval; - - /* If searching for directory entry. */ - if (is_direntry_cpu_key(p_cpu_key)) - return search_by_entry_key(sb, p_cpu_key, search_path, - &de); - - /* If not searching for directory entry. */ - - /* If item is found. */ - retval = search_item(sb, p_cpu_key, search_path); - if (retval == IO_ERROR) - return retval; - if (retval == ITEM_FOUND) { - - RFALSE(!ih_item_len - (item_head - (PATH_PLAST_BUFFER(search_path), - PATH_LAST_POSITION(search_path))), - "PAP-5165: item length equals zero"); - - pos_in_item(search_path) = 0; - return POSITION_FOUND; - } - - RFALSE(!PATH_LAST_POSITION(search_path), - "PAP-5170: position equals zero"); - - /* Item is not found. Set path to the previous item. */ - p_le_ih = - item_head(PATH_PLAST_BUFFER(search_path), - --PATH_LAST_POSITION(search_path)); - blk_size = sb->s_blocksize; - - if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key)) - return FILE_NOT_FOUND; - - /* FIXME: quite ugly this far */ - - item_offset = le_ih_k_offset(p_le_ih); - offset = cpu_key_k_offset(p_cpu_key); - - /* Needed byte is contained in the item pointed to by the path. */ - if (item_offset <= offset && - item_offset + op_bytes_number(p_le_ih, blk_size) > offset) { - pos_in_item(search_path) = offset - item_offset; - if (is_indirect_le_ih(p_le_ih)) { - pos_in_item(search_path) /= blk_size; - } - return POSITION_FOUND; - } - - /* - * Needed byte is not contained in the item pointed to by the - * path. Set pos_in_item out of the item. - */ - if (is_indirect_le_ih(p_le_ih)) - pos_in_item(search_path) = - ih_item_len(p_le_ih) / UNFM_P_SIZE; - else - pos_in_item(search_path) = ih_item_len(p_le_ih); - - return POSITION_NOT_FOUND; -} - -/* Compare given item and item pointed to by the path. */ -int comp_items(const struct item_head *stored_ih, const struct treepath *path) -{ - struct buffer_head *bh = PATH_PLAST_BUFFER(path); - struct item_head *ih; - - /* Last buffer at the path is not in the tree. */ - if (!B_IS_IN_TREE(bh)) - return 1; - - /* Last path position is invalid. */ - if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh)) - return 1; - - /* we need only to know, whether it is the same item */ - ih = tp_item_head(path); - return memcmp(stored_ih, ih, IH_SIZE); -} - -/* prepare for delete or cut of direct item */ -static inline int prepare_for_direct_item(struct treepath *path, - struct item_head *le_ih, - struct inode *inode, - loff_t new_file_length, int *cut_size) -{ - loff_t round_len; - - if (new_file_length == max_reiserfs_offset(inode)) { - /* item has to be deleted */ - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; - } - /* new file gets truncated */ - if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { - round_len = ROUND_UP(new_file_length); - /* this was new_file_length < le_ih ... */ - if (round_len < le_ih_k_offset(le_ih)) { - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; /* Delete this item. */ - } - /* Calculate first position and size for cutting from item. */ - pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1); - *cut_size = -(ih_item_len(le_ih) - pos_in_item(path)); - - return M_CUT; /* Cut from this item. 
*/ - } - - /* old file: items may have any length */ - - if (new_file_length < le_ih_k_offset(le_ih)) { - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; /* Delete this item. */ - } - - /* Calculate first position and size for cutting from item. */ - *cut_size = -(ih_item_len(le_ih) - - (pos_in_item(path) = - new_file_length + 1 - le_ih_k_offset(le_ih))); - return M_CUT; /* Cut from this item. */ -} - -static inline int prepare_for_direntry_item(struct treepath *path, - struct item_head *le_ih, - struct inode *inode, - loff_t new_file_length, - int *cut_size) -{ - if (le_ih_k_offset(le_ih) == DOT_OFFSET && - new_file_length == max_reiserfs_offset(inode)) { - RFALSE(ih_entry_count(le_ih) != 2, - "PAP-5220: incorrect empty directory item (%h)", le_ih); - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - /* Delete the directory item containing "." and ".." entry. */ - return M_DELETE; - } - - if (ih_entry_count(le_ih) == 1) { - /* - * Delete the directory item such as there is one record only - * in this item - */ - *cut_size = -(IH_SIZE + ih_item_len(le_ih)); - return M_DELETE; - } - - /* Cut one record from the directory item. */ - *cut_size = - -(DEH_SIZE + - entry_length(get_last_bh(path), le_ih, pos_in_item(path))); - return M_CUT; -} - -#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1) - -/* - * If the path points to a directory or direct item, calculate mode - * and the size cut, for balance. - * If the path points to an indirect item, remove some number of its - * unformatted nodes. - * In case of file truncate calculate whether this item must be - * deleted/truncated or last unformatted node of this item will be - * converted to a direct item. - * This function returns a determination of what balance mode the - * calling function should employ. - */ -static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, - struct inode *inode, - struct treepath *path, - const struct cpu_key *item_key, - /* - * Number of unformatted nodes - * which were removed from end - * of the file. - */ - int *removed, - int *cut_size, - /* MAX_KEY_OFFSET in case of delete. */ - unsigned long long new_file_length - ) -{ - struct super_block *sb = inode->i_sb; - struct item_head *p_le_ih = tp_item_head(path); - struct buffer_head *bh = PATH_PLAST_BUFFER(path); - - BUG_ON(!th->t_trans_id); - - /* Stat_data item. */ - if (is_statdata_le_ih(p_le_ih)) { - - RFALSE(new_file_length != max_reiserfs_offset(inode), - "PAP-5210: mode must be M_DELETE"); - - *cut_size = -(IH_SIZE + ih_item_len(p_le_ih)); - return M_DELETE; - } - - /* Directory item. */ - if (is_direntry_le_ih(p_le_ih)) - return prepare_for_direntry_item(path, p_le_ih, inode, - new_file_length, - cut_size); - - /* Direct item. */ - if (is_direct_le_ih(p_le_ih)) - return prepare_for_direct_item(path, p_le_ih, inode, - new_file_length, cut_size); - - /* Case of an indirect item. 
*/ - { - int blk_size = sb->s_blocksize; - struct item_head s_ih; - int need_re_search; - int delete = 0; - int result = M_CUT; - int pos = 0; - - if ( new_file_length == max_reiserfs_offset (inode) ) { - /* - * prepare_for_delete_or_cut() is called by - * reiserfs_delete_item() - */ - new_file_length = 0; - delete = 1; - } - - do { - need_re_search = 0; - *cut_size = 0; - bh = PATH_PLAST_BUFFER(path); - copy_item_head(&s_ih, tp_item_head(path)); - pos = I_UNFM_NUM(&s_ih); - - while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) { - __le32 *unfm; - __u32 block; - - /* - * Each unformatted block deletion may involve - * one additional bitmap block into the transaction, - * thereby the initial journal space reservation - * might not be enough. - */ - if (!delete && (*cut_size) != 0 && - reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) - break; - - unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1; - block = get_block_num(unfm, 0); - - if (block != 0) { - reiserfs_prepare_for_journal(sb, bh, 1); - put_block_num(unfm, 0, 0); - journal_mark_dirty(th, bh); - reiserfs_free_block(th, inode, block, 1); - } - - reiserfs_cond_resched(sb); - - if (item_moved (&s_ih, path)) { - need_re_search = 1; - break; - } - - pos --; - (*removed)++; - (*cut_size) -= UNFM_P_SIZE; - - if (pos == 0) { - (*cut_size) -= IH_SIZE; - result = M_DELETE; - break; - } - } - /* - * a trick. If the buffer has been logged, this will - * do nothing. If we've broken the loop without logging - * it, it will restore the buffer - */ - reiserfs_restore_prepared_buffer(sb, bh); - } while (need_re_search && - search_for_position_by_key(sb, item_key, path) == POSITION_FOUND); - pos_in_item(path) = pos * UNFM_P_SIZE; - - if (*cut_size == 0) { - /* - * Nothing was cut. maybe convert last unformatted node to the - * direct item? - */ - result = M_CONVERT; - } - return result; - } -} - -/* Calculate number of bytes which will be deleted or cut during balance */ -static int calc_deleted_bytes_number(struct tree_balance *tb, char mode) -{ - int del_size; - struct item_head *p_le_ih = tp_item_head(tb->tb_path); - - if (is_statdata_le_ih(p_le_ih)) - return 0; - - del_size = - (mode == - M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0]; - if (is_direntry_le_ih(p_le_ih)) { - /* - * return EMPTY_DIR_SIZE; We delete emty directories only. - * we can't use EMPTY_DIR_SIZE, as old format dirs have a - * different empty size. ick. FIXME, is this right? 
- */ - return del_size; - } - - if (is_indirect_le_ih(p_le_ih)) - del_size = (del_size / UNFM_P_SIZE) * - (PATH_PLAST_BUFFER(tb->tb_path)->b_size); - return del_size; -} - -static void init_tb_struct(struct reiserfs_transaction_handle *th, - struct tree_balance *tb, - struct super_block *sb, - struct treepath *path, int size) -{ - - BUG_ON(!th->t_trans_id); - - memset(tb, '\0', sizeof(struct tree_balance)); - tb->transaction_handle = th; - tb->tb_sb = sb; - tb->tb_path = path; - PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; - PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; - tb->insert_size[0] = size; -} - -void padd_item(char *item, int total_length, int length) -{ - int i; - - for (i = total_length; i > length;) - item[--i] = 0; -} - -#ifdef REISERQUOTA_DEBUG -char key2type(struct reiserfs_key *ih) -{ - if (is_direntry_le_key(2, ih)) - return 'd'; - if (is_direct_le_key(2, ih)) - return 'D'; - if (is_indirect_le_key(2, ih)) - return 'i'; - if (is_statdata_le_key(2, ih)) - return 's'; - return 'u'; -} - -char head2type(struct item_head *ih) -{ - if (is_direntry_le_ih(ih)) - return 'd'; - if (is_direct_le_ih(ih)) - return 'D'; - if (is_indirect_le_ih(ih)) - return 'i'; - if (is_statdata_le_ih(ih)) - return 's'; - return 'u'; -} -#endif - -/* - * Delete object item. - * th - active transaction handle - * path - path to the deleted item - * item_key - key to search for the deleted item - * indode - used for updating i_blocks and quotas - * un_bh - NULL or unformatted node pointer - */ -int reiserfs_delete_item(struct reiserfs_transaction_handle *th, - struct treepath *path, const struct cpu_key *item_key, - struct inode *inode, struct buffer_head *un_bh) -{ - struct super_block *sb = inode->i_sb; - struct tree_balance s_del_balance; - struct item_head s_ih; - struct item_head *q_ih; - int quota_cut_bytes; - int ret_value, del_size, removed; - int depth; - -#ifdef CONFIG_REISERFS_CHECK - char mode; -#endif - - BUG_ON(!th->t_trans_id); - - init_tb_struct(th, &s_del_balance, sb, path, - 0 /*size is unknown */ ); - - while (1) { - removed = 0; - -#ifdef CONFIG_REISERFS_CHECK - mode = -#endif - prepare_for_delete_or_cut(th, inode, path, - item_key, &removed, - &del_size, - max_reiserfs_offset(inode)); - - RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); - - copy_item_head(&s_ih, tp_item_head(path)); - s_del_balance.insert_size[0] = del_size; - - ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); - if (ret_value != REPEAT_SEARCH) - break; - - PROC_INFO_INC(sb, delete_item_restarted); - - /* file system changed, repeat search */ - ret_value = - search_for_position_by_key(sb, item_key, path); - if (ret_value == IO_ERROR) - break; - if (ret_value == FILE_NOT_FOUND) { - reiserfs_warning(sb, "vs-5340", - "no items of the file %K found", - item_key); - break; - } - } /* while (1) */ - - if (ret_value != CARRY_ON) { - unfix_nodes(&s_del_balance); - return 0; - } - - /* reiserfs_delete_item returns item length when success */ - ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); - q_ih = tp_item_head(path); - quota_cut_bytes = ih_item_len(q_ih); - - /* - * hack so the quota code doesn't have to guess if the file has a - * tail. On tail insert, we allocate quota for 1 unformatted node. 
- * We test the offset because the tail might have been - * split into multiple items, and we only want to decrement for - * the unfm node once - */ - if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) { - if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) { - quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; - } else { - quota_cut_bytes = 0; - } - } - - if (un_bh) { - int off; - char *data; - - /* - * We are in direct2indirect conversion, so move tail contents - * to the unformatted node - */ - /* - * note, we do the copy before preparing the buffer because we - * don't care about the contents of the unformatted node yet. - * the only thing we really care about is the direct item's - * data is in the unformatted node. - * - * Otherwise, we would have to call - * reiserfs_prepare_for_journal on the unformatted node, - * which might schedule, meaning we'd have to loop all the - * way back up to the start of the while loop. - * - * The unformatted node must be dirtied later on. We can't be - * sure here if the entire tail has been deleted yet. - * - * un_bh is from the page cache (all unformatted nodes are - * from the page cache) and might be a highmem page. So, we - * can't use un_bh->b_data. - * -clm - */ - - data = kmap_atomic(un_bh->b_page); - off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1)); - memcpy(data + off, - ih_item_body(PATH_PLAST_BUFFER(path), &s_ih), - ret_value); - kunmap_atomic(data); - } - - /* Perform balancing after all resources have been collected at once. */ - do_balance(&s_del_balance, NULL, NULL, M_DELETE); - -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(sb, REISERFS_DEBUG_CODE, - "reiserquota delete_item(): freeing %u, id=%u type=%c", - quota_cut_bytes, inode->i_uid, head2type(&s_ih)); -#endif - depth = reiserfs_write_unlock_nested(inode->i_sb); - dquot_free_space_nodirty(inode, quota_cut_bytes); - reiserfs_write_lock_nested(inode->i_sb, depth); - - /* Return deleted body length */ - return ret_value; -} - -/* - * Summary Of Mechanisms For Handling Collisions Between Processes: - * - * deletion of the body of the object is performed by iput(), with the - * result that if multiple processes are operating on a file, the - * deletion of the body of the file is deferred until the last process - * that has an open inode performs its iput(). - * - * writes and truncates are protected from collisions by use of - * semaphores. - * - * creates, linking, and mknod are protected from collisions with other - * processes by making the reiserfs_add_entry() the last step in the - * creation, and then rolling back all changes if there was a collision. - * - Hans -*/ - -/* this deletes item which never gets split */ -void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, - struct inode *inode, struct reiserfs_key *key) -{ - struct super_block *sb = th->t_super; - struct tree_balance tb; - INITIALIZE_PATH(path); - int item_len = 0; - int tb_init = 0; - struct cpu_key cpu_key = {}; - int retval; - int quota_cut_bytes = 0; - - BUG_ON(!th->t_trans_id); - - le_key2cpu_key(&cpu_key, key); - - while (1) { - retval = search_item(th->t_super, &cpu_key, &path); - if (retval == IO_ERROR) { - reiserfs_error(th->t_super, "vs-5350", - "i/o failure occurred trying " - "to delete %K", &cpu_key); - break; - } - if (retval != ITEM_FOUND) { - pathrelse(&path); - /* - * No need for a warning, if there is just no free - * space to insert '..' item into the - * newly-created subdir - */ - if (! 
- ((unsigned long long) - GET_HASH_VALUE(le_key_k_offset - (le_key_version(key), key)) == 0 - && (unsigned long long) - GET_GENERATION_NUMBER(le_key_k_offset - (le_key_version(key), - key)) == 1)) - reiserfs_warning(th->t_super, "vs-5355", - "%k not found", key); - break; - } - if (!tb_init) { - tb_init = 1; - item_len = ih_item_len(tp_item_head(&path)); - init_tb_struct(th, &tb, th->t_super, &path, - -(IH_SIZE + item_len)); - } - quota_cut_bytes = ih_item_len(tp_item_head(&path)); - - retval = fix_nodes(M_DELETE, &tb, NULL, NULL); - if (retval == REPEAT_SEARCH) { - PROC_INFO_INC(th->t_super, delete_solid_item_restarted); - continue; - } - - if (retval == CARRY_ON) { - do_balance(&tb, NULL, NULL, M_DELETE); - /* - * Should we count quota for item? (we don't - * count quotas for save-links) - */ - if (inode) { - int depth; -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, - "reiserquota delete_solid_item(): freeing %u id=%u type=%c", - quota_cut_bytes, inode->i_uid, - key2type(key)); -#endif - depth = reiserfs_write_unlock_nested(sb); - dquot_free_space_nodirty(inode, - quota_cut_bytes); - reiserfs_write_lock_nested(sb, depth); - } - break; - } - - /* IO_ERROR, NO_DISK_SPACE, etc */ - reiserfs_warning(th->t_super, "vs-5360", - "could not delete %K due to fix_nodes failure", - &cpu_key); - unfix_nodes(&tb); - break; - } - - reiserfs_check_path(&path); -} - -int reiserfs_delete_object(struct reiserfs_transaction_handle *th, - struct inode *inode) -{ - int err; - inode->i_size = 0; - BUG_ON(!th->t_trans_id); - - /* for directory this deletes item containing "." and ".." */ - err = - reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ ); - if (err) - return err; - -#if defined( USE_INODE_GENERATION_COUNTER ) - if (!old_format_only(th->t_super)) { - __le32 *inode_generation; - - inode_generation = - &REISERFS_SB(th->t_super)->s_rs->s_inode_generation; - le32_add_cpu(inode_generation, 1); - } -/* USE_INODE_GENERATION_COUNTER */ -#endif - reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode)); - - return err; -} - -static void unmap_buffers(struct page *page, loff_t pos) -{ - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *next; - unsigned long tail_index; - unsigned long cur_index; - - if (page) { - if (page_has_buffers(page)) { - tail_index = pos & (PAGE_SIZE - 1); - cur_index = 0; - head = page_buffers(page); - bh = head; - do { - next = bh->b_this_page; - - /* - * we want to unmap the buffers that contain - * the tail, and all the buffers after it - * (since the tail must be at the end of the - * file). We don't want to unmap file data - * before the tail, since it might be dirty - * and waiting to reach disk - */ - cur_index += bh->b_size; - if (cur_index > tail_index) { - reiserfs_unmap_buffer(bh); - } - bh = next; - } while (bh != head); - } - } -} - -static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, - struct inode *inode, - struct page *page, - struct treepath *path, - const struct cpu_key *item_key, - loff_t new_file_size, char *mode) -{ - struct super_block *sb = inode->i_sb; - int block_size = sb->s_blocksize; - int cut_bytes; - BUG_ON(!th->t_trans_id); - BUG_ON(new_file_size != inode->i_size); - - /* - * the page being sent in could be NULL if there was an i/o error - * reading in the last block. 
The user will hit problems trying to - * read the file, but for now we just skip the indirect2direct - */ - if (atomic_read(&inode->i_count) > 1 || - !tail_has_to_be_packed(inode) || - !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) { - /* leave tail in an unformatted node */ - *mode = M_SKIP_BALANCING; - cut_bytes = - block_size - (new_file_size & (block_size - 1)); - pathrelse(path); - return cut_bytes; - } - - /* Perform the conversion to a direct_item. */ - return indirect2direct(th, inode, page, path, item_key, - new_file_size, mode); -} - -/* - * we did indirect_to_direct conversion. And we have inserted direct - * item successesfully, but there were no disk space to cut unfm - * pointer being converted. Therefore we have to delete inserted - * direct item(s) - */ -static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, - struct inode *inode, struct treepath *path) -{ - struct cpu_key tail_key; - int tail_len; - int removed; - BUG_ON(!th->t_trans_id); - - make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); - tail_key.key_length = 4; - - tail_len = - (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1; - while (tail_len) { - /* look for the last byte of the tail */ - if (search_for_position_by_key(inode->i_sb, &tail_key, path) == - POSITION_NOT_FOUND) - reiserfs_panic(inode->i_sb, "vs-5615", - "found invalid item"); - RFALSE(path->pos_in_item != - ih_item_len(tp_item_head(path)) - 1, - "vs-5616: appended bytes found"); - PATH_LAST_POSITION(path)--; - - removed = - reiserfs_delete_item(th, path, &tail_key, inode, - NULL /*unbh not needed */ ); - RFALSE(removed <= 0 - || removed > tail_len, - "vs-5617: there was tail %d bytes, removed item length %d bytes", - tail_len, removed); - tail_len -= removed; - set_cpu_key_k_offset(&tail_key, - cpu_key_k_offset(&tail_key) - removed); - } - reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct " - "conversion has been rolled back due to " - "lack of disk space"); - mark_inode_dirty(inode); -} - -/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */ -int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, - struct treepath *path, - struct cpu_key *item_key, - struct inode *inode, - struct page *page, loff_t new_file_size) -{ - struct super_block *sb = inode->i_sb; - /* - * Every function which is going to call do_balance must first - * create a tree_balance structure. Then it must fill up this - * structure by using the init_tb_struct and fix_nodes functions. - * After that we can make tree balancing. - */ - struct tree_balance s_cut_balance; - struct item_head *p_le_ih; - int cut_size = 0; /* Amount to be cut. */ - int ret_value = CARRY_ON; - int removed = 0; /* Number of the removed unformatted nodes. */ - int is_inode_locked = 0; - char mode; /* Mode of the balance. */ - int retval2 = -1; - int quota_cut_bytes; - loff_t tail_pos = 0; - int depth; - - BUG_ON(!th->t_trans_id); - - init_tb_struct(th, &s_cut_balance, inode->i_sb, path, - cut_size); - - /* - * Repeat this loop until we either cut the item without needing - * to balance, or we fix_nodes without schedule occurring - */ - while (1) { - /* - * Determine the balance mode, position of the first byte to - * be cut, and size to be cut. In case of the indirect item - * free unformatted nodes which are pointed to by the cut - * pointers. 
- */ - - mode = - prepare_for_delete_or_cut(th, inode, path, - item_key, &removed, - &cut_size, new_file_size); - if (mode == M_CONVERT) { - /* - * convert last unformatted node to direct item or - * leave tail in the unformatted node - */ - RFALSE(ret_value != CARRY_ON, - "PAP-5570: can not convert twice"); - - ret_value = - maybe_indirect_to_direct(th, inode, page, - path, item_key, - new_file_size, &mode); - if (mode == M_SKIP_BALANCING) - /* tail has been left in the unformatted node */ - return ret_value; - - is_inode_locked = 1; - - /* - * removing of last unformatted node will - * change value we have to return to truncate. - * Save it - */ - retval2 = ret_value; - - /* - * So, we have performed the first part of the - * conversion: - * inserting the new direct item. Now we are - * removing the last unformatted node pointer. - * Set key to search for it. - */ - set_cpu_key_k_type(item_key, TYPE_INDIRECT); - item_key->key_length = 4; - new_file_size -= - (new_file_size & (sb->s_blocksize - 1)); - tail_pos = new_file_size; - set_cpu_key_k_offset(item_key, new_file_size + 1); - if (search_for_position_by_key - (sb, item_key, - path) == POSITION_NOT_FOUND) { - print_block(PATH_PLAST_BUFFER(path), 3, - PATH_LAST_POSITION(path) - 1, - PATH_LAST_POSITION(path) + 1); - reiserfs_panic(sb, "PAP-5580", "item to " - "convert does not exist (%K)", - item_key); - } - continue; - } - if (cut_size == 0) { - pathrelse(path); - return 0; - } - - s_cut_balance.insert_size[0] = cut_size; - - ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL); - if (ret_value != REPEAT_SEARCH) - break; - - PROC_INFO_INC(sb, cut_from_item_restarted); - - ret_value = - search_for_position_by_key(sb, item_key, path); - if (ret_value == POSITION_FOUND) - continue; - - reiserfs_warning(sb, "PAP-5610", "item %K not found", - item_key); - unfix_nodes(&s_cut_balance); - return (ret_value == IO_ERROR) ? -EIO : -ENOENT; - } /* while */ - - /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */ - if (ret_value != CARRY_ON) { - if (is_inode_locked) { - /* - * FIXME: this seems to be not needed: we are always - * able to cut item - */ - indirect_to_direct_roll_back(th, inode, path); - } - if (ret_value == NO_DISK_SPACE) - reiserfs_warning(sb, "reiserfs-5092", - "NO_DISK_SPACE"); - unfix_nodes(&s_cut_balance); - return -EIO; - } - - /* go ahead and perform balancing */ - - RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode"); - - /* Calculate number of bytes that need to be cut from the item. */ - quota_cut_bytes = - (mode == - M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance. - insert_size[0]; - if (retval2 == -1) - ret_value = calc_deleted_bytes_number(&s_cut_balance, mode); - else - ret_value = retval2; - - /* - * For direct items, we only change the quota when deleting the last - * item. - */ - p_le_ih = tp_item_head(s_cut_balance.tb_path); - if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) { - if (mode == M_DELETE && - (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) == - 1) { - /* FIXME: this is to keep 3.5 happy */ - REISERFS_I(inode)->i_first_direct_byte = U32_MAX; - quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; - } else { - quota_cut_bytes = 0; - } - } -#ifdef CONFIG_REISERFS_CHECK - if (is_inode_locked) { - struct item_head *le_ih = - tp_item_head(s_cut_balance.tb_path); - /* - * we are going to complete indirect2direct conversion. 
Make - * sure, that we exactly remove last unformatted node pointer - * of the item - */ - if (!is_indirect_le_ih(le_ih)) - reiserfs_panic(sb, "vs-5652", - "item must be indirect %h", le_ih); - - if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) - reiserfs_panic(sb, "vs-5653", "completing " - "indirect2direct conversion indirect " - "item %h being deleted must be of " - "4 byte long", le_ih); - - if (mode == M_CUT - && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { - reiserfs_panic(sb, "vs-5654", "can not complete " - "indirect2direct conversion of %h " - "(CUT, insert_size==%d)", - le_ih, s_cut_balance.insert_size[0]); - } - /* - * it would be useful to make sure, that right neighboring - * item is direct item of this file - */ - } -#endif - - do_balance(&s_cut_balance, NULL, NULL, mode); - if (is_inode_locked) { - /* - * we've done an indirect->direct conversion. when the - * data block was freed, it was removed from the list of - * blocks that must be flushed before the transaction - * commits, make sure to unmap and invalidate it - */ - unmap_buffers(page, tail_pos); - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; - } -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, - "reiserquota cut_from_item(): freeing %u id=%u type=%c", - quota_cut_bytes, inode->i_uid, '?'); -#endif - depth = reiserfs_write_unlock_nested(sb); - dquot_free_space_nodirty(inode, quota_cut_bytes); - reiserfs_write_lock_nested(sb, depth); - return ret_value; -} - -static void truncate_directory(struct reiserfs_transaction_handle *th, - struct inode *inode) -{ - BUG_ON(!th->t_trans_id); - if (inode->i_nlink) - reiserfs_error(inode->i_sb, "vs-5655", "link count != 0"); - - set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET); - set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY); - reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode)); - reiserfs_update_sd(th, inode); - set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET); - set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA); -} - -/* - * Truncate file to the new size. Note, this must be called with a - * transaction already started - */ -int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, - struct inode *inode, /* ->i_size contains new size */ - struct page *page, /* up to date for last block */ - /* - * when it is called by file_release to convert - * the tail - no timestamps should be updated - */ - int update_timestamps - ) -{ - INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ - struct item_head *p_le_ih; /* Pointer to an item header. */ - - /* Key to search for a previous file item. */ - struct cpu_key s_item_key; - loff_t file_size, /* Old file size. */ - new_file_size; /* New file size. */ - int deleted; /* Number of deleted or truncated bytes. */ - int retval; - int err = 0; - - BUG_ON(!th->t_trans_id); - if (! - (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) - || S_ISLNK(inode->i_mode))) - return 0; - - /* deletion of directory - no need to update timestamps */ - if (S_ISDIR(inode->i_mode)) { - truncate_directory(th, inode); - return 0; - } - - /* Get new file size. 
*/ - new_file_size = inode->i_size; - - /* FIXME: note, that key type is unimportant here */ - make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode), - TYPE_DIRECT, 3); - - retval = - search_for_position_by_key(inode->i_sb, &s_item_key, - &s_search_path); - if (retval == IO_ERROR) { - reiserfs_error(inode->i_sb, "vs-5657", - "i/o failure occurred trying to truncate %K", - &s_item_key); - err = -EIO; - goto out; - } - if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { - reiserfs_error(inode->i_sb, "PAP-5660", - "wrong result %d of search for %K", retval, - &s_item_key); - - err = -EIO; - goto out; - } - - s_search_path.pos_in_item--; - - /* Get real file size (total length of all file items) */ - p_le_ih = tp_item_head(&s_search_path); - if (is_statdata_le_ih(p_le_ih)) - file_size = 0; - else { - loff_t offset = le_ih_k_offset(p_le_ih); - int bytes = - op_bytes_number(p_le_ih, inode->i_sb->s_blocksize); - - /* - * this may mismatch with real file size: if last direct item - * had no padding zeros and last unformatted node had no free - * space, this file would have this file size - */ - file_size = offset + bytes - 1; - } - /* - * are we doing a full truncate or delete, if so - * kick in the reada code - */ - if (new_file_size == 0) - s_search_path.reada = PATH_READA | PATH_READA_BACK; - - if (file_size == 0 || file_size < new_file_size) { - goto update_and_out; - } - - /* Update key to search for the last file item. */ - set_cpu_key_k_offset(&s_item_key, file_size); - - do { - /* Cut or delete file item. */ - deleted = - reiserfs_cut_from_item(th, &s_search_path, &s_item_key, - inode, page, new_file_size); - if (deleted < 0) { - reiserfs_warning(inode->i_sb, "vs-5665", - "reiserfs_cut_from_item failed"); - reiserfs_check_path(&s_search_path); - return 0; - } - - RFALSE(deleted > file_size, - "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K", - deleted, file_size, &s_item_key); - - /* Change key to search the last file item. */ - file_size -= deleted; - - set_cpu_key_k_offset(&s_item_key, file_size); - - /* - * While there are bytes to truncate and previous - * file item is presented in the tree. - */ - - /* - * This loop could take a really long time, and could log - * many more blocks than a transaction can hold. 
So, we do - * a polite journal end here, and if the transaction needs - * ending, we make sure the file is consistent before ending - * the current trans and starting a new one - */ - if (journal_transaction_should_end(th, 0) || - reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { - pathrelse(&s_search_path); - - if (update_timestamps) { - inode_set_mtime_to_ts(inode, - current_time(inode)); - inode_set_ctime_current(inode); - } - reiserfs_update_sd(th, inode); - - err = journal_end(th); - if (err) - goto out; - err = journal_begin(th, inode->i_sb, - JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ; - if (err) - goto out; - reiserfs_update_inode_transaction(inode); - } - } while (file_size > ROUND_UP(new_file_size) && - search_for_position_by_key(inode->i_sb, &s_item_key, - &s_search_path) == POSITION_FOUND); - - RFALSE(file_size > ROUND_UP(new_file_size), - "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d", - new_file_size, file_size, s_item_key.on_disk_key.k_objectid); - -update_and_out: - if (update_timestamps) { - /* this is truncate, not file closing */ - inode_set_mtime_to_ts(inode, current_time(inode)); - inode_set_ctime_current(inode); - } - reiserfs_update_sd(th, inode); - -out: - pathrelse(&s_search_path); - return err; -} - -#ifdef CONFIG_REISERFS_CHECK -/* this makes sure, that we __append__, not overwrite or add holes */ -static void check_research_for_paste(struct treepath *path, - const struct cpu_key *key) -{ - struct item_head *found_ih = tp_item_head(path); - - if (is_direct_le_ih(found_ih)) { - if (le_ih_k_offset(found_ih) + - op_bytes_number(found_ih, - get_last_bh(path)->b_size) != - cpu_key_k_offset(key) - || op_bytes_number(found_ih, - get_last_bh(path)->b_size) != - pos_in_item(path)) - reiserfs_panic(NULL, "PAP-5720", "found direct item " - "%h or position (%d) does not match " - "to key %K", found_ih, - pos_in_item(path), key); - } - if (is_indirect_le_ih(found_ih)) { - if (le_ih_k_offset(found_ih) + - op_bytes_number(found_ih, - get_last_bh(path)->b_size) != - cpu_key_k_offset(key) - || I_UNFM_NUM(found_ih) != pos_in_item(path) - || get_ih_free_space(found_ih) != 0) - reiserfs_panic(NULL, "PAP-5730", "found indirect " - "item (%h) or position (%d) does not " - "match to key (%K)", - found_ih, pos_in_item(path), key); - } -} -#endif /* config reiserfs check */ - -/* - * Paste bytes to the existing item. - * Returns bytes number pasted into the item. - */ -int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, - /* Path to the pasted item. */ - struct treepath *search_path, - /* Key to search for the needed item. */ - const struct cpu_key *key, - /* Inode item belongs to */ - struct inode *inode, - /* Pointer to the bytes to paste. */ - const char *body, - /* Size of pasted bytes. 
*/ - int pasted_size) -{ - struct super_block *sb = inode->i_sb; - struct tree_balance s_paste_balance; - int retval; - int fs_gen; - int depth; - - BUG_ON(!th->t_trans_id); - - fs_gen = get_generation(inode->i_sb); - -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, - "reiserquota paste_into_item(): allocating %u id=%u type=%c", - pasted_size, inode->i_uid, - key2type(&key->on_disk_key)); -#endif - - depth = reiserfs_write_unlock_nested(sb); - retval = dquot_alloc_space_nodirty(inode, pasted_size); - reiserfs_write_lock_nested(sb, depth); - if (retval) { - pathrelse(search_path); - return retval; - } - init_tb_struct(th, &s_paste_balance, th->t_super, search_path, - pasted_size); -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - s_paste_balance.key = key->on_disk_key; -#endif - - /* DQUOT_* can schedule, must check before the fix_nodes */ - if (fs_changed(fs_gen, inode->i_sb)) { - goto search_again; - } - - while ((retval = - fix_nodes(M_PASTE, &s_paste_balance, NULL, - body)) == REPEAT_SEARCH) { -search_again: - /* file system changed while we were in the fix_nodes */ - PROC_INFO_INC(th->t_super, paste_into_item_restarted); - retval = - search_for_position_by_key(th->t_super, key, - search_path); - if (retval == IO_ERROR) { - retval = -EIO; - goto error_out; - } - if (retval == POSITION_FOUND) { - reiserfs_warning(inode->i_sb, "PAP-5710", - "entry or pasted byte (%K) exists", - key); - retval = -EEXIST; - goto error_out; - } -#ifdef CONFIG_REISERFS_CHECK - check_research_for_paste(search_path, key); -#endif - } - - /* - * Perform balancing after all resources are collected by fix_nodes, - * and accessing them will not risk triggering schedule. - */ - if (retval == CARRY_ON) { - do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE); - return 0; - } - retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; -error_out: - /* this also releases the path */ - unfix_nodes(&s_paste_balance); -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, - "reiserquota paste_into_item(): freeing %u id=%u type=%c", - pasted_size, inode->i_uid, - key2type(&key->on_disk_key)); -#endif - depth = reiserfs_write_unlock_nested(sb); - dquot_free_space_nodirty(inode, pasted_size); - reiserfs_write_lock_nested(sb, depth); - return retval; -} - -/* - * Insert new item into the buffer at the path. - * th - active transaction handle - * path - path to the inserted item - * ih - pointer to the item header to insert - * body - pointer to the bytes to insert - */ -int reiserfs_insert_item(struct reiserfs_transaction_handle *th, - struct treepath *path, const struct cpu_key *key, - struct item_head *ih, struct inode *inode, - const char *body) -{ - struct tree_balance s_ins_balance; - int retval; - int fs_gen = 0; - int quota_bytes = 0; - - BUG_ON(!th->t_trans_id); - - if (inode) { /* Do we count quotas for item? */ - int depth; - fs_gen = get_generation(inode->i_sb); - quota_bytes = ih_item_len(ih); - - /* - * hack so the quota code doesn't have to guess - * if the file has a tail, links are always tails, - * so there's no guessing needed - */ - if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih)) - quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; -#ifdef REISERQUOTA_DEBUG - reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, - "reiserquota insert_item(): allocating %u id=%u type=%c", - quota_bytes, inode->i_uid, head2type(ih)); -#endif - /* - * We can't dirty inode here. It would be immediately - * written but appropriate stat item isn't inserted yet... 
- */ - depth = reiserfs_write_unlock_nested(inode->i_sb); - retval = dquot_alloc_space_nodirty(inode, quota_bytes); - reiserfs_write_lock_nested(inode->i_sb, depth); - if (retval) { - pathrelse(path); - return retval; - } - } - init_tb_struct(th, &s_ins_balance, th->t_super, path, - IH_SIZE + ih_item_len(ih)); -#ifdef DISPLACE_NEW_PACKING_LOCALITIES - s_ins_balance.key = key->on_disk_key; -#endif - /* - * DQUOT_* can schedule, must check to be sure calling - * fix_nodes is safe - */ - if (inode && fs_changed(fs_gen, inode->i_sb)) { - goto search_again; - } - - while ((retval = - fix_nodes(M_INSERT, &s_ins_balance, ih, - body)) == REPEAT_SEARCH) { -search_again: - /* file system changed while we were in the fix_nodes */ - PROC_INFO_INC(th->t_super, insert_item_restarted); - retval = search_item(th->t_super, key, path); - if (retval == IO_ERROR) { - retval = -EIO; - goto error_out; - } - if (retval == ITEM_FOUND) { - reiserfs_warning(th->t_super, "PAP-5760", - "key %K already exists in the tree", - key); - retval = -EEXIST; - goto error_out; - } - } - - /* make balancing after all resources will be collected at a time */ - if (retval == CARRY_ON) { - do_balance(&s_ins_balance, ih, body, M_INSERT); - return 0; - } - - retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; -error_out: - /* also releases the path */ - unfix_nodes(&s_ins_balance); -#ifdef REISERQUOTA_DEBUG - if (inode) - reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, - "reiserquota insert_item(): freeing %u id=%u type=%c", - quota_bytes, inode->i_uid, head2type(ih)); -#endif - if (inode) { - int depth = reiserfs_write_unlock_nested(inode->i_sb); - dquot_free_space_nodirty(inode, quota_bytes); - reiserfs_write_lock_nested(inode->i_sb, depth); - } - return retval; -} diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c deleted file mode 100644 index ab76468da02d..000000000000 --- a/fs/reiserfs/super.c +++ /dev/null @@ -1,2646 +0,0 @@ -/* - * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README - * - * Trivial changes by Alan Cox to add the LFS fixes - * - * Trivial Changes: - * Rights granted to Hans Reiser to redistribute under other terms providing - * he accepts all liability including but not limited to patent, fitness - * for purpose, and direct or indirect claims arising from failure to perform. 
- *
- * NO WARRANTY
- */
-
-#include
-#include
-#include
-#include
-#include
-#include "reiserfs.h"
-#include "acl.h"
-#include "xattr.h"
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-struct file_system_type reiserfs_fs_type;
-
-static const char reiserfs_3_5_magic_string[] = REISERFS_SUPER_MAGIC_STRING;
-static const char reiserfs_3_6_magic_string[] = REISER2FS_SUPER_MAGIC_STRING;
-static const char reiserfs_jr_magic_string[] = REISER2FS_JR_SUPER_MAGIC_STRING;
-
-int is_reiserfs_3_5(struct reiserfs_super_block *rs)
-{
-	return !strncmp(rs->s_v1.s_magic, reiserfs_3_5_magic_string,
-			strlen(reiserfs_3_5_magic_string));
-}
-
-int is_reiserfs_3_6(struct reiserfs_super_block *rs)
-{
-	return !strncmp(rs->s_v1.s_magic, reiserfs_3_6_magic_string,
-			strlen(reiserfs_3_6_magic_string));
-}
-
-int is_reiserfs_jr(struct reiserfs_super_block *rs)
-{
-	return !strncmp(rs->s_v1.s_magic, reiserfs_jr_magic_string,
-			strlen(reiserfs_jr_magic_string));
-}
-
-static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs)
-{
-	return (is_reiserfs_3_5(rs) || is_reiserfs_3_6(rs) ||
-		is_reiserfs_jr(rs));
-}
-
-static int reiserfs_remount(struct super_block *s, int *flags, char *data);
-static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf);
-
-static int reiserfs_sync_fs(struct super_block *s, int wait)
-{
-	struct reiserfs_transaction_handle th;
-
-	/*
-	 * Writeback quota in non-journalled quota case - journalled quota has
-	 * no dirty dquots
-	 */
-	dquot_writeback_dquots(s, -1);
-	reiserfs_write_lock(s);
-	if (!journal_begin(&th, s, 1))
-		if (!journal_end_sync(&th))
-			reiserfs_flush_old_commits(s);
-	reiserfs_write_unlock(s);
-	return 0;
-}
-
-static void flush_old_commits(struct work_struct *work)
-{
-	struct reiserfs_sb_info *sbi;
-	struct super_block *s;
-
-	sbi = container_of(work, struct reiserfs_sb_info, old_work.work);
-	s = sbi->s_journal->j_work_sb;
-
-	/*
-	 * We need s_umount for protecting quota writeback. We have to use
-	 * trylock as reiserfs_cancel_old_flush() may be waiting for this work
-	 * to complete with s_umount held.
-	 */
-	if (!down_read_trylock(&s->s_umount)) {
-		/* Requeue work if we are not cancelling it */
-		spin_lock(&sbi->old_work_lock);
-		if (sbi->work_queued == 1)
-			queue_delayed_work(system_long_wq, &sbi->old_work, HZ);
-		spin_unlock(&sbi->old_work_lock);
-		return;
-	}
-	spin_lock(&sbi->old_work_lock);
-	/* Avoid clobbering the cancel state... */
-	if (sbi->work_queued == 1)
-		sbi->work_queued = 0;
-	spin_unlock(&sbi->old_work_lock);
-
-	reiserfs_sync_fs(s, 1);
-	up_read(&s->s_umount);
-}
-
-void reiserfs_schedule_old_flush(struct super_block *s)
-{
-	struct reiserfs_sb_info *sbi = REISERFS_SB(s);
-	unsigned long delay;
-
-	/*
-	 * Avoid scheduling flush when sb is being shut down. It can race
-	 * with journal shutdown and free still queued delayed work.
- */ - if (sb_rdonly(s) || !(s->s_flags & SB_ACTIVE)) - return; - - spin_lock(&sbi->old_work_lock); - if (!sbi->work_queued) { - delay = msecs_to_jiffies(dirty_writeback_interval * 10); - queue_delayed_work(system_long_wq, &sbi->old_work, delay); - sbi->work_queued = 1; - } - spin_unlock(&sbi->old_work_lock); -} - -void reiserfs_cancel_old_flush(struct super_block *s) -{ - struct reiserfs_sb_info *sbi = REISERFS_SB(s); - - spin_lock(&sbi->old_work_lock); - /* Make sure no new flushes will be queued */ - sbi->work_queued = 2; - spin_unlock(&sbi->old_work_lock); - cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); -} - -static int reiserfs_freeze(struct super_block *s) -{ - struct reiserfs_transaction_handle th; - - reiserfs_cancel_old_flush(s); - - reiserfs_write_lock(s); - if (!sb_rdonly(s)) { - int err = journal_begin(&th, s, 1); - if (err) { - reiserfs_block_writes(&th); - } else { - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), - 1); - journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); - reiserfs_block_writes(&th); - journal_end_sync(&th); - } - } - reiserfs_write_unlock(s); - return 0; -} - -static int reiserfs_unfreeze(struct super_block *s) -{ - struct reiserfs_sb_info *sbi = REISERFS_SB(s); - - reiserfs_allow_writes(s); - spin_lock(&sbi->old_work_lock); - /* Allow old_work to run again */ - sbi->work_queued = 0; - spin_unlock(&sbi->old_work_lock); - return 0; -} - -extern const struct in_core_key MAX_IN_CORE_KEY; - -/* - * this is used to delete "save link" when there are no items of a - * file it points to. It can either happen if unlink is completed but - * "save unlink" removal, or if file has both unlink and truncate - * pending and as unlink completes first (because key of "save link" - * protecting unlink is bigger that a key lf "save link" which - * protects truncate), so there left no items to make truncate - * completion on - */ -static int remove_save_link_only(struct super_block *s, - struct reiserfs_key *key, int oid_free) -{ - struct reiserfs_transaction_handle th; - int err; - - /* we are going to do one balancing */ - err = journal_begin(&th, s, JOURNAL_PER_BALANCE_CNT); - if (err) - return err; - - reiserfs_delete_solid_item(&th, NULL, key); - if (oid_free) - /* removals are protected by direct items */ - reiserfs_release_objectid(&th, le32_to_cpu(key->k_objectid)); - - return journal_end(&th); -} - -#ifdef CONFIG_QUOTA -static int reiserfs_quota_on_mount(struct super_block *, int); -#endif - -/* - * Look for uncompleted unlinks and truncates and complete them - * - * Called with superblock write locked. If quotas are enabled, we have to - * release/retake lest we call dquot_quota_on_mount(), proceed to - * schedule_on_each_cpu() in invalidate_bdev() and deadlock waiting for the per - * cpu worklets to complete flush_async_commits() that in turn wait for the - * superblock write lock. 
- */ -static int finish_unfinished(struct super_block *s) -{ - INITIALIZE_PATH(path); - struct cpu_key max_cpu_key, obj_key; - struct reiserfs_key save_link_key, last_inode_key; - int retval = 0; - struct item_head *ih; - struct buffer_head *bh; - int item_pos; - char *item; - int done; - struct inode *inode; - int truncate; -#ifdef CONFIG_QUOTA - int i; - int ms_active_set; - int quota_enabled[REISERFS_MAXQUOTAS]; -#endif - - /* compose key to look for "save" links */ - max_cpu_key.version = KEY_FORMAT_3_5; - max_cpu_key.on_disk_key.k_dir_id = ~0U; - max_cpu_key.on_disk_key.k_objectid = ~0U; - set_cpu_key_k_offset(&max_cpu_key, ~0U); - max_cpu_key.key_length = 3; - - memset(&last_inode_key, 0, sizeof(last_inode_key)); - -#ifdef CONFIG_QUOTA - /* Needed for iput() to work correctly and not trash data */ - if (s->s_flags & SB_ACTIVE) { - ms_active_set = 0; - } else { - ms_active_set = 1; - s->s_flags |= SB_ACTIVE; - } - /* Turn on quotas so that they are updated correctly */ - for (i = 0; i < REISERFS_MAXQUOTAS; i++) { - quota_enabled[i] = 1; - if (REISERFS_SB(s)->s_qf_names[i]) { - int ret; - - if (sb_has_quota_active(s, i)) { - quota_enabled[i] = 0; - continue; - } - reiserfs_write_unlock(s); - ret = reiserfs_quota_on_mount(s, i); - reiserfs_write_lock(s); - if (ret < 0) - reiserfs_warning(s, "reiserfs-2500", - "cannot turn on journaled " - "quota: error %d", ret); - } - } -#endif - - done = 0; - REISERFS_SB(s)->s_is_unlinked_ok = 1; - while (!retval) { - int depth; - retval = search_item(s, &max_cpu_key, &path); - if (retval != ITEM_NOT_FOUND) { - reiserfs_error(s, "vs-2140", - "search_by_key returned %d", retval); - break; - } - - bh = get_last_bh(&path); - item_pos = get_item_pos(&path); - if (item_pos != B_NR_ITEMS(bh)) { - reiserfs_warning(s, "vs-2060", - "wrong position found"); - break; - } - item_pos--; - ih = item_head(bh, item_pos); - - if (le32_to_cpu(ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID) - /* there are no "save" links anymore */ - break; - - save_link_key = ih->ih_key; - if (is_indirect_le_ih(ih)) - truncate = 1; - else - truncate = 0; - - /* reiserfs_iget needs k_dirid and k_objectid only */ - item = ih_item_body(bh, ih); - obj_key.on_disk_key.k_dir_id = le32_to_cpu(*(__le32 *) item); - obj_key.on_disk_key.k_objectid = - le32_to_cpu(ih->ih_key.k_objectid); - obj_key.on_disk_key.k_offset = 0; - obj_key.on_disk_key.k_type = 0; - - pathrelse(&path); - - inode = reiserfs_iget(s, &obj_key); - if (IS_ERR_OR_NULL(inode)) { - /* - * the unlink almost completed, it just did not - * manage to remove "save" link and release objectid - */ - reiserfs_warning(s, "vs-2180", "iget failed for %K", - &obj_key); - retval = remove_save_link_only(s, &save_link_key, 1); - continue; - } - - if (!truncate && inode->i_nlink) { - /* file is not unlinked */ - reiserfs_warning(s, "vs-2185", - "file %K is not unlinked", - &obj_key); - retval = remove_save_link_only(s, &save_link_key, 0); - continue; - } - depth = reiserfs_write_unlock_nested(inode->i_sb); - dquot_initialize(inode); - reiserfs_write_lock_nested(inode->i_sb, depth); - - if (truncate && S_ISDIR(inode->i_mode)) { - /* - * We got a truncate request for a dir which - * is impossible. The only imaginable way is to - * execute unfinished truncate request then boot - * into old kernel, remove the file and create dir - * with the same key. - */ - reiserfs_warning(s, "green-2101", - "impossible truncate on a " - "directory %k. 
Please report", - INODE_PKEY(inode)); - retval = remove_save_link_only(s, &save_link_key, 0); - truncate = 0; - iput(inode); - continue; - } - - if (truncate) { - REISERFS_I(inode)->i_flags |= - i_link_saved_truncate_mask; - /* - * not completed truncate found. New size was - * committed together with "save" link - */ - reiserfs_info(s, "Truncating %k to %lld ..", - INODE_PKEY(inode), inode->i_size); - - /* don't update modification time */ - reiserfs_truncate_file(inode, 0); - - retval = remove_save_link(inode, truncate); - } else { - REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; - /* not completed unlink (rmdir) found */ - reiserfs_info(s, "Removing %k..", INODE_PKEY(inode)); - if (memcmp(&last_inode_key, INODE_PKEY(inode), - sizeof(last_inode_key))){ - last_inode_key = *INODE_PKEY(inode); - /* removal gets completed in iput */ - retval = 0; - } else { - reiserfs_warning(s, "super-2189", "Dead loop " - "in finish_unfinished " - "detected, just remove " - "save link\n"); - retval = remove_save_link_only(s, - &save_link_key, 0); - } - } - - iput(inode); - printk("done\n"); - done++; - } - REISERFS_SB(s)->s_is_unlinked_ok = 0; - -#ifdef CONFIG_QUOTA - /* Turn quotas off */ - reiserfs_write_unlock(s); - for (i = 0; i < REISERFS_MAXQUOTAS; i++) { - if (sb_dqopt(s)->files[i] && quota_enabled[i]) - dquot_quota_off(s, i); - } - reiserfs_write_lock(s); - if (ms_active_set) - /* Restore the flag back */ - s->s_flags &= ~SB_ACTIVE; -#endif - pathrelse(&path); - if (done) - reiserfs_info(s, "There were %d uncompleted unlinks/truncates. " - "Completed\n", done); - return retval; -} - -/* - * to protect file being unlinked from getting lost we "safe" link files - * being unlinked. This link will be deleted in the same transaction with last - * item of file. mounting the filesystem we scan all these links and remove - * files which almost got lost - */ -void add_save_link(struct reiserfs_transaction_handle *th, - struct inode *inode, int truncate) -{ - INITIALIZE_PATH(path); - int retval; - struct cpu_key key; - struct item_head ih; - __le32 link; - - BUG_ON(!th->t_trans_id); - - /* file can only get one "save link" of each kind */ - RFALSE(truncate && - (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask), - "saved link already exists for truncated inode %lx", - (long)inode->i_ino); - RFALSE(!truncate && - (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask), - "saved link already exists for unlinked inode %lx", - (long)inode->i_ino); - - /* setup key of "save" link */ - key.version = KEY_FORMAT_3_5; - key.on_disk_key.k_dir_id = MAX_KEY_OBJECTID; - key.on_disk_key.k_objectid = inode->i_ino; - if (!truncate) { - /* unlink, rmdir, rename */ - set_cpu_key_k_offset(&key, 1 + inode->i_sb->s_blocksize); - set_cpu_key_k_type(&key, TYPE_DIRECT); - - /* item head of "safe" link */ - make_le_item_head(&ih, &key, key.version, - 1 + inode->i_sb->s_blocksize, TYPE_DIRECT, - 4 /*length */ , 0xffff /*free space */ ); - } else { - /* truncate */ - if (S_ISDIR(inode->i_mode)) - reiserfs_warning(inode->i_sb, "green-2102", - "Adding a truncate savelink for " - "a directory %k! 
Please report", - INODE_PKEY(inode)); - set_cpu_key_k_offset(&key, 1); - set_cpu_key_k_type(&key, TYPE_INDIRECT); - - /* item head of "safe" link */ - make_le_item_head(&ih, &key, key.version, 1, TYPE_INDIRECT, - 4 /*length */ , 0 /*free space */ ); - } - key.key_length = 3; - - /* look for its place in the tree */ - retval = search_item(inode->i_sb, &key, &path); - if (retval != ITEM_NOT_FOUND) { - if (retval != -ENOSPC) - reiserfs_error(inode->i_sb, "vs-2100", - "search_by_key (%K) returned %d", &key, - retval); - pathrelse(&path); - return; - } - - /* body of "save" link */ - link = INODE_PKEY(inode)->k_dir_id; - - /* put "save" link into tree, don't charge quota to anyone */ - retval = - reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link); - if (retval) { - if (retval != -ENOSPC) - reiserfs_error(inode->i_sb, "vs-2120", - "insert_item returned %d", retval); - } else { - if (truncate) - REISERFS_I(inode)->i_flags |= - i_link_saved_truncate_mask; - else - REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; - } -} - -/* this opens transaction unlike add_save_link */ -int remove_save_link(struct inode *inode, int truncate) -{ - struct reiserfs_transaction_handle th; - struct reiserfs_key key; - int err; - - /* we are going to do one balancing only */ - err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); - if (err) - return err; - - /* setup key of "save" link */ - key.k_dir_id = cpu_to_le32(MAX_KEY_OBJECTID); - key.k_objectid = INODE_PKEY(inode)->k_objectid; - if (!truncate) { - /* unlink, rmdir, rename */ - set_le_key_k_offset(KEY_FORMAT_3_5, &key, - 1 + inode->i_sb->s_blocksize); - set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_DIRECT); - } else { - /* truncate */ - set_le_key_k_offset(KEY_FORMAT_3_5, &key, 1); - set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_INDIRECT); - } - - if ((truncate && - (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask)) || - (!truncate && - (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask))) - /* don't take quota bytes from anywhere */ - reiserfs_delete_solid_item(&th, NULL, &key); - if (!truncate) { - reiserfs_release_objectid(&th, inode->i_ino); - REISERFS_I(inode)->i_flags &= ~i_link_saved_unlink_mask; - } else - REISERFS_I(inode)->i_flags &= ~i_link_saved_truncate_mask; - - return journal_end(&th); -} - -static void reiserfs_kill_sb(struct super_block *s) -{ - if (REISERFS_SB(s)) { - reiserfs_proc_info_done(s); - /* - * Force any pending inode evictions to occur now. Any - * inodes to be removed that have extended attributes - * associated with them need to clean them up before - * we can release the extended attribute root dentries. - * shrink_dcache_for_umount will BUG if we don't release - * those before it's called so ->put_super is too late. 
- */ - shrink_dcache_sb(s); - - dput(REISERFS_SB(s)->xattr_root); - REISERFS_SB(s)->xattr_root = NULL; - dput(REISERFS_SB(s)->priv_root); - REISERFS_SB(s)->priv_root = NULL; - } - - kill_block_super(s); -} - -#ifdef CONFIG_QUOTA -static int reiserfs_quota_off(struct super_block *sb, int type); - -static void reiserfs_quota_off_umount(struct super_block *s) -{ - int type; - - for (type = 0; type < REISERFS_MAXQUOTAS; type++) - reiserfs_quota_off(s, type); -} -#else -static inline void reiserfs_quota_off_umount(struct super_block *s) -{ -} -#endif - -static void reiserfs_put_super(struct super_block *s) -{ - struct reiserfs_transaction_handle th; - th.t_trans_id = 0; - - reiserfs_quota_off_umount(s); - - reiserfs_write_lock(s); - - /* - * change file system state to current state if it was mounted - * with read-write permissions - */ - if (!sb_rdonly(s)) { - if (!journal_begin(&th, s, 10)) { - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), - 1); - set_sb_umount_state(SB_DISK_SUPER_BLOCK(s), - REISERFS_SB(s)->s_mount_state); - journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); - } - } - - /* - * note, journal_release checks for readonly mount, and can - * decide not to do a journal_end - */ - journal_release(&th, s); - - reiserfs_free_bitmap_cache(s); - - brelse(SB_BUFFER_WITH_SB(s)); - - print_statistics(s); - - if (REISERFS_SB(s)->reserved_blocks != 0) { - reiserfs_warning(s, "green-2005", "reserved blocks left %d", - REISERFS_SB(s)->reserved_blocks); - } - - reiserfs_write_unlock(s); - mutex_destroy(&REISERFS_SB(s)->lock); - destroy_workqueue(REISERFS_SB(s)->commit_wq); - kfree(REISERFS_SB(s)->s_jdev); - kfree(s->s_fs_info); - s->s_fs_info = NULL; -} - -static struct kmem_cache *reiserfs_inode_cachep; - -static struct inode *reiserfs_alloc_inode(struct super_block *sb) -{ - struct reiserfs_inode_info *ei; - ei = alloc_inode_sb(sb, reiserfs_inode_cachep, GFP_KERNEL); - if (!ei) - return NULL; - atomic_set(&ei->openers, 0); - mutex_init(&ei->tailpack); -#ifdef CONFIG_QUOTA - memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); -#endif - - return &ei->vfs_inode; -} - -static void reiserfs_free_inode(struct inode *inode) -{ - kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); -} - -static void init_once(void *foo) -{ - struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; - - INIT_LIST_HEAD(&ei->i_prealloc_list); - inode_init_once(&ei->vfs_inode); -} - -static int __init init_inodecache(void) -{ - reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache", - sizeof(struct - reiserfs_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_ACCOUNT), - init_once); - if (reiserfs_inode_cachep == NULL) - return -ENOMEM; - return 0; -} - -static void destroy_inodecache(void) -{ - /* - * Make sure all delayed rcu free inodes are flushed before we - * destroy cache. 
- */ - rcu_barrier(); - kmem_cache_destroy(reiserfs_inode_cachep); -} - -/* we don't mark inodes dirty, we just log them */ -static void reiserfs_dirty_inode(struct inode *inode, int flags) -{ - struct reiserfs_transaction_handle th; - - int err = 0; - - if (sb_rdonly(inode->i_sb)) { - reiserfs_warning(inode->i_sb, "clm-6006", - "writing inode %lu on readonly FS", - inode->i_ino); - return; - } - reiserfs_write_lock(inode->i_sb); - - /* - * this is really only used for atime updates, so they don't have - * to be included in O_SYNC or fsync - */ - err = journal_begin(&th, inode->i_sb, 1); - if (err) - goto out; - - reiserfs_update_sd(&th, inode); - journal_end(&th); - -out: - reiserfs_write_unlock(inode->i_sb); -} - -static int reiserfs_show_options(struct seq_file *seq, struct dentry *root) -{ - struct super_block *s = root->d_sb; - struct reiserfs_journal *journal = SB_JOURNAL(s); - long opts = REISERFS_SB(s)->s_mount_opt; - - if (opts & (1 << REISERFS_LARGETAIL)) - seq_puts(seq, ",tails=on"); - else if (!(opts & (1 << REISERFS_SMALLTAIL))) - seq_puts(seq, ",notail"); - /* tails=small is default so we don't show it */ - - if (!(opts & (1 << REISERFS_BARRIER_FLUSH))) - seq_puts(seq, ",barrier=none"); - /* barrier=flush is default so we don't show it */ - - if (opts & (1 << REISERFS_ERROR_CONTINUE)) - seq_puts(seq, ",errors=continue"); - else if (opts & (1 << REISERFS_ERROR_PANIC)) - seq_puts(seq, ",errors=panic"); - /* errors=ro is default so we don't show it */ - - if (opts & (1 << REISERFS_DATA_LOG)) - seq_puts(seq, ",data=journal"); - else if (opts & (1 << REISERFS_DATA_WRITEBACK)) - seq_puts(seq, ",data=writeback"); - /* data=ordered is default so we don't show it */ - - if (opts & (1 << REISERFS_ATTRS)) - seq_puts(seq, ",attrs"); - - if (opts & (1 << REISERFS_XATTRS_USER)) - seq_puts(seq, ",user_xattr"); - - if (opts & (1 << REISERFS_EXPOSE_PRIVROOT)) - seq_puts(seq, ",expose_privroot"); - - if (opts & (1 << REISERFS_POSIXACL)) - seq_puts(seq, ",acl"); - - if (REISERFS_SB(s)->s_jdev) - seq_show_option(seq, "jdev", REISERFS_SB(s)->s_jdev); - - if (journal->j_max_commit_age != journal->j_default_max_commit_age) - seq_printf(seq, ",commit=%d", journal->j_max_commit_age); - -#ifdef CONFIG_QUOTA - if (REISERFS_SB(s)->s_qf_names[USRQUOTA]) - seq_show_option(seq, "usrjquota", - REISERFS_SB(s)->s_qf_names[USRQUOTA]); - else if (opts & (1 << REISERFS_USRQUOTA)) - seq_puts(seq, ",usrquota"); - if (REISERFS_SB(s)->s_qf_names[GRPQUOTA]) - seq_show_option(seq, "grpjquota", - REISERFS_SB(s)->s_qf_names[GRPQUOTA]); - else if (opts & (1 << REISERFS_GRPQUOTA)) - seq_puts(seq, ",grpquota"); - if (REISERFS_SB(s)->s_jquota_fmt) { - if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_OLD) - seq_puts(seq, ",jqfmt=vfsold"); - else if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_V0) - seq_puts(seq, ",jqfmt=vfsv0"); - } -#endif - - /* Block allocator options */ - if (opts & (1 << REISERFS_NO_BORDER)) - seq_puts(seq, ",block-allocator=noborder"); - if (opts & (1 << REISERFS_NO_UNHASHED_RELOCATION)) - seq_puts(seq, ",block-allocator=no_unhashed_relocation"); - if (opts & (1 << REISERFS_HASHED_RELOCATION)) - seq_puts(seq, ",block-allocator=hashed_relocation"); - if (opts & (1 << REISERFS_TEST4)) - seq_puts(seq, ",block-allocator=test4"); - show_alloc_options(seq, s); - return 0; -} - -#ifdef CONFIG_QUOTA -static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, - size_t, loff_t); -static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t, - loff_t); - -static struct dquot __rcu 
**reiserfs_get_dquots(struct inode *inode) -{ - return REISERFS_I(inode)->i_dquot; -} -#endif - -static const struct super_operations reiserfs_sops = { - .alloc_inode = reiserfs_alloc_inode, - .free_inode = reiserfs_free_inode, - .write_inode = reiserfs_write_inode, - .dirty_inode = reiserfs_dirty_inode, - .evict_inode = reiserfs_evict_inode, - .put_super = reiserfs_put_super, - .sync_fs = reiserfs_sync_fs, - .freeze_fs = reiserfs_freeze, - .unfreeze_fs = reiserfs_unfreeze, - .statfs = reiserfs_statfs, - .remount_fs = reiserfs_remount, - .show_options = reiserfs_show_options, -#ifdef CONFIG_QUOTA - .quota_read = reiserfs_quota_read, - .quota_write = reiserfs_quota_write, - .get_dquots = reiserfs_get_dquots, -#endif -}; - -#ifdef CONFIG_QUOTA -#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") - -static int reiserfs_write_dquot(struct dquot *); -static int reiserfs_acquire_dquot(struct dquot *); -static int reiserfs_release_dquot(struct dquot *); -static int reiserfs_mark_dquot_dirty(struct dquot *); -static int reiserfs_write_info(struct super_block *, int); -static int reiserfs_quota_on(struct super_block *, int, int, const struct path *); - -static const struct dquot_operations reiserfs_quota_operations = { - .write_dquot = reiserfs_write_dquot, - .acquire_dquot = reiserfs_acquire_dquot, - .release_dquot = reiserfs_release_dquot, - .mark_dirty = reiserfs_mark_dquot_dirty, - .write_info = reiserfs_write_info, - .alloc_dquot = dquot_alloc, - .destroy_dquot = dquot_destroy, - .get_next_id = dquot_get_next_id, -}; - -static const struct quotactl_ops reiserfs_qctl_operations = { - .quota_on = reiserfs_quota_on, - .quota_off = reiserfs_quota_off, - .quota_sync = dquot_quota_sync, - .get_state = dquot_get_state, - .set_info = dquot_set_dqinfo, - .get_dqblk = dquot_get_dqblk, - .set_dqblk = dquot_set_dqblk, -}; -#endif - -static const struct export_operations reiserfs_export_ops = { - .encode_fh = reiserfs_encode_fh, - .fh_to_dentry = reiserfs_fh_to_dentry, - .fh_to_parent = reiserfs_fh_to_parent, - .get_parent = reiserfs_get_parent, -}; - -/* - * this struct is used in reiserfs_getopt () for containing the value for - * those mount options that have values rather than being toggles. - */ -typedef struct { - char *value; - /* - * bitmask which is to set on mount_options bitmask - * when this value is found, 0 is no bits are to be changed. - */ - int setmask; - /* - * bitmask which is to clear on mount_options bitmask - * when this value is found, 0 is no bits are to be changed. - * This is applied BEFORE setmask - */ - int clrmask; -} arg_desc_t; - -/* Set this bit in arg_required to allow empty arguments */ -#define REISERFS_OPT_ALLOWEMPTY 31 - -/* - * this struct is used in reiserfs_getopt() for describing the - * set of reiserfs mount options - */ -typedef struct { - char *option_name; - - /* 0 if argument is not required, not 0 otherwise */ - int arg_required; - - /* list of values accepted by an option */ - const arg_desc_t *values; - - /* - * bitmask which is to set on mount_options bitmask - * when this value is found, 0 is no bits are to be changed. - */ - int setmask; - - /* - * bitmask which is to clear on mount_options bitmask - * when this value is found, 0 is no bits are to be changed. 
- * This is applied BEFORE setmask - */ - int clrmask; -} opt_desc_t; - -/* possible values for -o data= */ -static const arg_desc_t logging_mode[] = { - {"ordered", 1 << REISERFS_DATA_ORDERED, - (1 << REISERFS_DATA_LOG | 1 << REISERFS_DATA_WRITEBACK)}, - {"journal", 1 << REISERFS_DATA_LOG, - (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_WRITEBACK)}, - {"writeback", 1 << REISERFS_DATA_WRITEBACK, - (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_LOG)}, - {.value = NULL} -}; - -/* possible values for -o barrier= */ -static const arg_desc_t barrier_mode[] = { - {"none", 1 << REISERFS_BARRIER_NONE, 1 << REISERFS_BARRIER_FLUSH}, - {"flush", 1 << REISERFS_BARRIER_FLUSH, 1 << REISERFS_BARRIER_NONE}, - {.value = NULL} -}; - -/* - * possible values for "-o block-allocator=" and bits which are to be set in - * s_mount_opt of reiserfs specific part of in-core super block - */ -static const arg_desc_t balloc[] = { - {"noborder", 1 << REISERFS_NO_BORDER, 0}, - {"border", 0, 1 << REISERFS_NO_BORDER}, - {"no_unhashed_relocation", 1 << REISERFS_NO_UNHASHED_RELOCATION, 0}, - {"hashed_relocation", 1 << REISERFS_HASHED_RELOCATION, 0}, - {"test4", 1 << REISERFS_TEST4, 0}, - {"notest4", 0, 1 << REISERFS_TEST4}, - {NULL, 0, 0} -}; - -static const arg_desc_t tails[] = { - {"on", 1 << REISERFS_LARGETAIL, 1 << REISERFS_SMALLTAIL}, - {"off", 0, (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)}, - {"small", 1 << REISERFS_SMALLTAIL, 1 << REISERFS_LARGETAIL}, - {NULL, 0, 0} -}; - -static const arg_desc_t error_actions[] = { - {"panic", 1 << REISERFS_ERROR_PANIC, - (1 << REISERFS_ERROR_RO | 1 << REISERFS_ERROR_CONTINUE)}, - {"ro-remount", 1 << REISERFS_ERROR_RO, - (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_CONTINUE)}, -#ifdef REISERFS_JOURNAL_ERROR_ALLOWS_NO_LOG - {"continue", 1 << REISERFS_ERROR_CONTINUE, - (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_RO)}, -#endif - {NULL, 0, 0}, -}; - -/* - * proceed only one option from a list *cur - string containing of mount - * options - * opts - array of options which are accepted - * opt_arg - if option is found and requires an argument and if it is specifed - * in the input - pointer to the argument is stored here - * bit_flags - if option requires to set a certain bit - it is set here - * return -1 if unknown option is found, opt->arg_required otherwise - */ -static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, - char **opt_arg, unsigned long *bit_flags) -{ - char *p; - /* - * foo=bar, - * ^ ^ ^ - * | | +-- option_end - * | +-- arg_start - * +-- option_start - */ - const opt_desc_t *opt; - const arg_desc_t *arg; - - p = *cur; - - /* assume argument cannot contain commas */ - *cur = strchr(p, ','); - if (*cur) { - *(*cur) = '\0'; - (*cur)++; - } - - if (!strncmp(p, "alloc=", 6)) { - /* - * Ugly special case, probably we should redo options - * parser so that it can understand several arguments for - * some options, also so that it can fill several bitfields - * with option values. 
- */ - if (reiserfs_parse_alloc_options(s, p + 6)) { - return -1; - } else { - return 0; - } - } - - /* for every option in the list */ - for (opt = opts; opt->option_name; opt++) { - if (!strncmp(p, opt->option_name, strlen(opt->option_name))) { - if (bit_flags) { - if (opt->clrmask == - (1 << REISERFS_UNSUPPORTED_OPT)) - reiserfs_warning(s, "super-6500", - "%s not supported.\n", - p); - else - *bit_flags &= ~opt->clrmask; - if (opt->setmask == - (1 << REISERFS_UNSUPPORTED_OPT)) - reiserfs_warning(s, "super-6501", - "%s not supported.\n", - p); - else - *bit_flags |= opt->setmask; - } - break; - } - } - if (!opt->option_name) { - reiserfs_warning(s, "super-6502", - "unknown mount option \"%s\"", p); - return -1; - } - - p += strlen(opt->option_name); - switch (*p) { - case '=': - if (!opt->arg_required) { - reiserfs_warning(s, "super-6503", - "the option \"%s\" does not " - "require an argument\n", - opt->option_name); - return -1; - } - break; - - case 0: - if (opt->arg_required) { - reiserfs_warning(s, "super-6504", - "the option \"%s\" requires an " - "argument\n", opt->option_name); - return -1; - } - break; - default: - reiserfs_warning(s, "super-6505", - "head of option \"%s\" is only correct\n", - opt->option_name); - return -1; - } - - /* - * move to the argument, or to next option if argument is not - * required - */ - p++; - - if (opt->arg_required - && !(opt->arg_required & (1 << REISERFS_OPT_ALLOWEMPTY)) - && !strlen(p)) { - /* this catches "option=," if not allowed */ - reiserfs_warning(s, "super-6506", - "empty argument for \"%s\"\n", - opt->option_name); - return -1; - } - - if (!opt->values) { - /* *=NULLopt_arg contains pointer to argument */ - *opt_arg = p; - return opt->arg_required & ~(1 << REISERFS_OPT_ALLOWEMPTY); - } - - /* values possible for this option are listed in opt->values */ - for (arg = opt->values; arg->value; arg++) { - if (!strcmp(p, arg->value)) { - if (bit_flags) { - *bit_flags &= ~arg->clrmask; - *bit_flags |= arg->setmask; - } - return opt->arg_required; - } - } - - reiserfs_warning(s, "super-6506", - "bad value \"%s\" for option \"%s\"\n", p, - opt->option_name); - return -1; -} - -/* returns 0 if something is wrong in option string, 1 - otherwise */ -static int reiserfs_parse_options(struct super_block *s, - - /* string given via mount's -o */ - char *options, - - /* - * after the parsing phase, contains the - * collection of bitflags defining what - * mount options were selected. 
- */ - unsigned long *mount_options, - - /* strtol-ed from NNN of resize=NNN */ - unsigned long *blocks, - char **jdev_name, - unsigned int *commit_max_age, - char **qf_names, - unsigned int *qfmt) -{ - int c; - char *arg = NULL; - char *pos; - opt_desc_t opts[] = { - /* - * Compatibility stuff, so that -o notail for old - * setups still work - */ - {"tails",.arg_required = 't',.values = tails}, - {"notail",.clrmask = - (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)}, - {"conv",.setmask = 1 << REISERFS_CONVERT}, - {"attrs",.setmask = 1 << REISERFS_ATTRS}, - {"noattrs",.clrmask = 1 << REISERFS_ATTRS}, - {"expose_privroot", .setmask = 1 << REISERFS_EXPOSE_PRIVROOT}, -#ifdef CONFIG_REISERFS_FS_XATTR - {"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER}, - {"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER}, -#else - {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT}, - {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT}, -#endif -#ifdef CONFIG_REISERFS_FS_POSIX_ACL - {"acl",.setmask = 1 << REISERFS_POSIXACL}, - {"noacl",.clrmask = 1 << REISERFS_POSIXACL}, -#else - {"acl",.setmask = 1 << REISERFS_UNSUPPORTED_OPT}, - {"noacl",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT}, -#endif - {.option_name = "nolog"}, - {"replayonly",.setmask = 1 << REPLAYONLY}, - {"block-allocator",.arg_required = 'a',.values = balloc}, - {"data",.arg_required = 'd',.values = logging_mode}, - {"barrier",.arg_required = 'b',.values = barrier_mode}, - {"resize",.arg_required = 'r',.values = NULL}, - {"jdev",.arg_required = 'j',.values = NULL}, - {"nolargeio",.arg_required = 'w',.values = NULL}, - {"commit",.arg_required = 'c',.values = NULL}, - {"usrquota",.setmask = 1 << REISERFS_USRQUOTA}, - {"grpquota",.setmask = 1 << REISERFS_GRPQUOTA}, - {"noquota",.clrmask = 1 << REISERFS_USRQUOTA | 1 << REISERFS_GRPQUOTA}, - {"errors",.arg_required = 'e',.values = error_actions}, - {"usrjquota",.arg_required = - 'u' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL}, - {"grpjquota",.arg_required = - 'g' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL}, - {"jqfmt",.arg_required = 'f',.values = NULL}, - {.option_name = NULL} - }; - - *blocks = 0; - if (!options || !*options) - /* - * use default configuration: create tails, journaling on, no - * conversion to newest format - */ - return 1; - - for (pos = options; pos;) { - c = reiserfs_getopt(s, &pos, opts, &arg, mount_options); - if (c == -1) - /* wrong option is given */ - return 0; - - if (c == 'r') { - char *p; - - p = NULL; - /* "resize=NNN" or "resize=auto" */ - - if (!strcmp(arg, "auto")) { - /* From JFS code, to auto-get the size. */ - *blocks = sb_bdev_nr_blocks(s); - } else { - *blocks = simple_strtoul(arg, &p, 0); - if (*p != '\0') { - /* NNN does not look like a number */ - reiserfs_warning(s, "super-6507", - "bad value %s for " - "-oresize\n", arg); - return 0; - } - } - } - - if (c == 'c') { - char *p = NULL; - unsigned long val = simple_strtoul(arg, &p, 0); - /* commit=NNN (time in seconds) */ - if (*p != '\0' || val >= (unsigned int)-1) { - reiserfs_warning(s, "super-6508", - "bad value %s for -ocommit\n", - arg); - return 0; - } - *commit_max_age = (unsigned int)val; - } - - if (c == 'w') { - reiserfs_warning(s, "super-6509", "nolargeio option " - "is no longer supported"); - return 0; - } - - if (c == 'j') { - if (arg && *arg && jdev_name) { - /* Hm, already assigned? 
*/ - if (*jdev_name) { - reiserfs_warning(s, "super-6510", - "journal device was " - "already specified to " - "be %s", *jdev_name); - return 0; - } - *jdev_name = arg; - } - } -#ifdef CONFIG_QUOTA - if (c == 'u' || c == 'g') { - int qtype = c == 'u' ? USRQUOTA : GRPQUOTA; - - if (sb_any_quota_loaded(s) && - (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) { - reiserfs_warning(s, "super-6511", - "cannot change journaled " - "quota options when quota " - "turned on."); - return 0; - } - if (qf_names[qtype] != - REISERFS_SB(s)->s_qf_names[qtype]) - kfree(qf_names[qtype]); - qf_names[qtype] = NULL; - if (*arg) { /* Some filename specified? */ - if (REISERFS_SB(s)->s_qf_names[qtype] - && strcmp(REISERFS_SB(s)->s_qf_names[qtype], - arg)) { - reiserfs_warning(s, "super-6512", - "%s quota file " - "already specified.", - QTYPE2NAME(qtype)); - return 0; - } - if (strchr(arg, '/')) { - reiserfs_warning(s, "super-6513", - "quotafile must be " - "on filesystem root."); - return 0; - } - qf_names[qtype] = kstrdup(arg, GFP_KERNEL); - if (!qf_names[qtype]) { - reiserfs_warning(s, "reiserfs-2502", - "not enough memory " - "for storing " - "quotafile name."); - return 0; - } - if (qtype == USRQUOTA) - *mount_options |= 1 << REISERFS_USRQUOTA; - else - *mount_options |= 1 << REISERFS_GRPQUOTA; - } else { - if (qtype == USRQUOTA) - *mount_options &= ~(1 << REISERFS_USRQUOTA); - else - *mount_options &= ~(1 << REISERFS_GRPQUOTA); - } - } - if (c == 'f') { - if (!strcmp(arg, "vfsold")) - *qfmt = QFMT_VFS_OLD; - else if (!strcmp(arg, "vfsv0")) - *qfmt = QFMT_VFS_V0; - else { - reiserfs_warning(s, "super-6514", - "unknown quota format " - "specified."); - return 0; - } - if (sb_any_quota_loaded(s) && - *qfmt != REISERFS_SB(s)->s_jquota_fmt) { - reiserfs_warning(s, "super-6515", - "cannot change journaled " - "quota options when quota " - "turned on."); - return 0; - } - } -#else - if (c == 'u' || c == 'g' || c == 'f') { - reiserfs_warning(s, "reiserfs-2503", "journaled " - "quota options not supported."); - return 0; - } -#endif - } - -#ifdef CONFIG_QUOTA - if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt - && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) { - reiserfs_warning(s, "super-6515", - "journaled quota format not specified."); - return 0; - } - if ((!(*mount_options & (1 << REISERFS_USRQUOTA)) && - sb_has_quota_loaded(s, USRQUOTA)) || - (!(*mount_options & (1 << REISERFS_GRPQUOTA)) && - sb_has_quota_loaded(s, GRPQUOTA))) { - reiserfs_warning(s, "super-6516", "quota options must " - "be present when quota is turned on."); - return 0; - } -#endif - - return 1; -} - -static void switch_data_mode(struct super_block *s, unsigned long mode) -{ - REISERFS_SB(s)->s_mount_opt &= ~((1 << REISERFS_DATA_LOG) | - (1 << REISERFS_DATA_ORDERED) | - (1 << REISERFS_DATA_WRITEBACK)); - REISERFS_SB(s)->s_mount_opt |= (1 << mode); -} - -static void handle_data_mode(struct super_block *s, unsigned long mount_options) -{ - if (mount_options & (1 << REISERFS_DATA_LOG)) { - if (!reiserfs_data_log(s)) { - switch_data_mode(s, REISERFS_DATA_LOG); - reiserfs_info(s, "switching to journaled data mode\n"); - } - } else if (mount_options & (1 << REISERFS_DATA_ORDERED)) { - if (!reiserfs_data_ordered(s)) { - switch_data_mode(s, REISERFS_DATA_ORDERED); - reiserfs_info(s, "switching to ordered data mode\n"); - } - } else if (mount_options & (1 << REISERFS_DATA_WRITEBACK)) { - if (!reiserfs_data_writeback(s)) { - switch_data_mode(s, REISERFS_DATA_WRITEBACK); - reiserfs_info(s, "switching to writeback data mode\n"); - } - } -} - -static void 
handle_barrier_mode(struct super_block *s, unsigned long bits) -{ - int flush = (1 << REISERFS_BARRIER_FLUSH); - int none = (1 << REISERFS_BARRIER_NONE); - int all_barrier = flush | none; - - if (bits & all_barrier) { - REISERFS_SB(s)->s_mount_opt &= ~all_barrier; - if (bits & flush) { - REISERFS_SB(s)->s_mount_opt |= flush; - printk("reiserfs: enabling write barrier flush mode\n"); - } else if (bits & none) { - REISERFS_SB(s)->s_mount_opt |= none; - printk("reiserfs: write barriers turned off\n"); - } - } -} - -static void handle_attrs(struct super_block *s) -{ - struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s); - - if (reiserfs_attrs(s)) { - if (old_format_only(s)) { - reiserfs_warning(s, "super-6517", "cannot support " - "attributes on 3.5.x disk format"); - REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); - return; - } - if (!(le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared)) { - reiserfs_warning(s, "super-6518", "cannot support " - "attributes until flag is set in " - "super-block"); - REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); - } - } -} - -#ifdef CONFIG_QUOTA -static void handle_quota_files(struct super_block *s, char **qf_names, - unsigned int *qfmt) -{ - int i; - - for (i = 0; i < REISERFS_MAXQUOTAS; i++) { - if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) - kfree(REISERFS_SB(s)->s_qf_names[i]); - REISERFS_SB(s)->s_qf_names[i] = qf_names[i]; - } - if (*qfmt) - REISERFS_SB(s)->s_jquota_fmt = *qfmt; -} -#endif - -static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) -{ - struct reiserfs_super_block *rs; - struct reiserfs_transaction_handle th; - unsigned long blocks; - unsigned long mount_options = REISERFS_SB(s)->s_mount_opt; - unsigned long safe_mask = 0; - unsigned int commit_max_age = (unsigned int)-1; - struct reiserfs_journal *journal = SB_JOURNAL(s); - int err; - char *qf_names[REISERFS_MAXQUOTAS]; - unsigned int qfmt = 0; -#ifdef CONFIG_QUOTA - int i; -#endif - - sync_filesystem(s); - reiserfs_write_lock(s); - -#ifdef CONFIG_QUOTA - memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); -#endif - - rs = SB_DISK_SUPER_BLOCK(s); - - if (!reiserfs_parse_options - (s, arg, &mount_options, &blocks, NULL, &commit_max_age, - qf_names, &qfmt)) { -#ifdef CONFIG_QUOTA - for (i = 0; i < REISERFS_MAXQUOTAS; i++) - if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) - kfree(qf_names[i]); -#endif - err = -EINVAL; - goto out_err_unlock; - } -#ifdef CONFIG_QUOTA - handle_quota_files(s, qf_names, &qfmt); -#endif - - handle_attrs(s); - - /* Add options that are safe here */ - safe_mask |= 1 << REISERFS_SMALLTAIL; - safe_mask |= 1 << REISERFS_LARGETAIL; - safe_mask |= 1 << REISERFS_NO_BORDER; - safe_mask |= 1 << REISERFS_NO_UNHASHED_RELOCATION; - safe_mask |= 1 << REISERFS_HASHED_RELOCATION; - safe_mask |= 1 << REISERFS_TEST4; - safe_mask |= 1 << REISERFS_ATTRS; - safe_mask |= 1 << REISERFS_XATTRS_USER; - safe_mask |= 1 << REISERFS_POSIXACL; - safe_mask |= 1 << REISERFS_BARRIER_FLUSH; - safe_mask |= 1 << REISERFS_BARRIER_NONE; - safe_mask |= 1 << REISERFS_ERROR_RO; - safe_mask |= 1 << REISERFS_ERROR_CONTINUE; - safe_mask |= 1 << REISERFS_ERROR_PANIC; - safe_mask |= 1 << REISERFS_USRQUOTA; - safe_mask |= 1 << REISERFS_GRPQUOTA; - - /* - * Update the bitmask, taking care to keep - * the bits we're not allowed to change here - */ - REISERFS_SB(s)->s_mount_opt = - (REISERFS_SB(s)-> - s_mount_opt & ~safe_mask) | (mount_options & safe_mask); - - if (commit_max_age != 0 && commit_max_age != (unsigned int)-1) { - journal->j_max_commit_age 
= commit_max_age; - journal->j_max_trans_age = commit_max_age; - } else if (commit_max_age == 0) { - /* 0 means restore defaults. */ - journal->j_max_commit_age = journal->j_default_max_commit_age; - journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; - } - - if (blocks) { - err = reiserfs_resize(s, blocks); - if (err != 0) - goto out_err_unlock; - } - - if (*mount_flags & SB_RDONLY) { - reiserfs_write_unlock(s); - reiserfs_xattr_init(s, *mount_flags); - /* remount read-only */ - if (sb_rdonly(s)) - /* it is read-only already */ - goto out_ok_unlocked; - - err = dquot_suspend(s, -1); - if (err < 0) - goto out_err; - - /* try to remount file system with read-only permissions */ - if (sb_umount_state(rs) == REISERFS_VALID_FS - || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) { - goto out_ok_unlocked; - } - - reiserfs_write_lock(s); - - err = journal_begin(&th, s, 10); - if (err) - goto out_err_unlock; - - /* Mounting a rw partition read-only. */ - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - set_sb_umount_state(rs, REISERFS_SB(s)->s_mount_state); - journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); - } else { - /* remount read-write */ - if (!sb_rdonly(s)) { - reiserfs_write_unlock(s); - reiserfs_xattr_init(s, *mount_flags); - goto out_ok_unlocked; /* We are read-write already */ - } - - if (reiserfs_is_journal_aborted(journal)) { - err = journal->j_errno; - goto out_err_unlock; - } - - handle_data_mode(s, mount_options); - handle_barrier_mode(s, mount_options); - REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); - - /* now it is safe to call journal_begin */ - s->s_flags &= ~SB_RDONLY; - err = journal_begin(&th, s, 10); - if (err) - goto out_err_unlock; - - /* Mount a partition which is read-only, read-write */ - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); - s->s_flags &= ~SB_RDONLY; - set_sb_umount_state(rs, REISERFS_ERROR_FS); - if (!old_format_only(s)) - set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); - /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ - journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); - REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS; - } - /* this will force a full flush of all journal lists */ - SB_JOURNAL(s)->j_must_wait = 1; - err = journal_end(&th); - if (err) - goto out_err_unlock; - - reiserfs_write_unlock(s); - if (!(*mount_flags & SB_RDONLY)) { - dquot_resume(s, -1); - reiserfs_write_lock(s); - finish_unfinished(s); - reiserfs_write_unlock(s); - reiserfs_xattr_init(s, *mount_flags); - } - -out_ok_unlocked: - return 0; - -out_err_unlock: - reiserfs_write_unlock(s); -out_err: - return err; -} - -static int read_super_block(struct super_block *s, int offset) -{ - struct buffer_head *bh; - struct reiserfs_super_block *rs; - int fs_blocksize; - - bh = sb_bread(s, offset / s->s_blocksize); - if (!bh) { - reiserfs_warning(s, "sh-2006", - "bread failed (dev %s, block %lu, size %lu)", - s->s_id, offset / s->s_blocksize, - s->s_blocksize); - return 1; - } - - rs = (struct reiserfs_super_block *)bh->b_data; - if (!is_any_reiserfs_magic_string(rs)) { - brelse(bh); - return 1; - } - /* - * ok, reiserfs signature (old or new) found in at the given offset - */ - fs_blocksize = sb_blocksize(rs); - brelse(bh); - sb_set_blocksize(s, fs_blocksize); - - bh = sb_bread(s, offset / s->s_blocksize); - if (!bh) { - reiserfs_warning(s, "sh-2007", - "bread failed (dev %s, block %lu, size %lu)", - s->s_id, offset / s->s_blocksize, - s->s_blocksize); - return 1; - } - - rs = (struct reiserfs_super_block 
*)bh->b_data; - if (sb_blocksize(rs) != s->s_blocksize) { - reiserfs_warning(s, "sh-2011", "can't find a reiserfs " - "filesystem on (dev %s, block %llu, size %lu)", - s->s_id, - (unsigned long long)bh->b_blocknr, - s->s_blocksize); - brelse(bh); - return 1; - } - - if (rs->s_v1.s_root_block == cpu_to_le32(-1)) { - brelse(bh); - reiserfs_warning(s, "super-6519", "Unfinished reiserfsck " - "--rebuild-tree run detected. Please run\n" - "reiserfsck --rebuild-tree and wait for a " - "completion. If that fails\n" - "get newer reiserfsprogs package"); - return 1; - } - - reiserfs_warning(NULL, "", "reiserfs filesystem is deprecated and " - "scheduled to be removed from the kernel in 2025"); - SB_BUFFER_WITH_SB(s) = bh; - SB_DISK_SUPER_BLOCK(s) = rs; - - /* - * magic is of non-standard journal filesystem, look at s_version to - * find which format is in use - */ - if (is_reiserfs_jr(rs)) { - if (sb_version(rs) == REISERFS_VERSION_2) - reiserfs_info(s, "found reiserfs format \"3.6\"" - " with non-standard journal\n"); - else if (sb_version(rs) == REISERFS_VERSION_1) - reiserfs_info(s, "found reiserfs format \"3.5\"" - " with non-standard journal\n"); - else { - reiserfs_warning(s, "sh-2012", "found unknown " - "format \"%u\" of reiserfs with " - "non-standard magic", sb_version(rs)); - return 1; - } - } else - /* - * s_version of standard format may contain incorrect - * information, so we just look at the magic string - */ - reiserfs_info(s, - "found reiserfs format \"%s\" with standard journal\n", - is_reiserfs_3_5(rs) ? "3.5" : "3.6"); - - s->s_op = &reiserfs_sops; - s->s_export_op = &reiserfs_export_ops; -#ifdef CONFIG_QUOTA - s->s_qcop = &reiserfs_qctl_operations; - s->dq_op = &reiserfs_quota_operations; - s->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; -#endif - - /* - * new format is limited by the 32 bit wide i_blocks field, want to - * be one full block below that. - */ - s->s_maxbytes = (512LL << 32) - s->s_blocksize; - return 0; -} - -/* after journal replay, reread all bitmap and super blocks */ -static int reread_meta_blocks(struct super_block *s) -{ - if (bh_read(SB_BUFFER_WITH_SB(s), 0) < 0) { - reiserfs_warning(s, "reiserfs-2504", "error reading the super"); - return 1; - } - - return 0; -} - -/* hash detection stuff */ - -/* - * if root directory is empty - we set default - Yura's - hash and - * warn about it - * FIXME: we look for only one name in a directory. 
If tea and yura - * both have the same value - we ask user to send report to the - * mailing list - */ -static __u32 find_hash_out(struct super_block *s) -{ - int retval; - struct inode *inode; - struct cpu_key key; - INITIALIZE_PATH(path); - struct reiserfs_dir_entry de; - struct reiserfs_de_head *deh; - __u32 hash = DEFAULT_HASH; - __u32 deh_hashval, teahash, r5hash, yurahash; - - inode = d_inode(s->s_root); - - make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3); - retval = search_by_entry_key(s, &key, &path, &de); - if (retval == IO_ERROR) { - pathrelse(&path); - return UNSET_HASH; - } - if (retval == NAME_NOT_FOUND) - de.de_entry_num--; - - set_de_name_and_namelen(&de); - deh = de.de_deh + de.de_entry_num; - - if (deh_offset(deh) == DOT_DOT_OFFSET) { - /* allow override in this case */ - if (reiserfs_rupasov_hash(s)) - hash = YURA_HASH; - reiserfs_info(s, "FS seems to be empty, autodetect is using the default hash\n"); - goto out; - } - - deh_hashval = GET_HASH_VALUE(deh_offset(deh)); - r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen)); - teahash = GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen)); - yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen)); - - if ((teahash == r5hash && deh_hashval == r5hash) || - (teahash == yurahash && deh_hashval == yurahash) || - (r5hash == yurahash && deh_hashval == yurahash)) { - reiserfs_warning(s, "reiserfs-2506", - "Unable to automatically detect hash " - "function. Please mount with -o " - "hash={tea,rupasov,r5}"); - hash = UNSET_HASH; - goto out; - } - - if (deh_hashval == yurahash) - hash = YURA_HASH; - else if (deh_hashval == teahash) - hash = TEA_HASH; - else if (deh_hashval == r5hash) - hash = R5_HASH; - else { - reiserfs_warning(s, "reiserfs-2506", - "Unrecognised hash function"); - hash = UNSET_HASH; - } -out: - pathrelse(&path); - return hash; -} - -/* finds out which hash names are sorted with */ -static int what_hash(struct super_block *s) -{ - __u32 code; - - code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s)); - - /* - * reiserfs_hash_detect() == true if any of the hash mount options - * were used. 
We must check them to make sure the user isn't - * using a bad hash value - */ - if (code == UNSET_HASH || reiserfs_hash_detect(s)) - code = find_hash_out(s); - - if (code != UNSET_HASH && reiserfs_hash_detect(s)) { - /* - * detection has found the hash, and we must check against the - * mount options - */ - if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { - reiserfs_warning(s, "reiserfs-2507", - "Error, %s hash detected, " - "unable to force rupasov hash", - reiserfs_hashname(code)); - code = UNSET_HASH; - } else if (reiserfs_tea_hash(s) && code != TEA_HASH) { - reiserfs_warning(s, "reiserfs-2508", - "Error, %s hash detected, " - "unable to force tea hash", - reiserfs_hashname(code)); - code = UNSET_HASH; - } else if (reiserfs_r5_hash(s) && code != R5_HASH) { - reiserfs_warning(s, "reiserfs-2509", - "Error, %s hash detected, " - "unable to force r5 hash", - reiserfs_hashname(code)); - code = UNSET_HASH; - } - } else { - /* - * find_hash_out was not called or - * could not determine the hash - */ - if (reiserfs_rupasov_hash(s)) { - code = YURA_HASH; - } else if (reiserfs_tea_hash(s)) { - code = TEA_HASH; - } else if (reiserfs_r5_hash(s)) { - code = R5_HASH; - } - } - - /* - * if we are mounted RW, and we have a new valid hash code, update - * the super - */ - if (code != UNSET_HASH && - !sb_rdonly(s) && - code != sb_hash_function_code(SB_DISK_SUPER_BLOCK(s))) { - set_sb_hash_function_code(SB_DISK_SUPER_BLOCK(s), code); - } - return code; -} - -/* return pointer to appropriate function */ -static hashf_t hash_function(struct super_block *s) -{ - switch (what_hash(s)) { - case TEA_HASH: - reiserfs_info(s, "Using tea hash to sort names\n"); - return keyed_hash; - case YURA_HASH: - reiserfs_info(s, "Using rupasov hash to sort names\n"); - return yura_hash; - case R5_HASH: - reiserfs_info(s, "Using r5 hash to sort names\n"); - return r5_hash; - } - return NULL; -} - -/* this is used to set up correct value for old partitions */ -static int function2code(hashf_t func) -{ - if (func == keyed_hash) - return TEA_HASH; - if (func == yura_hash) - return YURA_HASH; - if (func == r5_hash) - return R5_HASH; - - BUG(); /* should never happen */ - - return 0; -} - -#define SWARN(silent, s, id, ...) 
\ - if (!(silent)) \ - reiserfs_warning(s, id, __VA_ARGS__) - -static int reiserfs_fill_super(struct super_block *s, void *data, int silent) -{ - struct inode *root_inode; - struct reiserfs_transaction_handle th; - int old_format = 0; - unsigned long blocks; - unsigned int commit_max_age = 0; - int jinit_done = 0; - struct reiserfs_iget_args args; - struct reiserfs_super_block *rs; - char *jdev_name; - struct reiserfs_sb_info *sbi; - int errval = -EINVAL; - char *qf_names[REISERFS_MAXQUOTAS] = {}; - unsigned int qfmt = 0; - - sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); - if (!sbi) - return -ENOMEM; - s->s_fs_info = sbi; - /* Set default values for options: non-aggressive tails, RO on errors */ - sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL); - sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO); - sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); - /* no preallocation minimum, be smart in reiserfs_file_write instead */ - sbi->s_alloc_options.preallocmin = 0; - /* Preallocate by 16 blocks (17-1) at once */ - sbi->s_alloc_options.preallocsize = 17; - /* setup default block allocator options */ - reiserfs_init_alloc_options(s); - - spin_lock_init(&sbi->old_work_lock); - INIT_DELAYED_WORK(&sbi->old_work, flush_old_commits); - mutex_init(&sbi->lock); - sbi->lock_depth = -1; - - sbi->commit_wq = alloc_workqueue("reiserfs/%s", WQ_MEM_RECLAIM, 0, - s->s_id); - if (!sbi->commit_wq) { - SWARN(silent, s, "", "Cannot allocate commit workqueue"); - errval = -ENOMEM; - goto error_unlocked; - } - - jdev_name = NULL; - if (reiserfs_parse_options - (s, (char *)data, &sbi->s_mount_opt, &blocks, &jdev_name, - &commit_max_age, qf_names, &qfmt) == 0) { - goto error_unlocked; - } - if (jdev_name && jdev_name[0]) { - sbi->s_jdev = kstrdup(jdev_name, GFP_KERNEL); - if (!sbi->s_jdev) { - SWARN(silent, s, "", "Cannot allocate memory for " - "journal device name"); - goto error_unlocked; - } - } -#ifdef CONFIG_QUOTA - handle_quota_files(s, qf_names, &qfmt); -#endif - - if (blocks) { - SWARN(silent, s, "jmacd-7", "resize option for remount only"); - goto error_unlocked; - } - - /* - * try old format (undistributed bitmap, super block in 8-th 1k - * block of a device) - */ - if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES)) - old_format = 1; - - /* - * try new format (64-th 1k block), which can contain reiserfs - * super block - */ - else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { - SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", - s->s_id); - goto error_unlocked; - } - - s->s_time_min = 0; - s->s_time_max = U32_MAX; - - rs = SB_DISK_SUPER_BLOCK(s); - /* - * Let's do basic sanity check to verify that underlying device is not - * smaller than the filesystem. If the check fails then abort and - * scream, because bad stuff will happen otherwise. 
- */ - if (bdev_nr_bytes(s->s_bdev) < sb_block_count(rs) * sb_blocksize(rs)) { - SWARN(silent, s, "", "Filesystem cannot be " - "mounted because it is bigger than the device"); - SWARN(silent, s, "", "You may need to run fsck " - "or increase size of your LVM partition"); - SWARN(silent, s, "", "Or may be you forgot to " - "reboot after fdisk when it told you to"); - goto error_unlocked; - } - - sbi->s_mount_state = SB_REISERFS_STATE(s); - sbi->s_mount_state = REISERFS_VALID_FS; - - if ((errval = reiserfs_init_bitmap_cache(s))) { - SWARN(silent, s, "jmacd-8", "unable to read bitmap"); - goto error_unlocked; - } - - errval = -EINVAL; -#ifdef CONFIG_REISERFS_CHECK - SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON"); - SWARN(silent, s, "", "- it is slow mode for debugging."); -#endif - - /* make data=ordered the default */ - if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) && - !reiserfs_data_writeback(s)) { - sbi->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); - } - - if (reiserfs_data_log(s)) { - reiserfs_info(s, "using journaled data mode\n"); - } else if (reiserfs_data_ordered(s)) { - reiserfs_info(s, "using ordered data mode\n"); - } else { - reiserfs_info(s, "using writeback data mode\n"); - } - if (reiserfs_barrier_flush(s)) { - printk("reiserfs: using flush barriers\n"); - } - - if (journal_init(s, jdev_name, old_format, commit_max_age)) { - SWARN(silent, s, "sh-2022", - "unable to initialize journal space"); - goto error_unlocked; - } else { - /* - * once this is set, journal_release must be called - * if we error out of the mount - */ - jinit_done = 1; - } - - if (reread_meta_blocks(s)) { - SWARN(silent, s, "jmacd-9", - "unable to reread meta blocks after journal init"); - goto error_unlocked; - } - - if (replay_only(s)) - goto error_unlocked; - - s->s_xattr = reiserfs_xattr_handlers; - - if (bdev_read_only(s->s_bdev) && !sb_rdonly(s)) { - SWARN(silent, s, "clm-7000", - "Detected readonly device, marking FS readonly"); - s->s_flags |= SB_RDONLY; - } - args.objectid = REISERFS_ROOT_OBJECTID; - args.dirid = REISERFS_ROOT_PARENT_OBJECTID; - root_inode = - iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, - reiserfs_init_locked_inode, (void *)&args); - if (!root_inode) { - SWARN(silent, s, "jmacd-10", "get root inode failed"); - goto error_unlocked; - } - - /* - * This path assumed to be called with the BKL in the old times. - * Now we have inherited the big reiserfs lock from it and many - * reiserfs helpers called in the mount path and elsewhere require - * this lock to be held even if it's not always necessary. Let's be - * conservative and hold it early. The window can be reduced after - * careful review of the code. 
- */ - reiserfs_write_lock(s); - - if (root_inode->i_state & I_NEW) { - reiserfs_read_locked_inode(root_inode, &args); - unlock_new_inode(root_inode); - } - - if (!S_ISDIR(root_inode->i_mode) || !inode_get_bytes(root_inode) || - !root_inode->i_size) { - SWARN(silent, s, "", "corrupt root inode, run fsck"); - iput(root_inode); - errval = -EUCLEAN; - goto error; - } - - s->s_root = d_make_root(root_inode); - if (!s->s_root) - goto error; - /* define and initialize hash function */ - sbi->s_hash_function = hash_function(s); - if (sbi->s_hash_function == NULL) { - dput(s->s_root); - s->s_root = NULL; - goto error; - } - - if (is_reiserfs_3_5(rs) - || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1)) - set_bit(REISERFS_3_5, &sbi->s_properties); - else if (old_format) - set_bit(REISERFS_OLD_FORMAT, &sbi->s_properties); - else - set_bit(REISERFS_3_6, &sbi->s_properties); - - if (!sb_rdonly(s)) { - - errval = journal_begin(&th, s, 1); - if (errval) { - dput(s->s_root); - s->s_root = NULL; - goto error; - } - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - - set_sb_umount_state(rs, REISERFS_ERROR_FS); - set_sb_fs_state(rs, 0); - - /* - * Clear out s_bmap_nr if it would wrap. We can handle this - * case, but older revisions can't. This will cause the - * file system to fail mount on those older implementations, - * avoiding corruption. -jeffm - */ - if (bmap_would_wrap(reiserfs_bmap_count(s)) && - sb_bmap_nr(rs) != 0) { - reiserfs_warning(s, "super-2030", "This file system " - "claims to use %u bitmap blocks in " - "its super block, but requires %u. " - "Clearing to zero.", sb_bmap_nr(rs), - reiserfs_bmap_count(s)); - - set_sb_bmap_nr(rs, 0); - } - - if (old_format_only(s)) { - /* - * filesystem of format 3.5 either with standard - * or non-standard journal - */ - if (convert_reiserfs(s)) { - /* and -o conv is given */ - if (!silent) - reiserfs_info(s, - "converting 3.5 filesystem to the 3.6 format"); - - if (is_reiserfs_3_5(rs)) - /* - * put magic string of 3.6 format. - * 2.2 will not be able to - * mount this filesystem anymore - */ - memcpy(rs->s_v1.s_magic, - reiserfs_3_6_magic_string, - sizeof - (reiserfs_3_6_magic_string)); - - set_sb_version(rs, REISERFS_VERSION_2); - reiserfs_convert_objectid_map_v1(s); - set_bit(REISERFS_3_6, &sbi->s_properties); - clear_bit(REISERFS_3_5, &sbi->s_properties); - } else if (!silent) { - reiserfs_info(s, "using 3.5.x disk format\n"); - } - } else - set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); - - - journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); - errval = journal_end(&th); - if (errval) { - dput(s->s_root); - s->s_root = NULL; - goto error; - } - - reiserfs_write_unlock(s); - if ((errval = reiserfs_lookup_privroot(s)) || - (errval = reiserfs_xattr_init(s, s->s_flags))) { - dput(s->s_root); - s->s_root = NULL; - goto error_unlocked; - } - reiserfs_write_lock(s); - - /* - * look for files which were to be removed in previous session - */ - finish_unfinished(s); - } else { - if (old_format_only(s) && !silent) { - reiserfs_info(s, "using 3.5.x disk format\n"); - } - - reiserfs_write_unlock(s); - if ((errval = reiserfs_lookup_privroot(s)) || - (errval = reiserfs_xattr_init(s, s->s_flags))) { - dput(s->s_root); - s->s_root = NULL; - goto error_unlocked; - } - reiserfs_write_lock(s); - } - /* - * mark hash in super block: it could be unset. 
overwrite should be ok - */ - set_sb_hash_function_code(rs, function2code(sbi->s_hash_function)); - - handle_attrs(s); - - reiserfs_proc_info_init(s); - - init_waitqueue_head(&(sbi->s_wait)); - spin_lock_init(&sbi->bitmap_lock); - - reiserfs_write_unlock(s); - - return (0); - -error: - reiserfs_write_unlock(s); - -error_unlocked: - /* kill the commit thread, free journal ram */ - if (jinit_done) { - reiserfs_write_lock(s); - journal_release_error(NULL, s); - reiserfs_write_unlock(s); - } - - if (sbi->commit_wq) - destroy_workqueue(sbi->commit_wq); - - reiserfs_cancel_old_flush(s); - - reiserfs_free_bitmap_cache(s); - if (SB_BUFFER_WITH_SB(s)) - brelse(SB_BUFFER_WITH_SB(s)); -#ifdef CONFIG_QUOTA - { - int j; - for (j = 0; j < REISERFS_MAXQUOTAS; j++) - kfree(qf_names[j]); - } -#endif - kfree(sbi->s_jdev); - kfree(sbi); - - s->s_fs_info = NULL; - return errval; -} - -static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf) -{ - struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(dentry->d_sb); - - buf->f_namelen = (REISERFS_MAX_NAME(s->s_blocksize)); - buf->f_bfree = sb_free_blocks(rs); - buf->f_bavail = buf->f_bfree; - buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1; - buf->f_bsize = dentry->d_sb->s_blocksize; - /* changed to accommodate gcc folks. */ - buf->f_type = REISERFS_SUPER_MAGIC; - buf->f_fsid.val[0] = (u32)crc32_le(0, rs->s_uuid, sizeof(rs->s_uuid)/2); - buf->f_fsid.val[1] = (u32)crc32_le(0, rs->s_uuid + sizeof(rs->s_uuid)/2, - sizeof(rs->s_uuid)/2); - - return 0; -} - -#ifdef CONFIG_QUOTA -static int reiserfs_write_dquot(struct dquot *dquot) -{ - struct reiserfs_transaction_handle th; - int ret, err; - int depth; - - reiserfs_write_lock(dquot->dq_sb); - ret = - journal_begin(&th, dquot->dq_sb, - REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); - if (ret) - goto out; - depth = reiserfs_write_unlock_nested(dquot->dq_sb); - ret = dquot_commit(dquot); - reiserfs_write_lock_nested(dquot->dq_sb, depth); - err = journal_end(&th); - if (!ret && err) - ret = err; -out: - reiserfs_write_unlock(dquot->dq_sb); - return ret; -} - -static int reiserfs_acquire_dquot(struct dquot *dquot) -{ - struct reiserfs_transaction_handle th; - int ret, err; - int depth; - - reiserfs_write_lock(dquot->dq_sb); - ret = - journal_begin(&th, dquot->dq_sb, - REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); - if (ret) - goto out; - depth = reiserfs_write_unlock_nested(dquot->dq_sb); - ret = dquot_acquire(dquot); - reiserfs_write_lock_nested(dquot->dq_sb, depth); - err = journal_end(&th); - if (!ret && err) - ret = err; -out: - reiserfs_write_unlock(dquot->dq_sb); - return ret; -} - -static int reiserfs_release_dquot(struct dquot *dquot) -{ - struct reiserfs_transaction_handle th; - int ret, err; - - reiserfs_write_lock(dquot->dq_sb); - ret = - journal_begin(&th, dquot->dq_sb, - REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); - reiserfs_write_unlock(dquot->dq_sb); - if (ret) { - /* Release dquot anyway to avoid endless cycle in dqput() */ - dquot_release(dquot); - goto out; - } - ret = dquot_release(dquot); - reiserfs_write_lock(dquot->dq_sb); - err = journal_end(&th); - if (!ret && err) - ret = err; - reiserfs_write_unlock(dquot->dq_sb); -out: - return ret; -} - -static int reiserfs_mark_dquot_dirty(struct dquot *dquot) -{ - /* Are we journaling quotas? 
*/ - if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || - REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { - dquot_mark_dquot_dirty(dquot); - return reiserfs_write_dquot(dquot); - } else - return dquot_mark_dquot_dirty(dquot); -} - -static int reiserfs_write_info(struct super_block *sb, int type) -{ - struct reiserfs_transaction_handle th; - int ret, err; - int depth; - - /* Data block + inode block */ - reiserfs_write_lock(sb); - ret = journal_begin(&th, sb, 2); - if (ret) - goto out; - depth = reiserfs_write_unlock_nested(sb); - ret = dquot_commit_info(sb, type); - reiserfs_write_lock_nested(sb, depth); - err = journal_end(&th); - if (!ret && err) - ret = err; -out: - reiserfs_write_unlock(sb); - return ret; -} - -/* - * Turn on quotas during mount time - we need to find the quota file and such... - */ -static int reiserfs_quota_on_mount(struct super_block *sb, int type) -{ - return dquot_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], - REISERFS_SB(sb)->s_jquota_fmt, type); -} - -/* - * Standard function to be called on quota_on - */ -static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, - const struct path *path) -{ - int err; - struct inode *inode; - struct reiserfs_transaction_handle th; - int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA; - - reiserfs_write_lock(sb); - if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) { - err = -EINVAL; - goto out; - } - - /* Quotafile not on the same filesystem? */ - if (path->dentry->d_sb != sb) { - err = -EXDEV; - goto out; - } - inode = d_inode(path->dentry); - /* - * We must not pack tails for quota files on reiserfs for quota - * IO to work - */ - if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { - err = reiserfs_unpack(inode); - if (err) { - reiserfs_warning(sb, "super-6520", - "Unpacking tail of quota file failed" - " (%d). Cannot turn on quotas.", err); - err = -EINVAL; - goto out; - } - mark_inode_dirty(inode); - } - /* Journaling quota? */ - if (REISERFS_SB(sb)->s_qf_names[type]) { - /* Quotafile not of fs root? */ - if (path->dentry->d_parent != sb->s_root) - reiserfs_warning(sb, "super-6521", - "Quota file not on filesystem root. " - "Journalled quota will not work."); - } - - /* - * When we journal data on quota file, we have to flush journal to see - * all updates to the file when we bypass pagecache... 
- */ - if (reiserfs_file_data_log(inode)) { - /* Just start temporary transaction and finish it */ - err = journal_begin(&th, sb, 1); - if (err) - goto out; - err = journal_end_sync(&th); - if (err) - goto out; - } - reiserfs_write_unlock(sb); - err = dquot_quota_on(sb, type, format_id, path); - if (!err) { - inode_lock(inode); - REISERFS_I(inode)->i_attrs |= REISERFS_IMMUTABLE_FL | - REISERFS_NOATIME_FL; - inode_set_flags(inode, S_IMMUTABLE | S_NOATIME, - S_IMMUTABLE | S_NOATIME); - inode_unlock(inode); - mark_inode_dirty(inode); - } - return err; -out: - reiserfs_write_unlock(sb); - return err; -} - -static int reiserfs_quota_off(struct super_block *sb, int type) -{ - int err; - struct inode *inode = sb_dqopt(sb)->files[type]; - - if (!inode || !igrab(inode)) - goto out; - - err = dquot_quota_off(sb, type); - if (err) - goto out_put; - - inode_lock(inode); - REISERFS_I(inode)->i_attrs &= ~(REISERFS_IMMUTABLE_FL | - REISERFS_NOATIME_FL); - inode_set_flags(inode, 0, S_IMMUTABLE | S_NOATIME); - inode_unlock(inode); - mark_inode_dirty(inode); -out_put: - iput(inode); - return err; -out: - return dquot_quota_off(sb, type); -} - -/* - * Read data from quotafile - avoid pagecache and such because we cannot afford - * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and no one else should touch the files) - * we don't have to be afraid of races - */ -static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, - size_t len, loff_t off) -{ - struct inode *inode = sb_dqopt(sb)->files[type]; - unsigned long blk = off >> sb->s_blocksize_bits; - int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; - size_t toread; - struct buffer_head tmp_bh, *bh; - loff_t i_size = i_size_read(inode); - - if (off > i_size) - return 0; - if (off + len > i_size) - len = i_size - off; - toread = len; - while (toread > 0) { - tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); - tmp_bh.b_state = 0; - /* - * Quota files are without tails so we can safely - * use this function - */ - reiserfs_write_lock(sb); - err = reiserfs_get_block(inode, blk, &tmp_bh, 0); - reiserfs_write_unlock(sb); - if (err) - return err; - if (!buffer_mapped(&tmp_bh)) /* A hole? 
*/ - memset(data, 0, tocopy); - else { - bh = sb_bread(sb, tmp_bh.b_blocknr); - if (!bh) - return -EIO; - memcpy(data, bh->b_data + offset, tocopy); - brelse(bh); - } - offset = 0; - toread -= tocopy; - data += tocopy; - blk++; - } - return len; -} - -/* - * Write to quotafile (we know the transaction is already started and has - * enough credits) - */ -static ssize_t reiserfs_quota_write(struct super_block *sb, int type, - const char *data, size_t len, loff_t off) -{ - struct inode *inode = sb_dqopt(sb)->files[type]; - unsigned long blk = off >> sb->s_blocksize_bits; - int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; - int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL; - size_t towrite = len; - struct buffer_head tmp_bh, *bh; - - if (!current->journal_info) { - printk(KERN_WARNING "reiserfs: Quota write (off=%llu, len=%llu) cancelled because transaction is not started.\n", - (unsigned long long)off, (unsigned long long)len); - return -EIO; - } - while (towrite > 0) { - tocopy = min_t(unsigned long, sb->s_blocksize - offset, towrite); - tmp_bh.b_state = 0; - reiserfs_write_lock(sb); - err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE); - reiserfs_write_unlock(sb); - if (err) - goto out; - if (offset || tocopy != sb->s_blocksize) - bh = sb_bread(sb, tmp_bh.b_blocknr); - else - bh = sb_getblk(sb, tmp_bh.b_blocknr); - if (!bh) { - err = -EIO; - goto out; - } - lock_buffer(bh); - memcpy(bh->b_data + offset, data, tocopy); - flush_dcache_page(bh->b_page); - set_buffer_uptodate(bh); - unlock_buffer(bh); - reiserfs_write_lock(sb); - reiserfs_prepare_for_journal(sb, bh, 1); - journal_mark_dirty(current->journal_info, bh); - if (!journal_quota) - reiserfs_add_ordered_list(inode, bh); - reiserfs_write_unlock(sb); - brelse(bh); - offset = 0; - towrite -= tocopy; - data += tocopy; - blk++; - } -out: - if (len == towrite) - return err; - if (inode->i_size < off + len - towrite) - i_size_write(inode, off + len - towrite); - inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); - mark_inode_dirty(inode); - return len - towrite; -} - -#endif - -static struct dentry *get_super_block(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) -{ - return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super); -} - -static int __init init_reiserfs_fs(void) -{ - int ret; - - ret = init_inodecache(); - if (ret) - return ret; - - reiserfs_proc_info_global_init(); - - ret = register_filesystem(&reiserfs_fs_type); - if (ret) - goto out; - - return 0; -out: - reiserfs_proc_info_global_done(); - destroy_inodecache(); - - return ret; -} - -static void __exit exit_reiserfs_fs(void) -{ - reiserfs_proc_info_global_done(); - unregister_filesystem(&reiserfs_fs_type); - destroy_inodecache(); -} - -struct file_system_type reiserfs_fs_type = { - .owner = THIS_MODULE, - .name = "reiserfs", - .mount = get_super_block, - .kill_sb = reiserfs_kill_sb, - .fs_flags = FS_REQUIRES_DEV, -}; -MODULE_ALIAS_FS("reiserfs"); - -MODULE_DESCRIPTION("ReiserFS journaled filesystem"); -MODULE_AUTHOR("Hans Reiser "); -MODULE_LICENSE("GPL"); - -module_init(init_reiserfs_fs); -module_exit(exit_reiserfs_fs); diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c deleted file mode 100644 index 2cec61af2a9e..000000000000 --- a/fs/reiserfs/tail_conversion.c +++ /dev/null @@ -1,318 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright - * details - */ - -#include -#include -#include 
-#include "reiserfs.h" - -/* - * access to tail : when one is going to read tail it must make sure, that is - * not running. direct2indirect and indirect2direct can not run concurrently - */ - -/* - * Converts direct items to an unformatted node. Panics if file has no - * tail. -ENOSPC if no disk space for conversion - */ -/* - * path points to first direct item of the file regardless of how many of - * them are there - */ -int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, - struct treepath *path, struct buffer_head *unbh, - loff_t tail_offset) -{ - struct super_block *sb = inode->i_sb; - struct buffer_head *up_to_date_bh; - struct item_head *p_le_ih = tp_item_head(path); - unsigned long total_tail = 0; - - /* Key to search for the last byte of the converted item. */ - struct cpu_key end_key; - - /* - * new indirect item to be inserted or key - * of unfm pointer to be pasted - */ - struct item_head ind_ih; - int blk_size; - /* returned value for reiserfs_insert_item and clones */ - int retval; - /* Handle on an unformatted node that will be inserted in the tree. */ - unp_t unfm_ptr; - - BUG_ON(!th->t_trans_id); - - REISERFS_SB(sb)->s_direct2indirect++; - - blk_size = sb->s_blocksize; - - /* - * and key to search for append or insert pointer to the new - * unformatted node. - */ - copy_item_head(&ind_ih, p_le_ih); - set_le_ih_k_offset(&ind_ih, tail_offset); - set_le_ih_k_type(&ind_ih, TYPE_INDIRECT); - - /* Set the key to search for the place for new unfm pointer */ - make_cpu_key(&end_key, inode, tail_offset, TYPE_INDIRECT, 4); - - /* FIXME: we could avoid this */ - if (search_for_position_by_key(sb, &end_key, path) == POSITION_FOUND) { - reiserfs_error(sb, "PAP-14030", - "pasted or inserted byte exists in " - "the tree %K. Use fsck to repair.", &end_key); - pathrelse(path); - return -EIO; - } - - p_le_ih = tp_item_head(path); - - unfm_ptr = cpu_to_le32(unbh->b_blocknr); - - if (is_statdata_le_ih(p_le_ih)) { - /* Insert new indirect item. */ - set_ih_free_space(&ind_ih, 0); /* delete at nearest future */ - put_ih_item_len(&ind_ih, UNFM_P_SIZE); - PATH_LAST_POSITION(path)++; - retval = - reiserfs_insert_item(th, path, &end_key, &ind_ih, inode, - (char *)&unfm_ptr); - } else { - /* Paste into last indirect item of an object. */ - retval = reiserfs_paste_into_item(th, path, &end_key, inode, - (char *)&unfm_ptr, - UNFM_P_SIZE); - } - if (retval) { - return retval; - } - /* - * note: from here there are two keys which have matching first - * three key components. They only differ by the fourth one. - */ - - /* Set the key to search for the direct items of the file */ - make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT, - 4); - - /* - * Move bytes from the direct items to the new unformatted node - * and delete them. - */ - while (1) { - int tail_size; - - /* - * end_key.k_offset is set so, that we will always have found - * last item of the file - */ - if (search_for_position_by_key(sb, &end_key, path) == - POSITION_FOUND) - reiserfs_panic(sb, "PAP-14050", - "direct item (%K) not found", &end_key); - p_le_ih = tp_item_head(path); - RFALSE(!is_direct_le_ih(p_le_ih), - "vs-14055: direct item expected(%K), found %h", - &end_key, p_le_ih); - tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1)) - + ih_item_len(p_le_ih) - 1; - - /* - * we only send the unbh pointer if the buffer is not - * up to date. 
this avoids overwriting good data from - * writepage() with old data from the disk or buffer cache - * Special case: unbh->b_page will be NULL if we are coming - * through DIRECT_IO handler here. - */ - if (!unbh->b_page || buffer_uptodate(unbh) - || PageUptodate(unbh->b_page)) { - up_to_date_bh = NULL; - } else { - up_to_date_bh = unbh; - } - retval = reiserfs_delete_item(th, path, &end_key, inode, - up_to_date_bh); - - total_tail += retval; - - /* done: file does not have direct items anymore */ - if (tail_size == retval) - break; - - } - /* - * if we've copied bytes from disk into the page, we need to zero - * out the unused part of the block (it was not up to date before) - */ - if (up_to_date_bh) { - unsigned pgoff = - (tail_offset + total_tail - 1) & (PAGE_SIZE - 1); - char *kaddr = kmap_atomic(up_to_date_bh->b_page); - memset(kaddr + pgoff, 0, blk_size - total_tail); - kunmap_atomic(kaddr); - } - - REISERFS_I(inode)->i_first_direct_byte = U32_MAX; - - return 0; -} - -/* stolen from fs/buffer.c */ -void reiserfs_unmap_buffer(struct buffer_head *bh) -{ - lock_buffer(bh); - if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { - BUG(); - } - clear_buffer_dirty(bh); - /* - * Remove the buffer from whatever list it belongs to. We are mostly - * interested in removing it from per-sb j_dirty_buffers list, to avoid - * BUG() on attempt to write not mapped buffer - */ - if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) { - struct inode *inode = bh->b_folio->mapping->host; - struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); - spin_lock(&j->j_dirty_buffers_lock); - list_del_init(&bh->b_assoc_buffers); - reiserfs_free_jh(bh); - spin_unlock(&j->j_dirty_buffers_lock); - } - clear_buffer_mapped(bh); - clear_buffer_req(bh); - clear_buffer_new(bh); - bh->b_bdev = NULL; - unlock_buffer(bh); -} - -/* - * this first locks inode (neither reads nor sync are permitted), - * reads tail through page cache, insert direct item. When direct item - * inserted successfully inode is left locked. Return value is always - * what we expect from it (number of cut bytes). But when tail remains - * in the unformatted node, we set mode to SKIP_BALANCING and unlock - * inode - */ -int indirect2direct(struct reiserfs_transaction_handle *th, - struct inode *inode, struct page *page, - struct treepath *path, /* path to the indirect item. */ - const struct cpu_key *item_key, /* Key to look for - * unformatted node - * pointer to be cut. */ - loff_t n_new_file_size, /* New file size. */ - char *mode) -{ - struct super_block *sb = inode->i_sb; - struct item_head s_ih; - unsigned long block_size = sb->s_blocksize; - char *tail; - int tail_len, round_tail_len; - loff_t pos, pos1; /* position of first byte of the tail */ - struct cpu_key key; - - BUG_ON(!th->t_trans_id); - - REISERFS_SB(sb)->s_indirect2direct++; - - *mode = M_SKIP_BALANCING; - - /* store item head path points to. */ - copy_item_head(&s_ih, tp_item_head(path)); - - tail_len = (n_new_file_size & (block_size - 1)); - if (get_inode_sd_version(inode) == STAT_DATA_V2) - round_tail_len = ROUND_UP(tail_len); - else - round_tail_len = tail_len; - - pos = - le_ih_k_offset(&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE - - 1) * sb->s_blocksize; - pos1 = pos; - - /* - * we are protected by i_mutex. 
The tail can not disapper, not - * append can be done either - * we are in truncate or packing tail in file_release - */ - - tail = (char *)kmap(page); /* this can schedule */ - - if (path_changed(&s_ih, path)) { - /* re-search indirect item */ - if (search_for_position_by_key(sb, item_key, path) - == POSITION_NOT_FOUND) - reiserfs_panic(sb, "PAP-5520", - "item to be converted %K does not exist", - item_key); - copy_item_head(&s_ih, tp_item_head(path)); -#ifdef CONFIG_REISERFS_CHECK - pos = le_ih_k_offset(&s_ih) - 1 + - (ih_item_len(&s_ih) / UNFM_P_SIZE - - 1) * sb->s_blocksize; - if (pos != pos1) - reiserfs_panic(sb, "vs-5530", "tail position " - "changed while we were reading it"); -#endif - } - - /* Set direct item header to insert. */ - make_le_item_head(&s_ih, NULL, get_inode_item_key_version(inode), - pos1 + 1, TYPE_DIRECT, round_tail_len, - 0xffff /*ih_free_space */ ); - - /* - * we want a pointer to the first byte of the tail in the page. - * the page was locked and this part of the page was up to date when - * indirect2direct was called, so we know the bytes are still valid - */ - tail = tail + (pos & (PAGE_SIZE - 1)); - - PATH_LAST_POSITION(path)++; - - key = *item_key; - set_cpu_key_k_type(&key, TYPE_DIRECT); - key.key_length = 4; - /* Insert tail as new direct item in the tree */ - if (reiserfs_insert_item(th, path, &key, &s_ih, inode, - tail ? tail : NULL) < 0) { - /* - * No disk memory. So we can not convert last unformatted node - * to the direct item. In this case we used to adjust - * indirect items's ih_free_space. Now ih_free_space is not - * used, it would be ideal to write zeros to corresponding - * unformatted node. For now i_size is considered as guard for - * going out of file size - */ - kunmap(page); - return block_size - round_tail_len; - } - kunmap(page); - - /* make sure to get the i_blocks changes from reiserfs_insert_item */ - reiserfs_update_sd(th, inode); - - /* - * note: we have now the same as in above direct2indirect - * conversion: there are two keys which have matching first three - * key components. They only differ by the fourth one. - */ - - /* - * We have inserted new direct item and must remove last - * unformatted node. - */ - *mode = M_CUT; - - /* we store position of first direct item in the in-core inode */ - /* mark_file_with_tail (inode, pos1 + 1); */ - REISERFS_I(inode)->i_first_direct_byte = pos1 + 1; - - return block_size - round_tail_len; -} diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c deleted file mode 100644 index 998035a6388e..000000000000 --- a/fs/reiserfs/xattr.c +++ /dev/null @@ -1,1039 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/fs/reiserfs/xattr.c - * - * Copyright (c) 2002 by Jeff Mahoney, - * - */ - -/* - * In order to implement EA/ACLs in a clean, backwards compatible manner, - * they are implemented as files in a "private" directory. - * Each EA is in it's own file, with the directory layout like so (/ is assumed - * to be relative to fs root). Inside the /.reiserfs_priv/xattrs directory, - * directories named using the capital-hex form of the objectid and - * generation number are used. Inside each directory are individual files - * named with the name of the extended attribute. - * - * So, for objectid 12648430, we could have: - * /.reiserfs_priv/xattrs/C0FFEE.0/system.posix_acl_access - * /.reiserfs_priv/xattrs/C0FFEE.0/system.posix_acl_default - * /.reiserfs_priv/xattrs/C0FFEE.0/user.Content-Type - * .. or similar. - * - * The file contents are the text of the EA. 
The size is known based on the - * stat data describing the file. - * - * In the case of system.posix_acl_access and system.posix_acl_default, since - * these are special cases for filesystem ACLs, they are interpreted by the - * kernel, in addition, they are negatively and positively cached and attached - * to the inode so that unnecessary lookups are avoided. - * - * Locking works like so: - * Directory components (xattr root, xattr dir) are protectd by their i_mutex. - * The xattrs themselves are protected by the xattr_sem. - */ - -#include "reiserfs.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "xattr.h" -#include "acl.h" -#include -#include -#include -#include -#include -#include -#include - -#define PRIVROOT_NAME ".reiserfs_priv" -#define XAROOT_NAME "xattrs" - - -/* - * Helpers for inode ops. We do this so that we don't have all the VFS - * overhead and also for proper i_mutex annotation. - * dir->i_mutex must be held for all of them. - */ -#ifdef CONFIG_REISERFS_FS_XATTR -static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) -{ - BUG_ON(!inode_is_locked(dir)); - return dir->i_op->create(&nop_mnt_idmap, dir, dentry, mode, true); -} -#endif - -static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) -{ - BUG_ON(!inode_is_locked(dir)); - return dir->i_op->mkdir(&nop_mnt_idmap, dir, dentry, mode); -} - -/* - * We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr - * mutation ops aren't called during rename or splace, which are the - * only other users of I_MUTEX_CHILD. It violates the ordering, but that's - * better than allocating another subclass just for this code. - */ -static int xattr_unlink(struct inode *dir, struct dentry *dentry) -{ - int error; - - BUG_ON(!inode_is_locked(dir)); - - inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD); - error = dir->i_op->unlink(dir, dentry); - inode_unlock(d_inode(dentry)); - - if (!error) - d_delete(dentry); - return error; -} - -static int xattr_rmdir(struct inode *dir, struct dentry *dentry) -{ - int error; - - BUG_ON(!inode_is_locked(dir)); - - inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD); - error = dir->i_op->rmdir(dir, dentry); - if (!error) - d_inode(dentry)->i_flags |= S_DEAD; - inode_unlock(d_inode(dentry)); - if (!error) - d_delete(dentry); - - return error; -} - -#define xattr_may_create(flags) (!flags || flags & XATTR_CREATE) - -static struct dentry *open_xa_root(struct super_block *sb, int flags) -{ - struct dentry *privroot = REISERFS_SB(sb)->priv_root; - struct dentry *xaroot; - - if (d_really_is_negative(privroot)) - return ERR_PTR(-EOPNOTSUPP); - - inode_lock_nested(d_inode(privroot), I_MUTEX_XATTR); - - xaroot = dget(REISERFS_SB(sb)->xattr_root); - if (!xaroot) - xaroot = ERR_PTR(-EOPNOTSUPP); - else if (d_really_is_negative(xaroot)) { - int err = -ENODATA; - - if (xattr_may_create(flags)) - err = xattr_mkdir(d_inode(privroot), xaroot, 0700); - if (err) { - dput(xaroot); - xaroot = ERR_PTR(err); - } - } - - inode_unlock(d_inode(privroot)); - return xaroot; -} - -static struct dentry *open_xa_dir(const struct inode *inode, int flags) -{ - struct dentry *xaroot, *xadir; - char namebuf[17]; - - xaroot = open_xa_root(inode->i_sb, flags); - if (IS_ERR(xaroot)) - return xaroot; - - snprintf(namebuf, sizeof(namebuf), "%X.%X", - le32_to_cpu(INODE_PKEY(inode)->k_objectid), - inode->i_generation); - - inode_lock_nested(d_inode(xaroot), I_MUTEX_XATTR); - - xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf)); - 
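
The open_xa_dir() helper just above builds the per-inode xattr directory name with a "%X.%X" format over the object id and generation, which is how objectid 12648430 from the layout comment becomes "C0FFEE.0". A minimal user-space sketch of that naming (the sample objectid and generation are the ones given in the layout comment; everything else here is illustrative only):

#include <stdio.h>

/* Same capital-hex "objectid.generation" form used for the xattr directory. */
static void xattr_dir_name(char *buf, size_t len,
                           unsigned objectid, unsigned generation)
{
	snprintf(buf, len, "%X.%X", objectid, generation);
}

int main(void)
{
	char name[32];

	/* objectid 12648430 == 0xC0FFEE, generation 0 -> "C0FFEE.0" */
	xattr_dir_name(name, sizeof(name), 12648430, 0);
	printf("/.reiserfs_priv/xattrs/%s/user.Content-Type\n", name);
	return 0;
}
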
if (!IS_ERR(xadir) && d_really_is_negative(xadir)) { - int err = -ENODATA; - - if (xattr_may_create(flags)) - err = xattr_mkdir(d_inode(xaroot), xadir, 0700); - if (err) { - dput(xadir); - xadir = ERR_PTR(err); - } - } - - inode_unlock(d_inode(xaroot)); - dput(xaroot); - return xadir; -} - -/* - * The following are side effects of other operations that aren't explicitly - * modifying extended attributes. This includes operations such as permissions - * or ownership changes, object deletions, etc. - */ -struct reiserfs_dentry_buf { - struct dir_context ctx; - struct dentry *xadir; - int count; - int err; - struct dentry *dentries[8]; -}; - -static bool -fill_with_dentries(struct dir_context *ctx, const char *name, int namelen, - loff_t offset, u64 ino, unsigned int d_type) -{ - struct reiserfs_dentry_buf *dbuf = - container_of(ctx, struct reiserfs_dentry_buf, ctx); - struct dentry *dentry; - - WARN_ON_ONCE(!inode_is_locked(d_inode(dbuf->xadir))); - - if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) - return false; - - if (name[0] == '.' && (namelen < 2 || - (namelen == 2 && name[1] == '.'))) - return true; - - dentry = lookup_one_len(name, dbuf->xadir, namelen); - if (IS_ERR(dentry)) { - dbuf->err = PTR_ERR(dentry); - return false; - } else if (d_really_is_negative(dentry)) { - /* A directory entry exists, but no file? */ - reiserfs_error(dentry->d_sb, "xattr-20003", - "Corrupted directory: xattr %pd listed but " - "not found for file %pd.\n", - dentry, dbuf->xadir); - dput(dentry); - dbuf->err = -EIO; - return false; - } - - dbuf->dentries[dbuf->count++] = dentry; - return true; -} - -static void -cleanup_dentry_buf(struct reiserfs_dentry_buf *buf) -{ - int i; - - for (i = 0; i < buf->count; i++) - if (buf->dentries[i]) - dput(buf->dentries[i]); -} - -static int reiserfs_for_each_xattr(struct inode *inode, - int (*action)(struct dentry *, void *), - void *data) -{ - struct dentry *dir; - int i, err = 0; - struct reiserfs_dentry_buf buf = { - .ctx.actor = fill_with_dentries, - }; - - /* Skip out, an xattr has no xattrs associated with it */ - if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1) - return 0; - - dir = open_xa_dir(inode, XATTR_REPLACE); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - goto out; - } else if (d_really_is_negative(dir)) { - err = 0; - goto out_dir; - } - - inode_lock_nested(d_inode(dir), I_MUTEX_XATTR); - - buf.xadir = dir; - while (1) { - err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx); - if (err) - break; - if (buf.err) { - err = buf.err; - break; - } - if (!buf.count) - break; - for (i = 0; !err && i < buf.count && buf.dentries[i]; i++) { - struct dentry *dentry = buf.dentries[i]; - - if (!d_is_dir(dentry)) - err = action(dentry, data); - - dput(dentry); - buf.dentries[i] = NULL; - } - if (err) - break; - buf.count = 0; - } - inode_unlock(d_inode(dir)); - - cleanup_dentry_buf(&buf); - - if (!err) { - /* - * We start a transaction here to avoid a ABBA situation - * between the xattr root's i_mutex and the journal lock. - * This doesn't incur much additional overhead since the - * new transaction will just nest inside the - * outer transaction. 
- */ - int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + - 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); - struct reiserfs_transaction_handle th; - - reiserfs_write_lock(inode->i_sb); - err = journal_begin(&th, inode->i_sb, blocks); - reiserfs_write_unlock(inode->i_sb); - if (!err) { - int jerror; - - inode_lock_nested(d_inode(dir->d_parent), - I_MUTEX_XATTR); - err = action(dir, data); - reiserfs_write_lock(inode->i_sb); - jerror = journal_end(&th); - reiserfs_write_unlock(inode->i_sb); - inode_unlock(d_inode(dir->d_parent)); - err = jerror ?: err; - } - } -out_dir: - dput(dir); -out: - /* - * -ENODATA: this object doesn't have any xattrs - * -EOPNOTSUPP: this file system doesn't have xattrs enabled on disk. - * Neither are errors - */ - if (err == -ENODATA || err == -EOPNOTSUPP) - err = 0; - return err; -} - -static int delete_one_xattr(struct dentry *dentry, void *data) -{ - struct inode *dir = d_inode(dentry->d_parent); - - /* This is the xattr dir, handle specially. */ - if (d_is_dir(dentry)) - return xattr_rmdir(dir, dentry); - - return xattr_unlink(dir, dentry); -} - -static int chown_one_xattr(struct dentry *dentry, void *data) -{ - struct iattr *attrs = data; - int ia_valid = attrs->ia_valid; - int err; - - /* - * We only want the ownership bits. Otherwise, we'll do - * things like change a directory to a regular file if - * ATTR_MODE is set. - */ - attrs->ia_valid &= (ATTR_UID|ATTR_GID); - err = reiserfs_setattr(&nop_mnt_idmap, dentry, attrs); - attrs->ia_valid = ia_valid; - - return err; -} - -/* No i_mutex, but the inode is unconnected. */ -int reiserfs_delete_xattrs(struct inode *inode) -{ - int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL); - - if (err) - reiserfs_warning(inode->i_sb, "jdm-20004", - "Couldn't delete all xattrs (%d)\n", err); - return err; -} - -/* inode->i_mutex: down */ -int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs) -{ - int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs); - - if (err) - reiserfs_warning(inode->i_sb, "jdm-20007", - "Couldn't chown all xattrs (%d)\n", err); - return err; -} - -#ifdef CONFIG_REISERFS_FS_XATTR -/* - * Returns a dentry corresponding to a specific extended attribute file - * for the inode. If flags allow, the file is created. Otherwise, a - * valid or negative dentry, or an error is returned. 
- */ -static struct dentry *xattr_lookup(struct inode *inode, const char *name, - int flags) -{ - struct dentry *xadir, *xafile; - int err = 0; - - xadir = open_xa_dir(inode, flags); - if (IS_ERR(xadir)) - return ERR_CAST(xadir); - - inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR); - xafile = lookup_one_len(name, xadir, strlen(name)); - if (IS_ERR(xafile)) { - err = PTR_ERR(xafile); - goto out; - } - - if (d_really_is_positive(xafile) && (flags & XATTR_CREATE)) - err = -EEXIST; - - if (d_really_is_negative(xafile)) { - err = -ENODATA; - if (xattr_may_create(flags)) - err = xattr_create(d_inode(xadir), xafile, - 0700|S_IFREG); - } - - if (err) - dput(xafile); -out: - inode_unlock(d_inode(xadir)); - dput(xadir); - if (err) - return ERR_PTR(err); - return xafile; -} - -/* Internal operations on file data */ -static inline void reiserfs_put_page(struct page *page) -{ - kunmap(page); - put_page(page); -} - -static struct page *reiserfs_get_page(struct inode *dir, size_t n) -{ - struct address_space *mapping = dir->i_mapping; - struct page *page; - /* - * We can deadlock if we try to free dentries, - * and an unlink/rmdir has just occurred - GFP_NOFS avoids this - */ - mapping_set_gfp_mask(mapping, GFP_NOFS); - page = read_mapping_page(mapping, n >> PAGE_SHIFT, NULL); - if (!IS_ERR(page)) - kmap(page); - return page; -} - -static inline __u32 xattr_hash(const char *msg, int len) -{ - /* - * csum_partial() gives different results for little-endian and - * big endian hosts. Images created on little-endian hosts and - * mounted on big-endian hosts(and vice versa) will see csum mismatches - * when trying to fetch xattrs. Treating the hash as __wsum_t would - * lower the frequency of mismatch. This is an endianness bug in - * reiserfs. The return statement would result in a sparse warning. Do - * not fix the sparse warning so as to not hide a reminder of the bug. 
- */ - return csum_partial(msg, len, 0); -} - -int reiserfs_commit_write(struct file *f, struct page *page, - unsigned from, unsigned to); - -static void update_ctime(struct inode *inode) -{ - struct timespec64 now = current_time(inode); - struct timespec64 ctime = inode_get_ctime(inode); - - if (inode_unhashed(inode) || !inode->i_nlink || - timespec64_equal(&ctime, &now)) - return; - - inode_set_ctime_to_ts(inode, now); - mark_inode_dirty(inode); -} - -static int lookup_and_delete_xattr(struct inode *inode, const char *name) -{ - int err = 0; - struct dentry *dentry, *xadir; - - xadir = open_xa_dir(inode, XATTR_REPLACE); - if (IS_ERR(xadir)) - return PTR_ERR(xadir); - - inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR); - dentry = lookup_one_len(name, xadir, strlen(name)); - if (IS_ERR(dentry)) { - err = PTR_ERR(dentry); - goto out_dput; - } - - if (d_really_is_positive(dentry)) { - err = xattr_unlink(d_inode(xadir), dentry); - update_ctime(inode); - } - - dput(dentry); -out_dput: - inode_unlock(d_inode(xadir)); - dput(xadir); - return err; -} - - -/* Generic extended attribute operations that can be used by xa plugins */ - -/* - * inode->i_mutex: down - */ -int -reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, - struct inode *inode, const char *name, - const void *buffer, size_t buffer_size, int flags) -{ - int err = 0; - struct dentry *dentry; - struct page *page; - char *data; - size_t file_pos = 0; - size_t buffer_pos = 0; - size_t new_size; - __u32 xahash = 0; - - if (get_inode_sd_version(inode) == STAT_DATA_V1) - return -EOPNOTSUPP; - - if (!buffer) { - err = lookup_and_delete_xattr(inode, name); - return err; - } - - dentry = xattr_lookup(inode, name, flags); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - down_write(&REISERFS_I(inode)->i_xattr_sem); - - xahash = xattr_hash(buffer, buffer_size); - while (buffer_pos < buffer_size || buffer_pos == 0) { - size_t chunk; - size_t skip = 0; - size_t page_offset = (file_pos & (PAGE_SIZE - 1)); - - if (buffer_size - buffer_pos > PAGE_SIZE) - chunk = PAGE_SIZE; - else - chunk = buffer_size - buffer_pos; - - page = reiserfs_get_page(d_inode(dentry), file_pos); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out_unlock; - } - - lock_page(page); - data = page_address(page); - - if (file_pos == 0) { - struct reiserfs_xattr_header *rxh; - - skip = file_pos = sizeof(struct reiserfs_xattr_header); - if (chunk + skip > PAGE_SIZE) - chunk = PAGE_SIZE - skip; - rxh = (struct reiserfs_xattr_header *)data; - rxh->h_magic = cpu_to_le32(REISERFS_XATTR_MAGIC); - rxh->h_hash = cpu_to_le32(xahash); - } - - reiserfs_write_lock(inode->i_sb); - err = __reiserfs_write_begin(page, page_offset, chunk + skip); - if (!err) { - if (buffer) - memcpy(data + skip, buffer + buffer_pos, chunk); - err = reiserfs_commit_write(NULL, page, page_offset, - page_offset + chunk + - skip); - } - reiserfs_write_unlock(inode->i_sb); - unlock_page(page); - reiserfs_put_page(page); - buffer_pos += chunk; - file_pos += chunk; - skip = 0; - if (err || buffer_size == 0 || !buffer) - break; - } - - new_size = buffer_size + sizeof(struct reiserfs_xattr_header); - if (!err && new_size < i_size_read(d_inode(dentry))) { - struct iattr newattrs = { - .ia_ctime = current_time(inode), - .ia_size = new_size, - .ia_valid = ATTR_SIZE | ATTR_CTIME, - }; - - inode_lock_nested(d_inode(dentry), I_MUTEX_XATTR); - inode_dio_wait(d_inode(dentry)); - - err = reiserfs_setattr(&nop_mnt_idmap, dentry, &newattrs); - inode_unlock(d_inode(dentry)); - } else - update_ctime(inode); 
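
As the write loop above shows, each attribute value is stored behind a small reiserfs_xattr_header holding a magic number and a checksum of the value, and new_size counts that header in the file size; reiserfs_xattr_get() later verifies both fields when reading the value back. A rough user-space sketch of that layout, assuming the two 32-bit little-endian fields implied by the assignments above (the MAGIC constant and the checksum below are placeholders, not the real on-disk values or the kernel's csum_partial()):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_XATTR_MAGIC 0x58415452u	/* placeholder, not REISERFS_XATTR_MAGIC */

struct toy_xattr_header {	/* mirrors h_magic / h_hash above */
	uint32_t h_magic;	/* host order here; the kernel stores cpu_to_le32() */
	uint32_t h_hash;	/* checksum of the value bytes */
};

/* Placeholder checksum standing in for csum_partial(); not the kernel algorithm. */
static uint32_t toy_hash(const void *buf, size_t len)
{
	const unsigned char *p = buf;
	uint32_t h = 0;

	while (len--)
		h = h * 31 + *p++;
	return h;
}

/* Lay out header + raw value, the same shape the set and get paths agree on. */
static size_t pack_xattr(unsigned char *out, const void *value, size_t value_len)
{
	struct toy_xattr_header hdr = {
		.h_magic = TOY_XATTR_MAGIC,
		.h_hash  = toy_hash(value, value_len),
	};

	memcpy(out, &hdr, sizeof(hdr));
	memcpy(out + sizeof(hdr), value, value_len);
	return sizeof(hdr) + value_len;	/* corresponds to new_size above */
}

int main(void)
{
	unsigned char file[64];
	const char value[] = "text/plain";
	size_t sz = pack_xattr(file, value, sizeof(value) - 1);

	printf("stored %zu bytes (8-byte header + %zu-byte value)\n",
	       sz, sizeof(value) - 1);
	return 0;
}
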
-out_unlock: - up_write(&REISERFS_I(inode)->i_xattr_sem); - dput(dentry); - return err; -} - -/* We need to start a transaction to maintain lock ordering */ -int reiserfs_xattr_set(struct inode *inode, const char *name, - const void *buffer, size_t buffer_size, int flags) -{ - - struct reiserfs_transaction_handle th; - int error, error2; - size_t jbegin_count = reiserfs_xattr_nblocks(inode, buffer_size); - - /* Check before we start a transaction and then do nothing. */ - if (!d_really_is_positive(REISERFS_SB(inode->i_sb)->priv_root)) - return -EOPNOTSUPP; - - if (!(flags & XATTR_REPLACE)) - jbegin_count += reiserfs_xattr_jcreate_nblocks(inode); - - reiserfs_write_lock(inode->i_sb); - error = journal_begin(&th, inode->i_sb, jbegin_count); - reiserfs_write_unlock(inode->i_sb); - if (error) { - return error; - } - - error = reiserfs_xattr_set_handle(&th, inode, name, - buffer, buffer_size, flags); - - reiserfs_write_lock(inode->i_sb); - error2 = journal_end(&th); - reiserfs_write_unlock(inode->i_sb); - if (error == 0) - error = error2; - - return error; -} - -/* - * inode->i_mutex: down - */ -int -reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer, - size_t buffer_size) -{ - ssize_t err = 0; - struct dentry *dentry; - size_t isize; - size_t file_pos = 0; - size_t buffer_pos = 0; - struct page *page; - __u32 hash = 0; - - if (name == NULL) - return -EINVAL; - - /* - * We can't have xattrs attached to v1 items since they don't have - * generation numbers - */ - if (get_inode_sd_version(inode) == STAT_DATA_V1) - return -EOPNOTSUPP; - - /* - * priv_root needn't be initialized during mount so allow initial - * lookups to succeed. - */ - if (!REISERFS_SB(inode->i_sb)->priv_root) - return 0; - - dentry = xattr_lookup(inode, name, XATTR_REPLACE); - if (IS_ERR(dentry)) { - err = PTR_ERR(dentry); - goto out; - } - - down_read(&REISERFS_I(inode)->i_xattr_sem); - - isize = i_size_read(d_inode(dentry)); - - /* Just return the size needed */ - if (buffer == NULL) { - err = isize - sizeof(struct reiserfs_xattr_header); - goto out_unlock; - } - - if (buffer_size < isize - sizeof(struct reiserfs_xattr_header)) { - err = -ERANGE; - goto out_unlock; - } - - while (file_pos < isize) { - size_t chunk; - char *data; - size_t skip = 0; - - if (isize - file_pos > PAGE_SIZE) - chunk = PAGE_SIZE; - else - chunk = isize - file_pos; - - page = reiserfs_get_page(d_inode(dentry), file_pos); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out_unlock; - } - - lock_page(page); - data = page_address(page); - if (file_pos == 0) { - struct reiserfs_xattr_header *rxh = - (struct reiserfs_xattr_header *)data; - skip = file_pos = sizeof(struct reiserfs_xattr_header); - chunk -= skip; - /* Magic doesn't match up.. 
*/ - if (rxh->h_magic != cpu_to_le32(REISERFS_XATTR_MAGIC)) { - unlock_page(page); - reiserfs_put_page(page); - reiserfs_warning(inode->i_sb, "jdm-20001", - "Invalid magic for xattr (%s) " - "associated with %k", name, - INODE_PKEY(inode)); - err = -EIO; - goto out_unlock; - } - hash = le32_to_cpu(rxh->h_hash); - } - memcpy(buffer + buffer_pos, data + skip, chunk); - unlock_page(page); - reiserfs_put_page(page); - file_pos += chunk; - buffer_pos += chunk; - skip = 0; - } - err = isize - sizeof(struct reiserfs_xattr_header); - - if (xattr_hash(buffer, isize - sizeof(struct reiserfs_xattr_header)) != - hash) { - reiserfs_warning(inode->i_sb, "jdm-20002", - "Invalid hash for xattr (%s) associated " - "with %k", name, INODE_PKEY(inode)); - err = -EIO; - } - -out_unlock: - up_read(&REISERFS_I(inode)->i_xattr_sem); - dput(dentry); - -out: - return err; -} - -/* - * In order to implement different sets of xattr operations for each xattr - * prefix with the generic xattr API, a filesystem should create a - * null-terminated array of struct xattr_handler (one for each prefix) and - * hang a pointer to it off of the s_xattr field of the superblock. - * - * The generic_fooxattr() functions will use this list to dispatch xattr - * operations to the correct xattr_handler. - */ -#define for_each_xattr_handler(handlers, handler) \ - for ((handler) = *(handlers)++; \ - (handler) != NULL; \ - (handler) = *(handlers)++) - -static inline bool reiserfs_posix_acl_list(const char *name, - struct dentry *dentry) -{ - return (posix_acl_type(name) >= 0) && - IS_POSIXACL(d_backing_inode(dentry)); -} - -/* This is the implementation for the xattr plugin infrastructure */ -static inline bool reiserfs_xattr_list(const struct xattr_handler * const *handlers, - const char *name, struct dentry *dentry) -{ - if (handlers) { - const struct xattr_handler *xah = NULL; - - for_each_xattr_handler(handlers, xah) { - const char *prefix = xattr_prefix(xah); - - if (strncmp(prefix, name, strlen(prefix))) - continue; - - if (!xattr_handler_can_list(xah, dentry)) - return false; - - return true; - } - } - - return reiserfs_posix_acl_list(name, dentry); -} - -struct listxattr_buf { - struct dir_context ctx; - size_t size; - size_t pos; - char *buf; - struct dentry *dentry; -}; - -static bool listxattr_filler(struct dir_context *ctx, const char *name, - int namelen, loff_t offset, u64 ino, - unsigned int d_type) -{ - struct listxattr_buf *b = - container_of(ctx, struct listxattr_buf, ctx); - size_t size; - - if (name[0] != '.' || - (namelen != 1 && (name[1] != '.' || namelen != 2))) { - if (!reiserfs_xattr_list(b->dentry->d_sb->s_xattr, name, - b->dentry)) - return true; - size = namelen + 1; - if (b->buf) { - if (b->pos + size > b->size) { - b->pos = -ERANGE; - return false; - } - memcpy(b->buf + b->pos, name, namelen); - b->buf[b->pos + namelen] = 0; - } - b->pos += size; - } - return true; -} - -/* - * Inode operation listxattr() - * - * We totally ignore the generic listxattr here because it would be stupid - * not to. Since the xattrs are organized in a directory, we can just - * readdir to find them. - */ -ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) -{ - struct dentry *dir; - int err = 0; - struct listxattr_buf buf = { - .ctx.actor = listxattr_filler, - .dentry = dentry, - .buf = buffer, - .size = buffer ? 
size : 0, - }; - - if (d_really_is_negative(dentry)) - return -EINVAL; - - if (get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) - return -EOPNOTSUPP; - - dir = open_xa_dir(d_inode(dentry), XATTR_REPLACE); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - if (err == -ENODATA) - err = 0; /* Not an error if there aren't any xattrs */ - goto out; - } - - inode_lock_nested(d_inode(dir), I_MUTEX_XATTR); - err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx); - inode_unlock(d_inode(dir)); - - if (!err) - err = buf.pos; - - dput(dir); -out: - return err; -} - -static int create_privroot(struct dentry *dentry) -{ - int err; - struct inode *inode = d_inode(dentry->d_parent); - - WARN_ON_ONCE(!inode_is_locked(inode)); - - err = xattr_mkdir(inode, dentry, 0700); - if (err || d_really_is_negative(dentry)) { - reiserfs_warning(dentry->d_sb, "jdm-20006", - "xattrs/ACLs enabled and couldn't " - "find/create .reiserfs_priv. " - "Failing mount."); - return -EOPNOTSUPP; - } - - reiserfs_init_priv_inode(d_inode(dentry)); - reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " - "storage.\n", PRIVROOT_NAME); - - return 0; -} - -#else -int __init reiserfs_xattr_register_handlers(void) { return 0; } -void reiserfs_xattr_unregister_handlers(void) {} -static int create_privroot(struct dentry *dentry) { return 0; } -#endif - -/* Actual operations that are exported to VFS-land */ -const struct xattr_handler * const reiserfs_xattr_handlers[] = { -#ifdef CONFIG_REISERFS_FS_XATTR - &reiserfs_xattr_user_handler, - &reiserfs_xattr_trusted_handler, -#endif -#ifdef CONFIG_REISERFS_FS_SECURITY - &reiserfs_xattr_security_handler, -#endif - NULL -}; - -static int xattr_mount_check(struct super_block *s) -{ - /* - * We need generation numbers to ensure that the oid mapping is correct - * v3.5 filesystems don't have them. - */ - if (old_format_only(s)) { - if (reiserfs_xattrs_optional(s)) { - /* - * Old format filesystem, but optional xattrs have - * been enabled. Error out. - */ - reiserfs_warning(s, "jdm-2005", - "xattrs/ACLs not supported " - "on pre-v3.6 format filesystems. " - "Failing mount."); - return -EOPNOTSUPP; - } - } - - return 0; -} - -int reiserfs_permission(struct mnt_idmap *idmap, struct inode *inode, - int mask) -{ - /* - * We don't do permission checks on the internal objects. - * Permissions are determined by the "owning" object. - */ - if (IS_PRIVATE(inode)) - return 0; - - return generic_permission(&nop_mnt_idmap, inode, mask); -} - -static int xattr_hide_revalidate(struct dentry *dentry, unsigned int flags) -{ - return -EPERM; -} - -static const struct dentry_operations xattr_lookup_poison_ops = { - .d_revalidate = xattr_hide_revalidate, -}; - -int reiserfs_lookup_privroot(struct super_block *s) -{ - struct dentry *dentry; - int err = 0; - - /* If we don't have the privroot located yet - go find it */ - inode_lock(d_inode(s->s_root)); - dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, - strlen(PRIVROOT_NAME)); - if (!IS_ERR(dentry)) { - REISERFS_SB(s)->priv_root = dentry; - d_set_d_op(dentry, &xattr_lookup_poison_ops); - if (d_really_is_positive(dentry)) - reiserfs_init_priv_inode(d_inode(dentry)); - } else - err = PTR_ERR(dentry); - inode_unlock(d_inode(s->s_root)); - - return err; -} - -/* - * We need to take a copy of the mount flags since things like - * SB_RDONLY don't get set until *after* we're called. 
- * mount_flags != mount_options - */ -int reiserfs_xattr_init(struct super_block *s, int mount_flags) -{ - int err = 0; - struct dentry *privroot = REISERFS_SB(s)->priv_root; - - err = xattr_mount_check(s); - if (err) - goto error; - - if (d_really_is_negative(privroot) && !(mount_flags & SB_RDONLY)) { - inode_lock(d_inode(s->s_root)); - err = create_privroot(REISERFS_SB(s)->priv_root); - inode_unlock(d_inode(s->s_root)); - } - - if (d_really_is_positive(privroot)) { - inode_lock(d_inode(privroot)); - if (!REISERFS_SB(s)->xattr_root) { - struct dentry *dentry; - - dentry = lookup_one_len(XAROOT_NAME, privroot, - strlen(XAROOT_NAME)); - if (!IS_ERR(dentry)) - REISERFS_SB(s)->xattr_root = dentry; - else - err = PTR_ERR(dentry); - } - inode_unlock(d_inode(privroot)); - } - -error: - if (err) { - clear_bit(REISERFS_XATTRS_USER, &REISERFS_SB(s)->s_mount_opt); - clear_bit(REISERFS_POSIXACL, &REISERFS_SB(s)->s_mount_opt); - } - - /* The super_block SB_POSIXACL must mirror the (no)acl mount option. */ - if (reiserfs_posixacl(s)) - s->s_flags |= SB_POSIXACL; - else - s->s_flags &= ~SB_POSIXACL; - - return err; -} diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h deleted file mode 100644 index 5868a4e990e3..000000000000 --- a/fs/reiserfs/xattr.h +++ /dev/null @@ -1,117 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include -#include -#include - -struct inode; -struct dentry; -struct iattr; -struct super_block; - -int reiserfs_xattr_register_handlers(void) __init; -void reiserfs_xattr_unregister_handlers(void); -int reiserfs_xattr_init(struct super_block *sb, int mount_flags); -int reiserfs_lookup_privroot(struct super_block *sb); -int reiserfs_delete_xattrs(struct inode *inode); -int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); -int reiserfs_permission(struct mnt_idmap *idmap, - struct inode *inode, int mask); - -#ifdef CONFIG_REISERFS_FS_XATTR -#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir) -ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size); - -int reiserfs_xattr_get(struct inode *, const char *, void *, size_t); -int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int); -int reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *, - struct inode *, const char *, const void *, - size_t, int); - -extern const struct xattr_handler reiserfs_xattr_user_handler; -extern const struct xattr_handler reiserfs_xattr_trusted_handler; -extern const struct xattr_handler reiserfs_xattr_security_handler; -#ifdef CONFIG_REISERFS_FS_SECURITY -int reiserfs_security_init(struct inode *dir, struct inode *inode, - const struct qstr *qstr, - struct reiserfs_security_handle *sec); -int reiserfs_security_write(struct reiserfs_transaction_handle *th, - struct inode *inode, - struct reiserfs_security_handle *sec); -void reiserfs_security_free(struct reiserfs_security_handle *sec); -#endif - -static inline int reiserfs_xattrs_initialized(struct super_block *sb) -{ - return REISERFS_SB(sb)->priv_root && REISERFS_SB(sb)->xattr_root; -} - -#define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header)) -static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size) -{ - loff_t ret = 0; - if (reiserfs_file_data_log(inode)) { - ret = _ROUND_UP(xattr_size(size), inode->i_sb->s_blocksize); - ret >>= inode->i_sb->s_blocksize_bits; - } - return ret; -} - -/* - * We may have to create up to 3 objects: xattr root, xattr dir, xattr file. - * Let's try to be smart about it. 
- * xattr root: We cache it. If it's not cached, we may need to create it. - * xattr dir: If anything has been loaded for this inode, we can set a flag - * saying so. - * xattr file: Since we don't cache xattrs, we can't tell. We always include - * blocks for it. - * - * However, since root and dir can be created between calls - YOU MUST SAVE - * THIS VALUE. - */ -static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode) -{ - size_t nblocks = JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); - - if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) { - nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); - if (d_really_is_negative(REISERFS_SB(inode->i_sb)->xattr_root)) - nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); - } - - return nblocks; -} - -static inline void reiserfs_init_xattr_rwsem(struct inode *inode) -{ - init_rwsem(&REISERFS_I(inode)->i_xattr_sem); -} - -#else - -#define reiserfs_listxattr NULL - -static inline void reiserfs_init_xattr_rwsem(struct inode *inode) -{ -} -#endif /* CONFIG_REISERFS_FS_XATTR */ - -#ifndef CONFIG_REISERFS_FS_SECURITY -static inline int reiserfs_security_init(struct inode *dir, - struct inode *inode, - const struct qstr *qstr, - struct reiserfs_security_handle *sec) -{ - return 0; -} -static inline int -reiserfs_security_write(struct reiserfs_transaction_handle *th, - struct inode *inode, - struct reiserfs_security_handle *sec) -{ - return 0; -} -static inline void reiserfs_security_free(struct reiserfs_security_handle *sec) -{} -#endif diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c deleted file mode 100644 index 064264992b49..000000000000 --- a/fs/reiserfs/xattr_acl.c +++ /dev/null @@ -1,411 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include "reiserfs.h" -#include -#include -#include -#include -#include -#include "xattr.h" -#include "acl.h" -#include - -static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th, - struct inode *inode, int type, - struct posix_acl *acl); - - -int -reiserfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, - struct posix_acl *acl, int type) -{ - int error, error2; - struct reiserfs_transaction_handle th; - size_t jcreate_blocks; - int size = acl ? posix_acl_xattr_size(acl->a_count) : 0; - int update_mode = 0; - struct inode *inode = d_inode(dentry); - umode_t mode = inode->i_mode; - - /* - * Pessimism: We can't assume that anything from the xattr root up - * has been created. - */ - - jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) + - reiserfs_xattr_nblocks(inode, size) * 2; - - reiserfs_write_lock(inode->i_sb); - error = journal_begin(&th, inode->i_sb, jcreate_blocks); - reiserfs_write_unlock(inode->i_sb); - if (error == 0) { - if (type == ACL_TYPE_ACCESS && acl) { - error = posix_acl_update_mode(&nop_mnt_idmap, inode, - &mode, &acl); - if (error) - goto unlock; - update_mode = 1; - } - error = __reiserfs_set_acl(&th, inode, type, acl); - if (!error && update_mode) - inode->i_mode = mode; -unlock: - reiserfs_write_lock(inode->i_sb); - error2 = journal_end(&th); - reiserfs_write_unlock(inode->i_sb); - if (error2) - error = error2; - } - - return error; -} - -/* - * Convert from filesystem to in-memory representation. 
- */ -static struct posix_acl *reiserfs_posix_acl_from_disk(const void *value, size_t size) -{ - const char *end = (char *)value + size; - int n, count; - struct posix_acl *acl; - - if (!value) - return NULL; - if (size < sizeof(reiserfs_acl_header)) - return ERR_PTR(-EINVAL); - if (((reiserfs_acl_header *) value)->a_version != - cpu_to_le32(REISERFS_ACL_VERSION)) - return ERR_PTR(-EINVAL); - value = (char *)value + sizeof(reiserfs_acl_header); - count = reiserfs_acl_count(size); - if (count < 0) - return ERR_PTR(-EINVAL); - if (count == 0) - return NULL; - acl = posix_acl_alloc(count, GFP_NOFS); - if (!acl) - return ERR_PTR(-ENOMEM); - for (n = 0; n < count; n++) { - reiserfs_acl_entry *entry = (reiserfs_acl_entry *) value; - if ((char *)value + sizeof(reiserfs_acl_entry_short) > end) - goto fail; - acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); - acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); - switch (acl->a_entries[n].e_tag) { - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - value = (char *)value + - sizeof(reiserfs_acl_entry_short); - break; - - case ACL_USER: - value = (char *)value + sizeof(reiserfs_acl_entry); - if ((char *)value > end) - goto fail; - acl->a_entries[n].e_uid = - make_kuid(&init_user_ns, - le32_to_cpu(entry->e_id)); - break; - case ACL_GROUP: - value = (char *)value + sizeof(reiserfs_acl_entry); - if ((char *)value > end) - goto fail; - acl->a_entries[n].e_gid = - make_kgid(&init_user_ns, - le32_to_cpu(entry->e_id)); - break; - - default: - goto fail; - } - } - if (value != end) - goto fail; - return acl; - -fail: - posix_acl_release(acl); - return ERR_PTR(-EINVAL); -} - -/* - * Convert from in-memory to filesystem representation. - */ -static void *reiserfs_posix_acl_to_disk(const struct posix_acl *acl, size_t * size) -{ - reiserfs_acl_header *ext_acl; - char *e; - int n; - - *size = reiserfs_acl_size(acl->a_count); - ext_acl = kmalloc(sizeof(reiserfs_acl_header) + - acl->a_count * - sizeof(reiserfs_acl_entry), - GFP_NOFS); - if (!ext_acl) - return ERR_PTR(-ENOMEM); - ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION); - e = (char *)ext_acl + sizeof(reiserfs_acl_header); - for (n = 0; n < acl->a_count; n++) { - const struct posix_acl_entry *acl_e = &acl->a_entries[n]; - reiserfs_acl_entry *entry = (reiserfs_acl_entry *) e; - entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); - entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); - switch (acl->a_entries[n].e_tag) { - case ACL_USER: - entry->e_id = cpu_to_le32( - from_kuid(&init_user_ns, acl_e->e_uid)); - e += sizeof(reiserfs_acl_entry); - break; - case ACL_GROUP: - entry->e_id = cpu_to_le32( - from_kgid(&init_user_ns, acl_e->e_gid)); - e += sizeof(reiserfs_acl_entry); - break; - - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - e += sizeof(reiserfs_acl_entry_short); - break; - - default: - goto fail; - } - } - return (char *)ext_acl; - -fail: - kfree(ext_acl); - return ERR_PTR(-EINVAL); -} - -/* - * Inode operation get_posix_acl(). 
- * - * inode->i_mutex: down - * BKL held [before 2.5.x] - */ -struct posix_acl *reiserfs_get_acl(struct inode *inode, int type, bool rcu) -{ - char *name, *value; - struct posix_acl *acl; - int size; - int retval; - - if (rcu) - return ERR_PTR(-ECHILD); - - switch (type) { - case ACL_TYPE_ACCESS: - name = XATTR_NAME_POSIX_ACL_ACCESS; - break; - case ACL_TYPE_DEFAULT: - name = XATTR_NAME_POSIX_ACL_DEFAULT; - break; - default: - BUG(); - } - - size = reiserfs_xattr_get(inode, name, NULL, 0); - if (size < 0) { - if (size == -ENODATA || size == -ENOSYS) - return NULL; - return ERR_PTR(size); - } - - value = kmalloc(size, GFP_NOFS); - if (!value) - return ERR_PTR(-ENOMEM); - - retval = reiserfs_xattr_get(inode, name, value, size); - if (retval == -ENODATA || retval == -ENOSYS) { - /* - * This shouldn't actually happen as it should have - * been caught above.. but just in case - */ - acl = NULL; - } else if (retval < 0) { - acl = ERR_PTR(retval); - } else { - acl = reiserfs_posix_acl_from_disk(value, retval); - } - - kfree(value); - return acl; -} - -/* - * Inode operation set_posix_acl(). - * - * inode->i_mutex: down - * BKL held [before 2.5.x] - */ -static int -__reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, - int type, struct posix_acl *acl) -{ - char *name; - void *value = NULL; - size_t size = 0; - int error; - - switch (type) { - case ACL_TYPE_ACCESS: - name = XATTR_NAME_POSIX_ACL_ACCESS; - break; - case ACL_TYPE_DEFAULT: - name = XATTR_NAME_POSIX_ACL_DEFAULT; - if (!S_ISDIR(inode->i_mode)) - return acl ? -EACCES : 0; - break; - default: - return -EINVAL; - } - - if (acl) { - value = reiserfs_posix_acl_to_disk(acl, &size); - if (IS_ERR(value)) - return (int)PTR_ERR(value); - } - - error = reiserfs_xattr_set_handle(th, inode, name, value, size, 0); - - /* - * Ensure that the inode gets dirtied if we're only using - * the mode bits and an old ACL didn't exist. We don't need - * to check if the inode is hashed here since we won't get - * called by reiserfs_inherit_default_acl(). - */ - if (error == -ENODATA) { - error = 0; - if (type == ACL_TYPE_ACCESS) { - inode_set_ctime_current(inode); - mark_inode_dirty(inode); - } - } - - kfree(value); - - if (!error) - set_cached_acl(inode, type, acl); - - return error; -} - -/* - * dir->i_mutex: locked, - * inode is new and not released into the wild yet - */ -int -reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, - struct inode *dir, struct dentry *dentry, - struct inode *inode) -{ - struct posix_acl *default_acl, *acl; - int err = 0; - - /* ACLs only get applied to files and directories */ - if (S_ISLNK(inode->i_mode)) - return 0; - - /* - * ACLs can only be used on "new" objects, so if it's an old object - * there is nothing to inherit from - */ - if (get_inode_sd_version(dir) == STAT_DATA_V1) - goto apply_umask; - - /* - * Don't apply ACLs to objects in the .reiserfs_priv tree.. 
This - * would be useless since permissions are ignored, and a pain because - * it introduces locking cycles - */ - if (IS_PRIVATE(inode)) - goto apply_umask; - - err = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); - if (err) - return err; - - if (default_acl) { - err = __reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT, - default_acl); - posix_acl_release(default_acl); - } - if (acl) { - if (!err) - err = __reiserfs_set_acl(th, inode, ACL_TYPE_ACCESS, - acl); - posix_acl_release(acl); - } - - return err; - -apply_umask: - /* no ACL, apply umask */ - inode->i_mode &= ~current_umask(); - return err; -} - -/* This is used to cache the default acl before a new object is created. - * The biggest reason for this is to get an idea of how many blocks will - * actually be required for the create operation if we must inherit an ACL. - * An ACL write can add up to 3 object creations and an additional file write - * so we'd prefer not to reserve that many blocks in the journal if we can. - * It also has the advantage of not loading the ACL with a transaction open, - * this may seem silly, but if the owner of the directory is doing the - * creation, the ACL may not be loaded since the permissions wouldn't require - * it. - * We return the number of blocks required for the transaction. - */ -int reiserfs_cache_default_acl(struct inode *inode) -{ - struct posix_acl *acl; - int nblocks = 0; - - if (IS_PRIVATE(inode)) - return 0; - - acl = get_inode_acl(inode, ACL_TYPE_DEFAULT); - - if (acl && !IS_ERR(acl)) { - int size = reiserfs_acl_size(acl->a_count); - - /* Other xattrs can be created during inode creation. We don't - * want to claim too many blocks, so we check to see if we - * need to create the tree to the xattrs, and then we - * just want two files. 
*/ - nblocks = reiserfs_xattr_jcreate_nblocks(inode); - nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); - - REISERFS_I(inode)->i_flags |= i_has_xattr_dir; - - /* We need to account for writes + bitmaps for two files */ - nblocks += reiserfs_xattr_nblocks(inode, size) * 4; - posix_acl_release(acl); - } - - return nblocks; -} - -/* - * Called under i_mutex - */ -int reiserfs_acl_chmod(struct dentry *dentry) -{ - struct inode *inode = d_inode(dentry); - - if (IS_PRIVATE(inode)) - return 0; - if (get_inode_sd_version(inode) == STAT_DATA_V1 || - !reiserfs_posixacl(inode->i_sb)) - return 0; - - return posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode); -} diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c deleted file mode 100644 index 078dd8cc312f..000000000000 --- a/fs/reiserfs/xattr_security.c +++ /dev/null @@ -1,127 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "reiserfs.h" -#include -#include -#include -#include -#include -#include "xattr.h" -#include -#include - -static int -security_get(const struct xattr_handler *handler, struct dentry *unused, - struct inode *inode, const char *name, void *buffer, size_t size) -{ - if (IS_PRIVATE(inode)) - return -EPERM; - - return reiserfs_xattr_get(inode, xattr_full_name(handler, name), - buffer, size); -} - -static int -security_set(const struct xattr_handler *handler, - struct mnt_idmap *idmap, struct dentry *unused, - struct inode *inode, const char *name, const void *buffer, - size_t size, int flags) -{ - if (IS_PRIVATE(inode)) - return -EPERM; - - return reiserfs_xattr_set(inode, - xattr_full_name(handler, name), - buffer, size, flags); -} - -static bool security_list(struct dentry *dentry) -{ - return !IS_PRIVATE(d_inode(dentry)); -} - -static int -reiserfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, - void *fs_info) -{ - struct reiserfs_security_handle *sec = fs_info; - - sec->value = kmemdup(xattr_array->value, xattr_array->value_len, - GFP_KERNEL); - if (!sec->value) - return -ENOMEM; - - sec->name = xattr_array->name; - sec->length = xattr_array->value_len; - return 0; -} - -/* Initializes the security context for a new inode and returns the number - * of blocks needed for the transaction. If successful, reiserfs_security - * must be released using reiserfs_security_free when the caller is done. */ -int reiserfs_security_init(struct inode *dir, struct inode *inode, - const struct qstr *qstr, - struct reiserfs_security_handle *sec) -{ - int blocks = 0; - int error; - - sec->name = NULL; - sec->value = NULL; - sec->length = 0; - - /* Don't add selinux attributes on xattrs - they'll never get used */ - if (IS_PRIVATE(dir)) - return 0; - - error = security_inode_init_security(inode, dir, qstr, - &reiserfs_initxattrs, sec); - if (error) { - sec->name = NULL; - sec->value = NULL; - sec->length = 0; - return error; - } - - if (sec->length && reiserfs_xattrs_initialized(inode->i_sb)) { - blocks = reiserfs_xattr_jcreate_nblocks(inode) + - reiserfs_xattr_nblocks(inode, sec->length); - /* We don't want to count the directories twice if we have - * a default ACL. 
*/ - REISERFS_I(inode)->i_flags |= i_has_xattr_dir; - } - return blocks; -} - -int reiserfs_security_write(struct reiserfs_transaction_handle *th, - struct inode *inode, - struct reiserfs_security_handle *sec) -{ - char xattr_name[XATTR_NAME_MAX + 1] = XATTR_SECURITY_PREFIX; - int error; - - if (XATTR_SECURITY_PREFIX_LEN + strlen(sec->name) > XATTR_NAME_MAX) - return -EINVAL; - - strlcat(xattr_name, sec->name, sizeof(xattr_name)); - - error = reiserfs_xattr_set_handle(th, inode, xattr_name, sec->value, - sec->length, XATTR_CREATE); - if (error == -ENODATA || error == -EOPNOTSUPP) - error = 0; - - return error; -} - -void reiserfs_security_free(struct reiserfs_security_handle *sec) -{ - kfree(sec->value); - sec->name = NULL; - sec->value = NULL; -} - -const struct xattr_handler reiserfs_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .get = security_get, - .set = security_set, - .list = security_list, -}; diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c deleted file mode 100644 index 0c0c74d8db0e..000000000000 --- a/fs/reiserfs/xattr_trusted.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "reiserfs.h" -#include -#include -#include -#include -#include -#include "xattr.h" -#include - -static int -trusted_get(const struct xattr_handler *handler, struct dentry *unused, - struct inode *inode, const char *name, void *buffer, size_t size) -{ - if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) - return -EPERM; - - return reiserfs_xattr_get(inode, xattr_full_name(handler, name), - buffer, size); -} - -static int -trusted_set(const struct xattr_handler *handler, - struct mnt_idmap *idmap, struct dentry *unused, - struct inode *inode, const char *name, const void *buffer, - size_t size, int flags) -{ - if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) - return -EPERM; - - return reiserfs_xattr_set(inode, - xattr_full_name(handler, name), - buffer, size, flags); -} - -static bool trusted_list(struct dentry *dentry) -{ - return capable(CAP_SYS_ADMIN) && !IS_PRIVATE(d_inode(dentry)); -} - -const struct xattr_handler reiserfs_xattr_trusted_handler = { - .prefix = XATTR_TRUSTED_PREFIX, - .get = trusted_get, - .set = trusted_set, - .list = trusted_list, -}; diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c deleted file mode 100644 index 88195181e1d7..000000000000 --- a/fs/reiserfs/xattr_user.c +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "reiserfs.h" -#include -#include -#include -#include -#include "xattr.h" -#include - -static int -user_get(const struct xattr_handler *handler, struct dentry *unused, - struct inode *inode, const char *name, void *buffer, size_t size) -{ - if (!reiserfs_xattrs_user(inode->i_sb)) - return -EOPNOTSUPP; - return reiserfs_xattr_get(inode, xattr_full_name(handler, name), - buffer, size); -} - -static int -user_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, - struct dentry *unused, - struct inode *inode, const char *name, const void *buffer, - size_t size, int flags) -{ - if (!reiserfs_xattrs_user(inode->i_sb)) - return -EOPNOTSUPP; - return reiserfs_xattr_set(inode, - xattr_full_name(handler, name), - buffer, size, flags); -} - -static bool user_list(struct dentry *dentry) -{ - return reiserfs_xattrs_user(dentry->d_sb); -} - -const struct xattr_handler reiserfs_xattr_user_handler = { - .prefix = XATTR_USER_PREFIX, - .get = user_get, - .set = user_set, - .list = user_list, -}; diff --git a/include/uapi/linux/reiserfs_fs.h 
b/include/uapi/linux/reiserfs_fs.h deleted file mode 100644 index 5bb921409f2b..000000000000 --- a/include/uapi/linux/reiserfs_fs.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details - */ -#ifndef _LINUX_REISER_FS_H -#define _LINUX_REISER_FS_H - -#include -#include - -/* - * include/linux/reiser_fs.h - * - * Reiser File System constants and structures - * - */ - -/* ioctl's command */ -#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) -/* define following flags to be the same as in ext2, so that chattr(1), - lsattr(1) will work with us. */ -#define REISERFS_IOC_GETFLAGS FS_IOC_GETFLAGS -#define REISERFS_IOC_SETFLAGS FS_IOC_SETFLAGS -#define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION -#define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION - -#endif /* _LINUX_REISER_FS_H */ diff --git a/include/uapi/linux/reiserfs_xattr.h b/include/uapi/linux/reiserfs_xattr.h deleted file mode 100644 index 503ad018ce5b..000000000000 --- a/include/uapi/linux/reiserfs_xattr.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - File: linux/reiserfs_xattr.h -*/ - -#ifndef _LINUX_REISERFS_XATTR_H -#define _LINUX_REISERFS_XATTR_H - -#include - -/* Magic value in header */ -#define REISERFS_XATTR_MAGIC 0x52465841 /* "RFXA" */ - -struct reiserfs_xattr_header { - __le32 h_magic; /* magic number for identification */ - __le32 h_hash; /* hash of the value */ -}; - -struct reiserfs_security_handle { - const char *name; - void *value; - __kernel_size_t length; -}; - -#endif /* _LINUX_REISERFS_XATTR_H */ diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c index 1415604c3d24..a413c157904d 100644 --- a/scripts/selinux/mdp/mdp.c +++ b/scripts/selinux/mdp/mdp.c @@ -171,9 +171,6 @@ int main(int argc, char *argv[]) #ifdef CONFIG_JFS_SECURITY FS_USE("xattr", "jfs"); #endif -#ifdef CONFIG_REISERFS_FS_SECURITY - FS_USE("xattr", "reiserfs"); -#endif #ifdef CONFIG_JFFS2_FS_SECURITY FS_USE("xattr", "jffs2"); #endif diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index e7da92489167..f37614cc2c1b 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -11,7 +11,6 @@ NORETURN(__ia32_sys_exit) NORETURN(__ia32_sys_exit_group) NORETURN(__kunit_abort) NORETURN(__module_put_and_kthread_exit) -NORETURN(__reiserfs_panic) NORETURN(__stack_chk_fail) NORETURN(__tdx_hypercall_failed) NORETURN(__ubsan_handle_builtin_unreachable) diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index c773334bbcc9..8eb6aa606a0d 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -27,7 +27,7 @@ static const char *const known_fs[] = { "ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos", "nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2", "ocfs2_dlmfs", "ocxlflash", "omfs", "openpromfs", "overlay", "pipefs", - "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "reiserfs", + "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "resctrl", "romfs", "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", "securityfs", "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", "squashfs", "sysfs", "sysv", "tmpfs", "tracefs", "ubifs", "udf", -- cgit v1.2.3 From 424a55a4a9087887fcfcee561648df497701a4a2 Mon Sep 17 00:00:00 2001 From: Aleksa 
Sarai Date: Thu, 10 Oct 2024 07:40:34 +1100 Subject: uaccess: add copy_struct_to_user helper This is based on copy_struct_from_user(), but there is one additional case to consider when creating a syscall that returns an extensible-struct to userspace -- how should data in the struct that cannot fit into the userspace struct be handled (ksize > usize)? There are three possibilities: 1. The interface is like sched_getattr(2), where new information will be silently not provided to userspace. This is probably what most interfaces will want to do, as it provides the most possible backwards-compatibility. 2. The interface is like lsm_list_modules(2), where you want to return an error like -EMSGSIZE if not providing information could result in the userspace program making a serious mistake (such as one that could lead to a security problem) or if you want to provide some flag to userspace so they know that they are missing some information. 3. The interface is like statx(2), where there is some kind of a request mask that indicates what data userspace would like. One could imagine that statx2(2) (using extensible structs) would want to return -EMSGSIZE if the user explicitly requested a field that their structure is too small to fit, but not return an error if the field was not explicitly requested. This is kind of a mix between (1) and (2) based on the requested mask. The copy_struct_to_user() helper includes an extra argument that is used to return a boolean flag indicating whether there was a non-zero byte in the trailing bytes that were not copied to userspace. This can be used in the following ways to handle all three cases, respectively: 1. Just pass NULL, as you don't care about this case. 2. Return an error (say -EMSGSIZE) if the argument was set to true by copy_struct_to_user(). 3. If the argument was set to true by copy_struct_to_user(), check if there is a flag that implies a field larger than usize. This is the only case where callers of copy_struct_to_user() should check usize themselves. This will probably require scanning an array that specifies what flags were added for each version of the flags struct and returning an error if the request mask matches any of the flags that were added in versions of the struct that are larger than usize. At the moment we don't have any users of (3), so this patch doesn't include any helpers to make the necessary scanning easier, but it should be fairly easy to add some if necessary. Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/r/20241010-extensible-structs-check_fields-v3-1-d2833dfe6edd@cyphar.com Signed-off-by: Christian Brauner --- include/linux/uaccess.h | 97 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 39c7cf82b0c2..1e7c68b4c262 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -394,6 +394,103 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, return 0; } +/** + * copy_struct_to_user: copy a struct to userspace + * @dst: Destination address, in userspace. This buffer must be @ksize + * bytes long. + * @usize: (Alleged) size of @dst struct. + * @src: Source address, in kernel space. + * @ksize: Size of @src struct. + * @ignored_trailing: Set to %true if there was a non-zero byte in @src that + * userspace cannot see because they are using a smaller struct.
+ * + * Copies a struct from kernel space to userspace, in a way that guarantees + * backwards-compatibility for struct syscall arguments (as long as future + * struct extensions are made such that all new fields are *appended* to the + * old struct, and zeroed-out new fields have the same meaning as the old + * struct). + * + * Some syscalls may wish to make sure that userspace knows about everything in + * the struct, and if there is a non-zero value that userspace doesn't know + * about, they want to return an error (such as -EMSGSIZE) or have some other + * fallback (such as adding a "you're missing some information" flag). If + * @ignored_trailing is non-%NULL, it will be set to %true if there was a + * non-zero byte that could not be copied to userspace (ie. was past @usize). + * + * While unconditionally returning an error in this case is the simplest + * solution, for maximum backward compatibility you should try to only return + * -EMSGSIZE if the user explicitly requested the data that couldn't be copied. + * Note that structure sizes can change due to header changes and simple + * recompilations without code changes(!), so if you care about + * @ignored_trailing you probably want to make sure that any new field data is + * associated with a flag. Otherwise you might assume that a program knows + * about data it does not. + * + * @ksize is just sizeof(*src), and @usize should've been passed by userspace. + * The recommended usage is something like the following: + * + * SYSCALL_DEFINE2(foobar, struct foo __user *, uarg, size_t, usize) + * { + * int err; + * bool ignored_trailing; + * struct foo karg = {}; + * + * if (usize > PAGE_SIZE) + * return -E2BIG; + * if (usize < FOO_SIZE_VER0) + * return -EINVAL; + * + * // ... modify karg somehow ... + * + * err = copy_struct_to_user(uarg, usize, &karg, sizeof(karg), + * &ignored_trailing); + * if (err) + * return err; + * if (ignored_trailing) + * return -EMSGSIZE; + * + * // ... + * } + * + * There are three cases to consider: + * * If @usize == @ksize, then it's copied verbatim. + * * If @usize < @ksize, then the kernel is trying to pass userspace a newer + * struct than it supports. Thus we only copy the interoperable portions + * (@usize) and ignore the rest (but @ignored_trailing is set to %true if + * any of the trailing (@ksize - @usize) bytes are non-zero). + * * If @usize > @ksize, then the kernel is trying to pass userspace an older + * struct than userspace supports. In order to make sure the + * unknown-to-the-kernel fields don't contain garbage values, we zero the + * trailing (@usize - @ksize) bytes. + * + * Returns (in all cases, some data may have been copied): + * * -EFAULT: access to userspace failed. + */ +static __always_inline __must_check int +copy_struct_to_user(void __user *dst, size_t usize, const void *src, + size_t ksize, bool *ignored_trailing) +{ + size_t size = min(ksize, usize); + size_t rest = max(ksize, usize) - size; + + /* Double check if ksize is larger than a known object size. */ + if (WARN_ON_ONCE(ksize > __builtin_object_size(src, 1))) + return -E2BIG; + + /* Deal with trailing bytes. */ + if (usize > ksize) { + if (clear_user(dst + size, rest)) + return -EFAULT; + } + if (ignored_trailing) + *ignored_trailing = ksize < usize && + memchr_inv(src + size, 0, rest) != NULL; + /* Copy the interoperable parts of the struct.
*/ + if (copy_to_user(dst, src, size)) + return -EFAULT; + return 0; +} + bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); long copy_from_kernel_nofault(void *dst, const void *src, size_t size); -- cgit v1.2.3 From 48931f65e9f785b65244550cc8f0c8bf9eab7acd Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Tue, 15 Oct 2024 17:42:42 +0000 Subject: RDMA/efa: Add option to set QP service level on create Using modify QP with AH attributes and IB_QP_AV flag set doesn't make much sense for connectionless QP types like SRD. Add SL parameter to EFA create QP user ABI and pass it to the device. Link: https://patch.msgid.link/r/20241015174242.3490-3-mrgolin@amazon.com Reviewed-by: Firas Jahjah Reviewed-by: Yonatan Nachum Signed-off-by: Michael Margolin Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com_cmd.c | 1 + drivers/infiniband/hw/efa/efa_com_cmd.h | 1 + drivers/infiniband/hw/efa/efa_verbs.c | 4 +++- include/uapi/rdma/efa-abi.h | 3 ++- 4 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 206f377db27e..9e04edb9dbda 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -31,6 +31,7 @@ int efa_com_create_qp(struct efa_com_dev *edev, create_qp_cmd.qp_alloc_size.recv_queue_depth = params->rq_depth; create_qp_cmd.uar = params->uarn; + create_qp_cmd.sl = params->sl; if (params->unsolicited_write_recv) EFA_SET(&create_qp_cmd.flags, EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV, 1); diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 2599f8e58cc4..25f02c0d9698 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -27,6 +27,7 @@ struct efa_com_create_qp_params { u16 pd; u16 uarn; u8 qp_type; + u8 sl; u8 unsolicited_write_recv : 1; }; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index feb04cfdb8da..ca3af866a5df 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -676,7 +676,7 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, goto err_out; } - if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_90)) { + if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_98)) { ibdev_dbg(&dev->ibdev, "Incompatible ABI params, unknown fields in udata\n"); err = -EINVAL; @@ -732,6 +732,8 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, create_qp_params.rq_base_addr = qp->rq_dma_addr; } + create_qp_params.sl = cmd.sl; + if (cmd.flags & EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV) create_qp_params.unsolicited_write_recv = true; diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index d689b8b34189..11b94b0b035b 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -95,7 +95,8 @@ struct efa_ibv_create_qp { __u32 sq_ring_size; /* bytes */ __u32 driver_qp_type; __u16 flags; - __u8 reserved_90[6]; + __u8 sl; + __u8 reserved_98[5]; }; struct efa_ibv_create_qp_resp { -- cgit v1.2.3 From 3e482e2840546a3ff725d5adf8d0779ac1c3cf4f Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 18 Oct 2024 14:37:25 -0700 Subject: dmaengine: idxd: Move DSA/IAA device IDs to IDXD driver Since the DSA/IAA device IDs are only used by the IDXD driver, there is no need to define them as public IDs. 
Move their definitions to the IDXD driver to limit their scope. This change helps reduce unnecessary exposure of the device IDs in the global space, making the codebase cleaner and better encapsulated. There is no functional change. Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20241018213725.4167413-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/registers.h | 4 ++++ include/linux/pci_ids.h | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index e16dbf9ab324..c426511f2104 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -6,6 +6,10 @@ #include /* PCI Config */ +#define PCI_DEVICE_ID_INTEL_DSA_GNRD 0x11fb +#define PCI_DEVICE_ID_INTEL_DSA_DMR 0x1212 +#define PCI_DEVICE_ID_INTEL_IAA_DMR 0x1216 + #define DEVICE_VERSION_1 0x100 #define DEVICE_VERSION_2 0x200 diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4cf6aaed5f35..e4bddb927795 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2709,9 +2709,6 @@ #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_SST_TNG 0x119a -#define PCI_DEVICE_ID_INTEL_DSA_GNRD 0x11fb -#define PCI_DEVICE_ID_INTEL_DSA_DMR 0x1212 -#define PCI_DEVICE_ID_INTEL_IAA_DMR 0x1216 #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 #define PCI_DEVICE_ID_INTEL_82437 0x122d #define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e -- cgit v1.2.3 From 6fa115569d980fca1969c075d8d958d205a405ca Mon Sep 17 00:00:00 2001 From: Raviteja Laggyshetty Date: Tue, 10 Sep 2024 10:10:12 +0000 Subject: dt-bindings: interconnect: document the RPMh Network-On-Chip interconnect in QCS8300 SoC Document the RPMh Network-On-Chip Interconnect of the QCS8300 platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Raviteja Laggyshetty Link: https://lore.kernel.org/r/20240910101013.3020-2-quic_rlaggysh@quicinc.com Signed-off-by: Georgi Djakov --- .../bindings/interconnect/qcom,qcs8300-rpmh.yaml | 72 ++++++++ .../dt-bindings/interconnect/qcom,qcs8300-rpmh.h | 189 +++++++++++++++++++++ 2 files changed, 261 insertions(+) create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,qcs8300-rpmh.yaml create mode 100644 include/dt-bindings/interconnect/qcom,qcs8300-rpmh.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/interconnect/qcom,qcs8300-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,qcs8300-rpmh.yaml new file mode 100644 index 000000000000..e9f528d6d9a8 --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/qcom,qcs8300-rpmh.yaml @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/qcom,qcs8300-rpmh.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm RPMh Network-On-Chip Interconnect on QCS8300 + +maintainers: + - Raviteja Laggyshetty + +description: | + RPMh interconnect providers support system bandwidth requirements through + RPMh hardware accelerators known as Bus Clock Manager (BCM). 
+ + See also: include/dt-bindings/interconnect/qcom,qcs8300-rpmh.h + +properties: + compatible: + enum: + - qcom,qcs8300-aggre1-noc + - qcom,qcs8300-aggre2-noc + - qcom,qcs8300-clk-virt + - qcom,qcs8300-config-noc + - qcom,qcs8300-dc-noc + - qcom,qcs8300-gem-noc + - qcom,qcs8300-gpdsp-anoc + - qcom,qcs8300-lpass-ag-noc + - qcom,qcs8300-mc-virt + - qcom,qcs8300-mmss-noc + - qcom,qcs8300-nspa-noc + - qcom,qcs8300-pcie-anoc + - qcom,qcs8300-system-noc + + reg: + maxItems: 1 + +required: + - compatible + +allOf: + - $ref: qcom,rpmh-common.yaml# + - if: + properties: + compatible: + contains: + enum: + - qcom,qcs8300-clk-virt + - qcom,qcs8300-mc-virt + then: + properties: + reg: false + else: + required: + - reg + +unevaluatedProperties: false + +examples: + - | + gem_noc: interconnect@9100000 { + compatible = "qcom,qcs8300-gem-noc"; + reg = <0x9100000 0xf7080>; + #interconnect-cells = <2>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; + + clk_virt: interconnect-0 { + compatible = "qcom,qcs8300-clk-virt"; + #interconnect-cells = <2>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; diff --git a/include/dt-bindings/interconnect/qcom,qcs8300-rpmh.h b/include/dt-bindings/interconnect/qcom,qcs8300-rpmh.h new file mode 100644 index 000000000000..c5eeafa1b1dd --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,qcs8300-rpmh.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_QCS8300_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_QCS8300_H + +#define MASTER_QUP_3 0 +#define MASTER_EMAC 1 +#define MASTER_SDC 2 +#define MASTER_UFS_MEM 3 +#define MASTER_USB2 4 +#define MASTER_USB3_0 5 +#define SLAVE_A1NOC_SNOC 6 + +#define MASTER_QDSS_BAM 0 +#define MASTER_QUP_0 1 +#define MASTER_QUP_1 2 +#define MASTER_CNOC_A2NOC 3 +#define MASTER_CRYPTO_CORE0 4 +#define MASTER_CRYPTO_CORE1 5 +#define MASTER_IPA 6 +#define MASTER_QDSS_ETR_0 7 +#define MASTER_QDSS_ETR_1 8 +#define SLAVE_A2NOC_SNOC 9 + +#define MASTER_QUP_CORE_0 0 +#define MASTER_QUP_CORE_1 1 +#define MASTER_QUP_CORE_3 2 +#define SLAVE_QUP_CORE_0 3 +#define SLAVE_QUP_CORE_1 4 +#define SLAVE_QUP_CORE_3 5 + +#define MASTER_GEM_NOC_CNOC 0 +#define MASTER_GEM_NOC_PCIE_SNOC 1 +#define SLAVE_AHB2PHY_2 2 +#define SLAVE_AHB2PHY_3 3 +#define SLAVE_ANOC_THROTTLE_CFG 4 +#define SLAVE_AOSS 5 +#define SLAVE_APPSS 6 +#define SLAVE_BOOT_ROM 7 +#define SLAVE_CAMERA_CFG 8 +#define SLAVE_CAMERA_NRT_THROTTLE_CFG 9 +#define SLAVE_CAMERA_RT_THROTTLE_CFG 10 +#define SLAVE_CLK_CTL 11 +#define SLAVE_CDSP_CFG 12 +#define SLAVE_RBCPR_CX_CFG 13 +#define SLAVE_RBCPR_MMCX_CFG 14 +#define SLAVE_RBCPR_MX_CFG 15 +#define SLAVE_CPR_NSPCX 16 +#define SLAVE_CPR_NSPHMX 17 +#define SLAVE_CRYPTO_0_CFG 18 +#define SLAVE_CX_RDPM 19 +#define SLAVE_DISPLAY_CFG 20 +#define SLAVE_DISPLAY_RT_THROTTLE_CFG 21 +#define SLAVE_EMAC_CFG 22 +#define SLAVE_GP_DSP0_CFG 23 +#define SLAVE_GPDSP0_THROTTLE_CFG 24 +#define SLAVE_GPU_TCU_THROTTLE_CFG 25 +#define SLAVE_GFX3D_CFG 26 +#define SLAVE_HWKM 27 +#define SLAVE_IMEM_CFG 28 +#define SLAVE_IPA_CFG 29 +#define SLAVE_IPC_ROUTER_CFG 30 +#define SLAVE_LPASS 31 +#define SLAVE_LPASS_THROTTLE_CFG 32 +#define SLAVE_MX_RDPM 33 +#define SLAVE_MXC_RDPM 34 +#define SLAVE_PCIE_0_CFG 35 +#define SLAVE_PCIE_1_CFG 36 +#define SLAVE_PCIE_TCU_THROTTLE_CFG 37 +#define SLAVE_PCIE_THROTTLE_CFG 38 +#define SLAVE_PDM 39 +#define SLAVE_PIMEM_CFG 40 +#define SLAVE_PKA_WRAPPER_CFG 41 +#define SLAVE_QDSS_CFG 42 +#define SLAVE_QM_CFG 
43 +#define SLAVE_QM_MPU_CFG 44 +#define SLAVE_QUP_0 45 +#define SLAVE_QUP_1 46 +#define SLAVE_QUP_3 47 +#define SLAVE_SAIL_THROTTLE_CFG 48 +#define SLAVE_SDC1 49 +#define SLAVE_SECURITY 50 +#define SLAVE_SNOC_THROTTLE_CFG 51 +#define SLAVE_TCSR 52 +#define SLAVE_TLMM 53 +#define SLAVE_TSC_CFG 54 +#define SLAVE_UFS_MEM_CFG 55 +#define SLAVE_USB2 56 +#define SLAVE_USB3_0 57 +#define SLAVE_VENUS_CFG 58 +#define SLAVE_VENUS_CVP_THROTTLE_CFG 59 +#define SLAVE_VENUS_V_CPU_THROTTLE_CFG 60 +#define SLAVE_VENUS_VCODEC_THROTTLE_CFG 61 +#define SLAVE_DDRSS_CFG 62 +#define SLAVE_GPDSP_NOC_CFG 63 +#define SLAVE_CNOC_MNOC_HF_CFG 64 +#define SLAVE_CNOC_MNOC_SF_CFG 65 +#define SLAVE_PCIE_ANOC_CFG 66 +#define SLAVE_SNOC_CFG 67 +#define SLAVE_BOOT_IMEM 68 +#define SLAVE_IMEM 69 +#define SLAVE_PIMEM 70 +#define SLAVE_PCIE_0 71 +#define SLAVE_PCIE_1 72 +#define SLAVE_QDSS_STM 73 +#define SLAVE_TCU 74 + +#define MASTER_CNOC_DC_NOC 0 +#define SLAVE_LLCC_CFG 1 +#define SLAVE_GEM_NOC_CFG 2 + +#define MASTER_GPU_TCU 0 +#define MASTER_PCIE_TCU 1 +#define MASTER_SYS_TCU 2 +#define MASTER_APPSS_PROC 3 +#define MASTER_COMPUTE_NOC 4 +#define MASTER_GEM_NOC_CFG 5 +#define MASTER_GPDSP_SAIL 6 +#define MASTER_GFX3D 7 +#define MASTER_MNOC_HF_MEM_NOC 8 +#define MASTER_MNOC_SF_MEM_NOC 9 +#define MASTER_ANOC_PCIE_GEM_NOC 10 +#define MASTER_SNOC_GC_MEM_NOC 11 +#define MASTER_SNOC_SF_MEM_NOC 12 +#define SLAVE_GEM_NOC_CNOC 13 +#define SLAVE_LLCC 14 +#define SLAVE_GEM_NOC_PCIE_CNOC 15 +#define SLAVE_SERVICE_GEM_NOC_1 16 +#define SLAVE_SERVICE_GEM_NOC_2 17 +#define SLAVE_SERVICE_GEM_NOC 18 +#define SLAVE_SERVICE_GEM_NOC2 19 + +#define MASTER_SAILSS_MD0 0 +#define MASTER_DSP0 1 +#define SLAVE_GP_DSP_SAIL_NOC 2 + +#define MASTER_CNOC_LPASS_AG_NOC 0 +#define MASTER_LPASS_PROC 1 +#define SLAVE_LPASS_CORE_CFG 2 +#define SLAVE_LPASS_LPI_CFG 3 +#define SLAVE_LPASS_MPU_CFG 4 +#define SLAVE_LPASS_TOP_CFG 5 +#define SLAVE_LPASS_SNOC 6 +#define SLAVE_SERVICES_LPASS_AML_NOC 7 +#define SLAVE_SERVICE_LPASS_AG_NOC 8 + +#define MASTER_LLCC 0 +#define SLAVE_EBI1 1 + +#define MASTER_CAMNOC_HF 0 +#define MASTER_CAMNOC_ICP 1 +#define MASTER_CAMNOC_SF 2 +#define MASTER_MDP0 3 +#define MASTER_MDP1 4 +#define MASTER_CNOC_MNOC_HF_CFG 5 +#define MASTER_CNOC_MNOC_SF_CFG 6 +#define MASTER_VIDEO_P0 7 +#define MASTER_VIDEO_PROC 8 +#define MASTER_VIDEO_V_PROC 9 +#define SLAVE_MNOC_HF_MEM_NOC 10 +#define SLAVE_MNOC_SF_MEM_NOC 11 +#define SLAVE_SERVICE_MNOC_HF 12 +#define SLAVE_SERVICE_MNOC_SF 13 + +#define MASTER_CDSP_NOC_CFG 0 +#define MASTER_CDSP_PROC 1 +#define SLAVE_HCP_A 2 +#define SLAVE_CDSP_MEM_NOC 3 +#define SLAVE_SERVICE_NSP_NOC 4 + +#define MASTER_PCIE_0 0 +#define MASTER_PCIE_1 1 +#define SLAVE_ANOC_PCIE_GEM_NOC 2 + +#define MASTER_GIC_AHB 0 +#define MASTER_A1NOC_SNOC 1 +#define MASTER_A2NOC_SNOC 2 +#define MASTER_LPASS_ANOC 3 +#define MASTER_SNOC_CFG 4 +#define MASTER_PIMEM 5 +#define MASTER_GIC 6 +#define SLAVE_SNOC_GEM_NOC_GC 7 +#define SLAVE_SNOC_GEM_NOC_SF 8 +#define SLAVE_SERVICE_SNOC 9 + +#endif -- cgit v1.2.3 From 6c5e948f1fffdb1ae2c6d2f3d5336dbf07189334 Mon Sep 17 00:00:00 2001 From: Raviteja Laggyshetty Date: Tue, 24 Sep 2024 14:39:57 +0000 Subject: dt-bindings: interconnect: document the RPMh Network-On-Chip interconnect in QCS615 SoC Document the RPMh Network-On-Chip Interconnect of the QCS615 platform. 
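For readers unfamiliar with how these endpoint defines are consumed, the sketch below shows how a kernel driver might vote bandwidth on a path built from the IDs in the new qcom,qcs615-rpmh.h header. It is a minimal illustrative sketch, not part of this patch: the node labels, path name and bandwidth numbers are assumptions made up for the example; only devm_of_icc_get(), icc_set_bw() and MBps_to_icc() are existing interconnect-framework APIs.

	/*
	 * Assumed consumer device-tree snippet (illustrative only):
	 *   interconnects = <&aggre1_noc MASTER_SDCC_2 0 &mc_virt SLAVE_EBI1 0>;
	 *   interconnect-names = "sdhc-ddr";
	 */
	#include <linux/device.h>
	#include <linux/err.h>
	#include <linux/interconnect.h>

	static int example_vote_bandwidth(struct device *dev)
	{
		struct icc_path *path;

		/* Look up the named path from the consumer's DT node. */
		path = devm_of_icc_get(dev, "sdhc-ddr");
		if (IS_ERR(path))
			return PTR_ERR(path);

		/* Vote 200 MB/s average and 400 MB/s peak along the path. */
		return icc_set_bw(path, MBps_to_icc(200), MBps_to_icc(400));
	}
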
Signed-off-by: Raviteja Laggyshetty Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240924143958.25-2-quic_rlaggysh@quicinc.com Signed-off-by: Georgi Djakov --- .../bindings/interconnect/qcom,qcs615-rpmh.yaml | 73 +++++++++++ .../dt-bindings/interconnect/qcom,qcs615-rpmh.h | 136 +++++++++++++++++++++ 2 files changed, 209 insertions(+) create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,qcs615-rpmh.yaml create mode 100644 include/dt-bindings/interconnect/qcom,qcs615-rpmh.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/interconnect/qcom,qcs615-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,qcs615-rpmh.yaml new file mode 100644 index 000000000000..9d762b2a1fcf --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/qcom,qcs615-rpmh.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/qcom,qcs615-rpmh.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm RPMh Network-On-Chip Interconnect on QCS615 + +maintainers: + - Raviteja Laggyshetty + +description: | + RPMh interconnect providers support system bandwidth requirements through + RPMh hardware accelerators known as Bus Clock Manager (BCM). The provider is + able to communicate with the BCM through the Resource State Coordinator (RSC) + associated with each execution environment. Provider nodes must point to at + least one RPMh device child node pertaining to their RSC and each provider + can map to multiple RPMh resources. + + See also: include/dt-bindings/interconnect/qcom,qcs615-rpmh.h + +properties: + compatible: + enum: + - qcom,qcs615-aggre1-noc + - qcom,qcs615-camnoc-virt + - qcom,qcs615-config-noc + - qcom,qcs615-dc-noc + - qcom,qcs615-gem-noc + - qcom,qcs615-ipa-virt + - qcom,qcs615-mc-virt + - qcom,qcs615-mmss-noc + - qcom,qcs615-system-noc + + reg: + maxItems: 1 + +required: + - compatible + +allOf: + - $ref: qcom,rpmh-common.yaml# + - if: + properties: + compatible: + contains: + enum: + - qcom,qcs615-camnoc-virt + - qcom,qcs615-ipa-virt + - qcom,qcs615-mc-virt + then: + properties: + reg: false + else: + required: + - reg + +unevaluatedProperties: false + +examples: + - | + gem_noc: interconnect@9680000 { + compatible = "qcom,qcs615-gem-noc"; + reg = <0x9680000 0x3e200>; + #interconnect-cells = <2>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; + + mc_virt: interconnect-2 { + compatible = "qcom,qcs615-mc-virt"; + #interconnect-cells = <2>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; diff --git a/include/dt-bindings/interconnect/qcom,qcs615-rpmh.h b/include/dt-bindings/interconnect/qcom,qcs615-rpmh.h new file mode 100644 index 000000000000..84ae0d39e73c --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,qcs615-rpmh.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_QCS615_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_QCS615_H + +#define MASTER_A1NOC_CFG 1 +#define MASTER_QDSS_BAM 2 +#define MASTER_QSPI 3 +#define MASTER_QUP_0 4 +#define MASTER_BLSP_1 5 +#define MASTER_CNOC_A2NOC 6 +#define MASTER_CRYPTO 7 +#define MASTER_IPA 8 +#define MASTER_EMAC_EVB 9 +#define MASTER_PCIE 10 +#define MASTER_QDSS_ETR 11 +#define MASTER_SDCC_1 12 +#define MASTER_SDCC_2 13 +#define MASTER_UFS_MEM 14 +#define MASTER_USB2 15 +#define MASTER_USB3_0 16 +#define SLAVE_A1NOC_SNOC 17 +#define SLAVE_LPASS_SNOC 18 +#define SLAVE_ANOC_PCIE_SNOC 19 +#define SLAVE_SERVICE_A2NOC 20 + +#define MASTER_CAMNOC_HF0_UNCOMP 1 +#define MASTER_CAMNOC_HF1_UNCOMP 2 +#define MASTER_CAMNOC_SF_UNCOMP 3 +#define SLAVE_CAMNOC_UNCOMP 4 + +#define MASTER_SPDM 1 +#define MASTER_SNOC_CNOC 2 +#define MASTER_QDSS_DAP 3 +#define SLAVE_A1NOC_CFG 4 +#define SLAVE_AHB2PHY_EAST 5 +#define SLAVE_AHB2PHY_WEST 6 +#define SLAVE_AOP 7 +#define SLAVE_AOSS 8 +#define SLAVE_CAMERA_CFG 9 +#define SLAVE_CLK_CTL 10 +#define SLAVE_RBCPR_CX_CFG 11 +#define SLAVE_RBCPR_MX_CFG 12 +#define SLAVE_CRYPTO_0_CFG 13 +#define SLAVE_CNOC_DDRSS 14 +#define SLAVE_DISPLAY_CFG 15 +#define SLAVE_EMAC_AVB_CFG 16 +#define SLAVE_GLM 17 +#define SLAVE_GFX3D_CFG 18 +#define SLAVE_IMEM_CFG 19 +#define SLAVE_IPA_CFG 20 +#define SLAVE_CNOC_MNOC_CFG 21 +#define SLAVE_PCIE_CFG 22 +#define SLAVE_PIMEM_CFG 23 +#define SLAVE_PRNG 24 +#define SLAVE_QDSS_CFG 25 +#define SLAVE_QSPI 26 +#define SLAVE_QUP_0 27 +#define SLAVE_QUP_1 28 +#define SLAVE_SDCC_1 29 +#define SLAVE_SDCC_2 30 +#define SLAVE_SNOC_CFG 31 +#define SLAVE_SPDM_WRAPPER 32 +#define SLAVE_TCSR 33 +#define SLAVE_TLMM_EAST 34 +#define SLAVE_TLMM_SOUTH 35 +#define SLAVE_TLMM_WEST 36 +#define SLAVE_UFS_MEM_CFG 37 +#define SLAVE_USB2 38 +#define SLAVE_USB3 39 +#define SLAVE_VENUS_CFG 40 +#define SLAVE_VSENSE_CTRL_CFG 41 +#define SLAVE_CNOC_A2NOC 42 +#define SLAVE_SERVICE_CNOC 43 + +#define MASTER_CNOC_DC_NOC 1 +#define SLAVE_DC_NOC_GEMNOC 2 +#define SLAVE_LLCC_CFG 3 + +#define MASTER_APPSS_PROC 1 +#define MASTER_GPU_TCU 2 +#define MASTER_SYS_TCU 3 +#define MASTER_GEM_NOC_CFG 4 +#define MASTER_GFX3D 5 +#define MASTER_MNOC_HF_MEM_NOC 6 +#define MASTER_MNOC_SF_MEM_NOC 7 +#define MASTER_SNOC_GC_MEM_NOC 8 +#define MASTER_SNOC_SF_MEM_NOC 9 +#define SLAVE_MSS_PROC_MS_MPU_CFG 10 +#define SLAVE_GEM_NOC_SNOC 11 +#define SLAVE_LLCC 12 +#define SLAVE_MEM_NOC_PCIE_SNOC 13 +#define SLAVE_SERVICE_GEM_NOC 14 + +#define MASTER_IPA_CORE 1 +#define SLAVE_IPA_CORE 2 + +#define MASTER_LLCC 1 +#define SLAVE_EBI1 2 + +#define MASTER_CNOC_MNOC_CFG 1 +#define MASTER_CAMNOC_HF0 2 +#define MASTER_CAMNOC_HF1 3 +#define MASTER_CAMNOC_SF 4 +#define MASTER_MDP0 5 +#define MASTER_ROTATOR 6 +#define MASTER_VIDEO_P0 7 +#define MASTER_VIDEO_PROC 8 +#define SLAVE_MNOC_SF_MEM_NOC 9 +#define SLAVE_MNOC_HF_MEM_NOC 10 +#define SLAVE_SERVICE_MNOC 11 + +#define MASTER_SNOC_CFG 1 +#define MASTER_A1NOC_SNOC 2 +#define MASTER_GEM_NOC_SNOC 3 +#define MASTER_GEM_NOC_PCIE_SNOC 4 +#define MASTER_LPASS_ANOC 5 +#define MASTER_ANOC_PCIE_SNOC 6 +#define MASTER_PIMEM 7 +#define MASTER_GIC 8 +#define SLAVE_APPSS 9 +#define SLAVE_SNOC_CNOC 10 +#define SLAVE_SNOC_GEM_NOC_SF 11 +#define SLAVE_SNOC_MEM_NOC_GC 12 +#define SLAVE_IMEM 13 +#define SLAVE_PIMEM 14 +#define SLAVE_SERVICE_SNOC 15 +#define SLAVE_PCIE_0 16 +#define SLAVE_QDSS_STM 17 +#define SLAVE_TCU 18 + +#endif + -- cgit v1.2.3 From 43c7ce69d28e185f62fe2b8be2c681c5cac0bc6b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 
11:53:50 -0700 Subject: rtnetlink: Protect struct rtnl_link_ops with SRCU. Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to guarantee that rtnl_link_ops is alive during inflight RTM_NEWLINK even when its module is being unloaded. Let's use SRCU to protect ops. rtnl_link_ops_get() now iterates link_ops under RCU and returns SRCU-protected ops pointer. The caller must call rtnl_link_ops_put() to release the pointer after the use. Also, __rtnl_link_unregister() unlinks the ops first and calls synchronize_srcu() to wait for inflight RTM_NEWLINK requests to complete. Note that link_ops needs to be protected by its dedicated lock when RTNL is removed. Suggested-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 5 ++- net/core/rtnetlink.c | 83 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 65 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index bb49c5708ce7..1a6aa5ca74f3 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -3,6 +3,7 @@ #define __NET_RTNETLINK_H #include +#include #include typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, @@ -69,7 +70,8 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) /** * struct rtnl_link_ops - rtnetlink link operations * - * @list: Used internally + * @list: Used internally, protected by RTNL and SRCU + * @srcu: Used internally * @kind: Identifier * @netns_refund: Physical device, move to init_net on netns exit * @maxtype: Highest device specific netlink attribute number @@ -100,6 +102,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) */ struct rtnl_link_ops { struct list_head list; + struct srcu_struct srcu; const char *kind; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9c9290a6c271..31b105b3a834 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -457,15 +457,29 @@ EXPORT_SYMBOL_GPL(__rtnl_unregister_many); static LIST_HEAD(link_ops); -static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) +static struct rtnl_link_ops *rtnl_link_ops_get(const char *kind, int *srcu_index) { - const struct rtnl_link_ops *ops; + struct rtnl_link_ops *ops; - list_for_each_entry(ops, &link_ops, list) { - if (!strcmp(ops->kind, kind)) - return ops; + rcu_read_lock(); + + list_for_each_entry_rcu(ops, &link_ops, list) { + if (!strcmp(ops->kind, kind)) { + *srcu_index = srcu_read_lock(&ops->srcu); + goto unlock; + } } - return NULL; + + ops = NULL; +unlock: + rcu_read_unlock(); + + return ops; +} + +static void rtnl_link_ops_put(struct rtnl_link_ops *ops, int srcu_index) +{ + srcu_read_unlock(&ops->srcu, srcu_index); } /** @@ -480,8 +494,16 @@ static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) */ int __rtnl_link_register(struct rtnl_link_ops *ops) { - if (rtnl_link_ops_get(ops->kind)) - return -EEXIST; + struct rtnl_link_ops *tmp; + int err; + + /* When RTNL is removed, add lock for link_ops. 
*/ + ASSERT_RTNL(); + + list_for_each_entry(tmp, &link_ops, list) { + if (!strcmp(ops->kind, tmp->kind)) + return -EEXIST; + } /* The check for alloc/setup is here because if ops * does not have that filled up, it is not possible @@ -491,7 +513,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops) if ((ops->alloc || ops->setup) && !ops->dellink) ops->dellink = unregister_netdevice_queue; - list_add_tail(&ops->list, &link_ops); + err = init_srcu_struct(&ops->srcu); + if (err) + return err; + + list_add_tail_rcu(&ops->list, &link_ops); + return 0; } EXPORT_SYMBOL_GPL(__rtnl_link_register); @@ -542,10 +569,12 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops) { struct net *net; - for_each_net(net) { + list_del_rcu(&ops->list); + synchronize_srcu(&ops->srcu); + cleanup_srcu_struct(&ops->srcu); + + for_each_net(net) __rtnl_kill_links(net, ops); - } - list_del(&ops->list); } EXPORT_SYMBOL_GPL(__rtnl_link_unregister); @@ -2158,10 +2187,11 @@ static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = { [IFLA_XDP_PROG_ID] = { .type = NLA_U32 }, }; -static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla) +static struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla, + int *ops_srcu_index) { - const struct rtnl_link_ops *ops = NULL; struct nlattr *linfo[IFLA_INFO_MAX + 1]; + struct rtnl_link_ops *ops = NULL; if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla, ifla_info_policy, NULL) < 0) return NULL; @@ -2170,7 +2200,7 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla char kind[MODULE_NAME_LEN]; nla_strscpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind)); - ops = rtnl_link_ops_get(kind); + ops = rtnl_link_ops_get(kind, ops_srcu_index); } return ops; @@ -2290,8 +2320,8 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { - const struct rtnl_link_ops *kind_ops = NULL; struct netlink_ext_ack *extack = cb->extack; + struct rtnl_link_ops *kind_ops = NULL; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); unsigned int flags = NLM_F_MULTI; @@ -2302,6 +2332,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net *tgt_net = net; u32 ext_filter_mask = 0; struct net_device *dev; + int ops_srcu_index; int master_idx = 0; int netnsid = -1; int err, i; @@ -2335,7 +2366,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) master_idx = nla_get_u32(tb[i]); break; case IFLA_LINKINFO: - kind_ops = linkinfo_to_kind_ops(tb[i]); + kind_ops = linkinfo_to_kind_ops(tb[i], &ops_srcu_index); break; default: if (cb->strict_check) { @@ -2361,6 +2392,10 @@ walk_entries: if (err < 0) break; } + + if (kind_ops) + rtnl_link_ops_put(kind_ops, ops_srcu_index); + cb->seq = tgt_net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); if (netnsid >= 0) @@ -3747,8 +3782,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr **tb, **linkinfo, **data = NULL; - const struct rtnl_link_ops *ops = NULL; + struct rtnl_link_ops *ops = NULL; struct rtnl_newlink_tbs *tbs; + int ops_srcu_index; int ret; tbs = kmalloc(sizeof(*tbs), GFP_KERNEL); @@ -3780,13 +3816,13 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, char kind[MODULE_NAME_LEN]; nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); - ops = rtnl_link_ops_get(kind); + ops = rtnl_link_ops_get(kind, &ops_srcu_index); 
#ifdef CONFIG_MODULES if (!ops) { __rtnl_unlock(); request_module("rtnl-link-%s", kind); rtnl_lock(); - ops = rtnl_link_ops_get(kind); + ops = rtnl_link_ops_get(kind, &ops_srcu_index); } #endif } @@ -3800,7 +3836,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, linkinfo[IFLA_INFO_DATA], ops->policy, extack); if (ret < 0) - goto free; + goto put_ops; data = tbs->attr; } @@ -3808,12 +3844,15 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (ops->validate) { ret = ops->validate(tb, data, extack); if (ret < 0) - goto free; + goto put_ops; } } ret = __rtnl_newlink(skb, nlh, ops, tbs, data, extack); +put_ops: + if (ops) + rtnl_link_ops_put(ops, ops_srcu_index); free: kfree(tbs); return ret; -- cgit v1.2.3 From 26eebdc4b005ccd4cf63f4fef4c9c0adf9bfa380 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:56 -0700 Subject: rtnetlink: Return int from rtnl_af_register(). The next patch will add init_srcu_struct() in rtnl_af_register(), then we need to handle its error. Let's add the error handling in advance to make the following patch cleaner. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Matt Johnston Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 2 +- net/bridge/br_netlink.c | 6 +++++- net/core/rtnetlink.c | 4 +++- net/ipv4/devinet.c | 3 ++- net/ipv6/addrconf.c | 5 ++++- net/mctp/device.c | 16 +++++++++++----- net/mpls/af_mpls.c | 5 ++++- 7 files changed, 30 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 1a6aa5ca74f3..969138ae2f4b 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -204,7 +204,7 @@ struct rtnl_af_ops { size_t (*get_stats_af_size)(const struct net_device *dev); }; -void rtnl_af_register(struct rtnl_af_ops *ops); +int rtnl_af_register(struct rtnl_af_ops *ops); void rtnl_af_unregister(struct rtnl_af_ops *ops); struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 6b97ae47f855..3e0f47203f2a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1924,7 +1924,9 @@ int __init br_netlink_init(void) if (err) goto out; - rtnl_af_register(&br_af_ops); + err = rtnl_af_register(&br_af_ops); + if (err) + goto out_vlan; err = rtnl_link_register(&br_link_ops); if (err) @@ -1934,6 +1936,8 @@ int __init br_netlink_init(void) out_af: rtnl_af_unregister(&br_af_ops); +out_vlan: + br_vlan_rtnl_uninit(); out: return err; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 445e6ffed75e..70b663aca209 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -686,11 +686,13 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) * * Returns 0 on success or a negative error code. 
*/ -void rtnl_af_register(struct rtnl_af_ops *ops) +int rtnl_af_register(struct rtnl_af_ops *ops) { rtnl_lock(); list_add_tail_rcu(&ops->list, &rtnl_af_ops); rtnl_unlock(); + + return 0; } EXPORT_SYMBOL_GPL(rtnl_af_register); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ec29ead83e74..0ff9c0abfaa0 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2827,7 +2827,8 @@ void __init devinet_init(void) register_pernet_subsys(&devinet_ops); register_netdevice_notifier(&ip_netdev_notifier); - rtnl_af_register(&inet_af_ops); + if (rtnl_af_register(&inet_af_ops)) + panic("Unable to register inet_af_ops\n"); rtnl_register_many(devinet_rtnl_msg_handlers); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ac8645ad2537..d0a99710d65d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7468,7 +7468,9 @@ int __init addrconf_init(void) addrconf_verify(&init_net); - rtnl_af_register(&inet6_ops); + err = rtnl_af_register(&inet6_ops); + if (err) + goto erraf; err = rtnl_register_many(addrconf_rtnl_msg_handlers); if (err) @@ -7482,6 +7484,7 @@ int __init addrconf_init(void) errout: rtnl_unregister_all(PF_INET6); rtnl_af_unregister(&inet6_ops); +erraf: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: destroy_workqueue(addrconf_wq); diff --git a/net/mctp/device.c b/net/mctp/device.c index 85cc5f31f1e7..3d75b919995d 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -535,14 +535,20 @@ int __init mctp_device_init(void) int err; register_netdevice_notifier(&mctp_dev_nb); - rtnl_af_register(&mctp_af_ops); + + err = rtnl_af_register(&mctp_af_ops); + if (err) + goto err_notifier; err = rtnl_register_many(mctp_device_rtnl_msg_handlers); - if (err) { - rtnl_af_unregister(&mctp_af_ops); - unregister_netdevice_notifier(&mctp_dev_nb); - } + if (err) + goto err_af; + return 0; +err_af: + rtnl_af_unregister(&mctp_af_ops); +err_notifier: + unregister_netdevice_notifier(&mctp_dev_nb); return err; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index a0573847bc55..1f63b32d76d6 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2753,7 +2753,9 @@ static int __init mpls_init(void) dev_add_pack(&mpls_packet_type); - rtnl_af_register(&mpls_af_ops); + err = rtnl_af_register(&mpls_af_ops); + if (err) + goto out_unregister_dev_type; err = rtnl_register_many(mpls_rtnl_msg_handlers); if (err) @@ -2773,6 +2775,7 @@ out_unregister_rtnl: rtnl_unregister_many(mpls_rtnl_msg_handlers); out_unregister_rtnl_af: rtnl_af_unregister(&mpls_af_ops); +out_unregister_dev_type: dev_remove_pack(&mpls_packet_type); out_unregister_pernet: unregister_pernet_subsys(&mpls_net_ops); -- cgit v1.2.3 From 6ab0f866948323724e95cf14d9e47fd77703c192 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:57 -0700 Subject: rtnetlink: Protect struct rtnl_af_ops with SRCU. Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to guarantee that rtnl_af_ops is alive during inflight RTM_SETLINK even when its module is being unloaded. Let's use SRCU to protect ops. rtnl_af_lookup() now iterates rtnl_af_ops under RCU and returns SRCU-protected ops pointer. The caller must call rtnl_af_put() to release the pointer after the use. Also, rtnl_af_unregister() unlinks the ops first and calls synchronize_srcu() to wait for inflight RTM_SETLINK requests to complete. Note that rtnl_af_ops needs to be protected by its dedicated lock when RTNL is removed. 
Note also that BUG_ON() in do_setlink() is changed to the normal error handling as a different af_ops might be found after validate_linkmsg(). Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 5 +++- net/core/rtnetlink.c | 63 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 51 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 969138ae2f4b..e0d9a8eae6b6 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -172,7 +172,8 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops); /** * struct rtnl_af_ops - rtnetlink address family operations * - * @list: Used internally + * @list: Used internally, protected by RTNL and SRCU + * @srcu: Used internally * @family: Address family * @fill_link_af: Function to fill IFLA_AF_SPEC with address family * specific netlink attributes. @@ -185,6 +186,8 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops); */ struct rtnl_af_ops { struct list_head list; + struct srcu_struct srcu; + int family; int (*fill_link_af)(struct sk_buff *skb, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 70b663aca209..194a81e5f608 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -666,18 +666,31 @@ static size_t rtnl_link_get_size(const struct net_device *dev) static LIST_HEAD(rtnl_af_ops); -static const struct rtnl_af_ops *rtnl_af_lookup(const int family) +static struct rtnl_af_ops *rtnl_af_lookup(const int family, int *srcu_index) { - const struct rtnl_af_ops *ops; + struct rtnl_af_ops *ops; ASSERT_RTNL(); - list_for_each_entry(ops, &rtnl_af_ops, list) { - if (ops->family == family) - return ops; + rcu_read_lock(); + + list_for_each_entry_rcu(ops, &rtnl_af_ops, list) { + if (ops->family == family) { + *srcu_index = srcu_read_lock(&ops->srcu); + goto unlock; + } } - return NULL; + ops = NULL; +unlock: + rcu_read_unlock(); + + return ops; +} + +static void rtnl_af_put(struct rtnl_af_ops *ops, int srcu_index) +{ + srcu_read_unlock(&ops->srcu, srcu_index); } /** @@ -688,6 +701,11 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) */ int rtnl_af_register(struct rtnl_af_ops *ops) { + int err = init_srcu_struct(&ops->srcu); + + if (err) + return err; + rtnl_lock(); list_add_tail_rcu(&ops->list, &rtnl_af_ops); rtnl_unlock(); @@ -707,6 +725,8 @@ void rtnl_af_unregister(struct rtnl_af_ops *ops) rtnl_unlock(); synchronize_rcu(); + synchronize_srcu(&ops->srcu); + cleanup_srcu_struct(&ops->srcu); } EXPORT_SYMBOL_GPL(rtnl_af_unregister); @@ -2579,20 +2599,24 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], int rem, err; nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { - const struct rtnl_af_ops *af_ops; + struct rtnl_af_ops *af_ops; + int af_ops_srcu_index; - af_ops = rtnl_af_lookup(nla_type(af)); + af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index); if (!af_ops) return -EAFNOSUPPORT; if (!af_ops->set_link_af) - return -EOPNOTSUPP; - - if (af_ops->validate_link_af) { + err = -EOPNOTSUPP; + else if (af_ops->validate_link_af) err = af_ops->validate_link_af(dev, af, extack); - if (err < 0) - return err; - } + else + err = 0; + + rtnl_af_put(af_ops, af_ops_srcu_index); + + if (err < 0) + return err; } } @@ -3172,11 +3196,18 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, int rem; nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { - const struct rtnl_af_ops *af_ops; + struct rtnl_af_ops *af_ops; + int af_ops_srcu_index; - BUG_ON(!(af_ops = 
rtnl_af_lookup(nla_type(af)))); + af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index); + if (!af_ops) { + err = -EAFNOSUPPORT; + goto errout; + } err = af_ops->set_link_af(dev, af, extack); + rtnl_af_put(af_ops, af_ops_srcu_index); + if (err < 0) goto errout; -- cgit v1.2.3 From 6474353a5e3d0b2cf610153cea0c61f576a36d0a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 25 Sep 2024 11:05:16 +0200 Subject: epoll: annotate racy check Epoll relies on a racy fastpath check during __fput() in eventpoll_release() to avoid the hit of pointlessly acquiring a semaphore. Annotate that race by using WRITE_ONCE() and READ_ONCE(). Link: https://lore.kernel.org/r/66edfb3c.050a0220.3195df.001a.GAE@google.com Link: https://lore.kernel.org/r/20240925-fungieren-anbauen-79b334b00542@brauner Reviewed-by: Jan Kara Reported-by: syzbot+3b6b32dc50537a49bb4a@syzkaller.appspotmail.com Signed-off-by: Christian Brauner --- fs/eventpoll.c | 6 ++++-- include/linux/eventpoll.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1ae4542f0bd8..90fbab6b6f03 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -823,7 +823,8 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) to_free = NULL; head = file->f_ep; if (head->first == &epi->fllink && !epi->fllink.next) { - file->f_ep = NULL; + /* See eventpoll_release() for details. */ + WRITE_ONCE(file->f_ep, NULL); if (!is_file_epoll(file)) { struct epitems_head *v; v = container_of(head, struct epitems_head, epitems); @@ -1603,7 +1604,8 @@ allocate: spin_unlock(&file->f_lock); goto allocate; } - file->f_ep = head; + /* See eventpoll_release() for details. */ + WRITE_ONCE(file->f_ep, head); to_free = NULL; } hlist_add_head_rcu(&epi->fllink, file->f_ep); diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 3337745d81bd..0c0d00fcd131 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -42,7 +42,7 @@ static inline void eventpoll_release(struct file *file) * because the file in on the way to be removed and nobody ( but * eventpoll ) has still a reference to this file. */ - if (likely(!file->f_ep)) + if (likely(!READ_ONCE(file->f_ep))) return; /* -- cgit v1.2.3 From 900bbaae67e980945dec74d36f8afe0de7556d5a Mon Sep 17 00:00:00 2001 From: Xuewen Yan Date: Fri, 26 Apr 2024 16:05:48 +0800 Subject: epoll: Add synchronous wakeup support for ep_poll_callback Now, the epoll only use wake_up() interface to wake up task. However, sometimes, there are epoll users which want to use the synchronous wakeup flag to hint the scheduler, such as Android binder driver. So add a wake_up_sync() define, and use the wake_up_sync() when the sync is true in ep_poll_callback(). 
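For context, the sync argument of ep_poll_callback() is only non-zero when the
waker itself used one of the *_sync wakeup variants on a waitqueue that epoll
is polling. A minimal sketch of such a waker (foo_dev and its poll_wq are
hypothetical, for illustration only; needs linux/wait.h and linux/poll.h):

	/* Hypothetical driver completion path: data is ready and the woken
	 * task is expected to run next, so pass the synchronous wakeup hint.
	 * ep_poll_callback() then sees sync != 0 and uses wake_up_sync().
	 */
	static void foo_complete(struct foo_dev *dev)
	{
		wake_up_interruptible_sync_poll(&dev->poll_wq, EPOLLIN);
	}
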
Co-developed-by: Jing Xia Signed-off-by: Jing Xia Signed-off-by: Xuewen Yan Link: https://lore.kernel.org/r/20240426080548.8203-1-xuewen.yan@unisoc.com Tested-by: Brian Geffon Reviewed-by: Brian Geffon Reported-by: Benoit Lize Signed-off-by: Christian Brauner --- fs/eventpoll.c | 5 ++++- include/linux/wait.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 90fbab6b6f03..1a06e462b6ef 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1373,7 +1373,10 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v break; } } - wake_up(&ep->wq); + if (sync) + wake_up_sync(&ep->wq); + else + wake_up(&ep->wq); } if (waitqueue_active(&ep->poll_wait)) pwake++; diff --git a/include/linux/wait.h b/include/linux/wait.h index 8aa3372f21a0..2b322a9b88a2 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -221,6 +221,7 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head); #define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL) #define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL, 1) #define wake_up_all_locked(x) __wake_up_locked((x), TASK_NORMAL, 0) +#define wake_up_sync(x) __wake_up_sync(x, TASK_NORMAL) #define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL) #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) -- cgit v1.2.3 From 99bdadbde9c418f29b78b7241732268dbc0a05cc Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 15 Oct 2024 22:21:54 +0200 Subject: acl: Realign struct posix_acl to save 8 bytes Reduce posix_acl's struct size by 8 bytes by realigning its members. Cc: Christian Brauner Reviewed-by: Jan Kara Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20241015202158.2376-1-thorsten.blum@linux.dev Signed-off-by: Christian Brauner --- include/linux/posix_acl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 0e65b3d634d9..2d6a4badd306 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -28,8 +28,8 @@ struct posix_acl_entry { struct posix_acl { refcount_t a_refcount; - struct rcu_head a_rcu; unsigned int a_count; + struct rcu_head a_rcu; struct posix_acl_entry a_entries[]; }; -- cgit v1.2.3 From 8c6e03ffedc5463d1aa1ba89f6ceb082518a3520 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 18 Oct 2024 14:14:21 +0200 Subject: acl: Annotate struct posix_acl with __counted_by() Add the __counted_by compiler attribute to the flexible array member a_entries to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. Use struct_size() to calculate the number of bytes to allocate for new and cloned acls and remove the local size variables. Change the posix_acl_alloc() function parameter count from int to unsigned int to match posix_acl's a_count data type. Add identifier names to the function definition to silence two checkpatch warnings. 
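As an illustration only (not part of this change), the annotation lets
bounds-checked builds tie accesses of a_entries[] to a_count:

	/* Sketch assuming CONFIG_UBSAN_BOUNDS or CONFIG_FORTIFY_SOURCE is
	 * enabled; needs linux/posix_acl.h. posix_acl_alloc(3, ...) sets
	 * a_count to 3, so indices 0..2 are valid and index 3 becomes
	 * detectable as out of bounds thanks to __counted_by(a_count).
	 */
	struct posix_acl *acl = posix_acl_alloc(3, GFP_KERNEL);

	if (acl) {
		acl->a_entries[2].e_tag = ACL_OTHER;	/* in bounds */
		/* acl->a_entries[3].e_tag = ACL_OTHER;	   caught at runtime */
		posix_acl_release(acl);
	}
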
Reviewed-by: Jan Kara Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20241018121426.155247-2-thorsten.blum@linux.dev Cc: Nathan Chancellor Signed-off-by: Christian Brauner --- fs/posix_acl.c | 13 ++++++------- include/linux/posix_acl.h | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 6c66a37522d0..4050942ab52f 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -200,11 +200,11 @@ EXPORT_SYMBOL(posix_acl_init); * Allocate a new ACL with the specified number of entries. */ struct posix_acl * -posix_acl_alloc(int count, gfp_t flags) +posix_acl_alloc(unsigned int count, gfp_t flags) { - const size_t size = sizeof(struct posix_acl) + - count * sizeof(struct posix_acl_entry); - struct posix_acl *acl = kmalloc(size, flags); + struct posix_acl *acl; + + acl = kmalloc(struct_size(acl, a_entries, count), flags); if (acl) posix_acl_init(acl, count); return acl; @@ -220,9 +220,8 @@ posix_acl_clone(const struct posix_acl *acl, gfp_t flags) struct posix_acl *clone = NULL; if (acl) { - int size = sizeof(struct posix_acl) + acl->a_count * - sizeof(struct posix_acl_entry); - clone = kmemdup(acl, size, flags); + clone = kmemdup(acl, struct_size(acl, a_entries, acl->a_count), + flags); if (clone) refcount_set(&clone->a_refcount, 1); } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 2d6a4badd306..e2d47eb1a7f3 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -30,7 +30,7 @@ struct posix_acl { refcount_t a_refcount; unsigned int a_count; struct rcu_head a_rcu; - struct posix_acl_entry a_entries[]; + struct posix_acl_entry a_entries[] __counted_by(a_count); }; #define FOREACH_ACL_ENTRY(pa, acl, pe) \ @@ -62,7 +62,7 @@ posix_acl_release(struct posix_acl *acl) /* posix_acl.c */ extern void posix_acl_init(struct posix_acl *, int); -extern struct posix_acl *posix_acl_alloc(int, gfp_t); +extern struct posix_acl *posix_acl_alloc(unsigned int count, gfp_t flags); extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *); extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *); -- cgit v1.2.3 From bc2bb732162fb183e5c8df9d42604ddb7ec36e8b Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 18 Oct 2024 11:33:24 +0300 Subject: dt-bindings: interconnect: qcom: document SAR2130P NoC Add bindings for the Network of Connects (NoC) present on the Qualcomm SAR2130P platform. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20241018-sar2130p-icc-v2-1-c58c73dcd19d@linaro.org Signed-off-by: Georgi Djakov --- .../bindings/interconnect/qcom,sar2130p-rpmh.yaml | 117 ++++++++++++++++++ .../dt-bindings/interconnect/qcom,sar2130p-rpmh.h | 137 +++++++++++++++++++++ 2 files changed, 254 insertions(+) create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,sar2130p-rpmh.yaml create mode 100644 include/dt-bindings/interconnect/qcom,sar2130p-rpmh.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sar2130p-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sar2130p-rpmh.yaml new file mode 100644 index 000000000000..4647dac740e9 --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/qcom,sar2130p-rpmh.yaml @@ -0,0 +1,117 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/qcom,sar2130p-rpmh.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm RPMh Network-On-Chip Interconnect on SAR2130P + +maintainers: + - Dmitry Baryshkov + - Georgi Djakov + +description: | + RPMh interconnect providers support system bandwidth requirements through + RPMh hardware accelerators known as Bus Clock Manager (BCM). The provider is + able to communicate with the BCM through the Resource State Coordinator (RSC) + associated with each execution environment. Provider nodes must point to at + least one RPMh device child node pertaining to their RSC and each provider + can map to multiple RPMh resources. + + See also:: include/dt-bindings/interconnect/qcom,sar2130p-rpmh.h + +properties: + compatible: + enum: + - qcom,sar2130p-clk-virt + - qcom,sar2130p-config-noc + - qcom,sar2130p-gem-noc + - qcom,sar2130p-lpass-ag-noc + - qcom,sar2130p-mc-virt + - qcom,sar2130p-mmss-noc + - qcom,sar2130p-nsp-noc + - qcom,sar2130p-pcie-anoc + - qcom,sar2130p-system-noc + + reg: + maxItems: 1 + + clocks: + minItems: 1 + maxItems: 2 + +required: + - compatible + +allOf: + - $ref: qcom,rpmh-common.yaml# + - if: + properties: + compatible: + contains: + enum: + - qcom,sar2130p-clk-virt + - qcom,sar2130p-mc-virt + then: + properties: + reg: false + else: + required: + - reg + + - if: + properties: + compatible: + contains: + enum: + - qcom,sar2130p-pcie-anoc + then: + properties: + clocks: + items: + - description: aggre-NOC PCIe AXI clock + - description: cfg-NOC PCIe a-NOC AHB clock + + - if: + properties: + compatible: + contains: + enum: + - qcom,sar2130p-system-noc + then: + properties: + clocks: + items: + - description: aggre USB3 PRIM AXI clock + + - if: + properties: + compatible: + contains: + enum: + - qcom,sar2130p-system-noc + - qcom,sar2130p-pcie-anoc + then: + required: + - clocks + else: + properties: + clocks: false + +unevaluatedProperties: false + +examples: + - | + clk_virt: interconnect-0 { + compatible = "qcom,sar2130p-clk-virt"; + #interconnect-cells = <2>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; + + aggre1_noc: interconnect@1680000 { + compatible = "qcom,sar2130p-system-noc"; + reg = <0x01680000 0x29080>; + #interconnect-cells = <2>; + clocks = <&gcc_prim_axi_clk>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; diff --git a/include/dt-bindings/interconnect/qcom,sar2130p-rpmh.h b/include/dt-bindings/interconnect/qcom,sar2130p-rpmh.h new file mode 100644 index 000000000000..aec7cbb7cd70 --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,sar2130p-rpmh.h @@ -0,0 +1,137 @@ 
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2022, Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2024, Linaro Ltd. + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_SAR2130P_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_SAR2130P_H + +#define MASTER_QUP_CORE_0 0 +#define MASTER_QUP_CORE_1 1 +#define SLAVE_QUP_CORE_0 2 +#define SLAVE_QUP_CORE_1 3 + +#define MASTER_GEM_NOC_CNOC 0 +#define MASTER_GEM_NOC_PCIE_SNOC 1 +#define MASTER_QDSS_DAP 2 +#define SLAVE_AHB2PHY_SOUTH 3 +#define SLAVE_AOSS 4 +#define SLAVE_CAMERA_CFG 5 +#define SLAVE_CLK_CTL 6 +#define SLAVE_CDSP_CFG 7 +#define SLAVE_RBCPR_CX_CFG 8 +#define SLAVE_RBCPR_MMCX_CFG 9 +#define SLAVE_RBCPR_MXA_CFG 10 +#define SLAVE_RBCPR_MXC_CFG 11 +#define SLAVE_CPR_NSPCX 12 +#define SLAVE_CRYPTO_0_CFG 13 +#define SLAVE_CX_RDPM 14 +#define SLAVE_DISPLAY_CFG 15 +#define SLAVE_GFX3D_CFG 16 +#define SLAVE_IMEM_CFG 17 +#define SLAVE_IPC_ROUTER_CFG 18 +#define SLAVE_LPASS 19 +#define SLAVE_MX_RDPM 20 +#define SLAVE_PCIE_0_CFG 21 +#define SLAVE_PCIE_1_CFG 22 +#define SLAVE_PDM 23 +#define SLAVE_PIMEM_CFG 24 +#define SLAVE_PRNG 25 +#define SLAVE_QDSS_CFG 26 +#define SLAVE_QSPI_0 27 +#define SLAVE_QUP_0 28 +#define SLAVE_QUP_1 29 +#define SLAVE_SDCC_1 30 +#define SLAVE_TCSR 31 +#define SLAVE_TLMM 32 +#define SLAVE_TME_CFG 33 +#define SLAVE_USB3_0 34 +#define SLAVE_VENUS_CFG 35 +#define SLAVE_VSENSE_CTRL_CFG 36 +#define SLAVE_WLAN_Q6_CFG 37 +#define SLAVE_DDRSS_CFG 38 +#define SLAVE_CNOC_MNOC_CFG 39 +#define SLAVE_SNOC_CFG 40 +#define SLAVE_IMEM 41 +#define SLAVE_PIMEM 42 +#define SLAVE_SERVICE_CNOC 43 +#define SLAVE_PCIE_0 44 +#define SLAVE_PCIE_1 45 +#define SLAVE_QDSS_STM 46 +#define SLAVE_TCU 47 + +#define MASTER_GPU_TCU 0 +#define MASTER_SYS_TCU 1 +#define MASTER_APPSS_PROC 2 +#define MASTER_GFX3D 3 +#define MASTER_MNOC_HF_MEM_NOC 4 +#define MASTER_MNOC_SF_MEM_NOC 5 +#define MASTER_COMPUTE_NOC 6 +#define MASTER_ANOC_PCIE_GEM_NOC 7 +#define MASTER_SNOC_GC_MEM_NOC 8 +#define MASTER_SNOC_SF_MEM_NOC 9 +#define MASTER_WLAN_Q6 10 +#define SLAVE_GEM_NOC_CNOC 11 +#define SLAVE_LLCC 12 +#define SLAVE_MEM_NOC_PCIE_SNOC 13 + +#define MASTER_CNOC_LPASS_AG_NOC 0 +#define MASTER_LPASS_PROC 1 +#define SLAVE_LPASS_CORE_CFG 2 +#define SLAVE_LPASS_LPI_CFG 3 +#define SLAVE_LPASS_MPU_CFG 4 +#define SLAVE_LPASS_TOP_CFG 5 +#define SLAVE_LPASS_SNOC 6 +#define SLAVE_SERVICES_LPASS_AML_NOC 7 +#define SLAVE_SERVICE_LPASS_AG_NOC 8 + +#define MASTER_LLCC 0 +#define SLAVE_EBI1 1 + +#define MASTER_CAMNOC_HF 0 +#define MASTER_CAMNOC_ICP 1 +#define MASTER_CAMNOC_SF 2 +#define MASTER_LSR 3 +#define MASTER_MDP 4 +#define MASTER_CNOC_MNOC_CFG 5 +#define MASTER_VIDEO 6 +#define MASTER_VIDEO_CV_PROC 7 +#define MASTER_VIDEO_PROC 8 +#define MASTER_VIDEO_V_PROC 9 +#define SLAVE_MNOC_HF_MEM_NOC 10 +#define SLAVE_MNOC_SF_MEM_NOC 11 +#define SLAVE_SERVICE_MNOC 12 + +#define MASTER_CDSP_NOC_CFG 0 +#define MASTER_CDSP_PROC 1 +#define SLAVE_CDSP_MEM_NOC 2 +#define SLAVE_SERVICE_NSP_NOC 3 + +#define MASTER_PCIE_0 0 +#define MASTER_PCIE_1 1 +#define SLAVE_ANOC_PCIE_GEM_NOC 2 + +#define MASTER_GIC_AHB 0 +#define MASTER_QDSS_BAM 1 +#define MASTER_QSPI_0 2 +#define MASTER_QUP_0 3 +#define MASTER_QUP_1 4 +#define MASTER_A2NOC_SNOC 5 +#define MASTER_CNOC_DATAPATH 6 +#define MASTER_LPASS_ANOC 7 +#define MASTER_SNOC_CFG 8 +#define MASTER_CRYPTO 9 +#define MASTER_PIMEM 10 +#define MASTER_GIC 11 +#define MASTER_QDSS_ETR 12 +#define MASTER_QDSS_ETR_1 13 +#define MASTER_SDCC_1 14 
+#define MASTER_USB3_0 15 +#define SLAVE_A2NOC_SNOC 16 +#define SLAVE_SNOC_GEM_NOC_GC 17 +#define SLAVE_SNOC_GEM_NOC_SF 18 +#define SLAVE_SERVICE_SNOC 19 + +#endif -- cgit v1.2.3 From c16a7d3ef029494a6f5b5cd20606dd08aeaa2b0f Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Thu, 17 Oct 2024 11:51:34 +0300 Subject: dt-bindings: power: Add binding for MediaTek MT6735 power controller Add DT binding for MediaTek MT6735 SCPSYS power controller. Signed-off-by: Yassine Oudjana Reviewed-by: AngeloGioacchino Del Regno Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241017085136.68053-2-y.oudjana@protonmail.com Signed-off-by: Ulf Hansson --- .../bindings/power/mediatek,power-controller.yaml | 1 + Documentation/devicetree/bindings/soc/mediatek/scpsys.txt | 1 + .../dt-bindings/power/mediatek,mt6735-power-controller.h | 14 ++++++++++++++ 3 files changed, 16 insertions(+) create mode 100644 include/dt-bindings/power/mediatek,mt6735-power-controller.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml index 8985e2df8a56..6d37c06b2f65 100644 --- a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml +++ b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml @@ -23,6 +23,7 @@ properties: compatible: enum: + - mediatek,mt6735-power-controller - mediatek,mt6795-power-controller - mediatek,mt8167-power-controller - mediatek,mt8173-power-controller diff --git a/Documentation/devicetree/bindings/soc/mediatek/scpsys.txt b/Documentation/devicetree/bindings/soc/mediatek/scpsys.txt index 2bc367793aec..3530a6668b48 100644 --- a/Documentation/devicetree/bindings/soc/mediatek/scpsys.txt +++ b/Documentation/devicetree/bindings/soc/mediatek/scpsys.txt @@ -20,6 +20,7 @@ Required properties: - compatible: Should be one of: - "mediatek,mt2701-scpsys" - "mediatek,mt2712-scpsys" + - "mediatek,mt6735-scpsys" - "mediatek,mt6765-scpsys" - "mediatek,mt6797-scpsys" - "mediatek,mt7622-scpsys" diff --git a/include/dt-bindings/power/mediatek,mt6735-power-controller.h b/include/dt-bindings/power/mediatek,mt6735-power-controller.h new file mode 100644 index 000000000000..6957075fcb9e --- /dev/null +++ b/include/dt-bindings/power/mediatek,mt6735-power-controller.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_POWER_MT6735_POWER_CONTROLLER_H +#define _DT_BINDINGS_POWER_MT6735_POWER_CONTROLLER_H + +#define MT6735_POWER_DOMAIN_MD1 0 +#define MT6735_POWER_DOMAIN_CONN 1 +#define MT6735_POWER_DOMAIN_DIS 2 +#define MT6735_POWER_DOMAIN_MFG 3 +#define MT6735_POWER_DOMAIN_ISP 4 +#define MT6735_POWER_DOMAIN_VDE 5 +#define MT6735_POWER_DOMAIN_VEN 6 + +#endif -- cgit v1.2.3 From c2114a0d17631497df99388905353c11f4d5f0dd Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Thu, 17 Oct 2024 11:51:35 +0300 Subject: pmdomain: mediatek: Add support for MT6735 Add support for SCPSYS power domains of MT6735. All non-CPU power domains are added except for MD2 (C2K modem), which is left out due to issues with powering it on. 
Signed-off-by: Yassine Oudjana Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20241017085136.68053-3-y.oudjana@protonmail.com Signed-off-by: Ulf Hansson --- drivers/pmdomain/mediatek/mt6735-pm-domains.h | 96 +++++++++++++++++++++++++++ drivers/pmdomain/mediatek/mtk-pm-domains.c | 5 ++ drivers/pmdomain/mediatek/mtk-pm-domains.h | 2 + include/linux/soc/mediatek/infracfg.h | 5 ++ 4 files changed, 108 insertions(+) create mode 100644 drivers/pmdomain/mediatek/mt6735-pm-domains.h (limited to 'include') diff --git a/drivers/pmdomain/mediatek/mt6735-pm-domains.h b/drivers/pmdomain/mediatek/mt6735-pm-domains.h new file mode 100644 index 000000000000..71896be68e22 --- /dev/null +++ b/drivers/pmdomain/mediatek/mt6735-pm-domains.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_MEDIATEK_MT6735_PM_DOMAINS_H +#define __SOC_MEDIATEK_MT6735_PM_DOMAINS_H + +#include "mtk-pm-domains.h" +#include + +/* + * MT6735 power domain support + */ + +static const struct scpsys_domain_data scpsys_domain_data_mt6735[] = { + [MT6735_POWER_DOMAIN_MD1] = { + .name = "md1", + .sta_mask = PWR_STATUS_MD1, + .ctl_offs = SPM_MD1_PWR_CON, + .pwr_sta_offs = SPM_PWR_STATUS, + .pwr_sta2nd_offs = SPM_PWR_STATUS_2ND, + .sram_pdn_bits = GENMASK(8, 8), + .sram_pdn_ack_bits = 0, + .bp_cfg = { + BUS_PROT_INFRA_UPDATE_TOPAXI(MT6735_TOP_AXI_PROT_EN_MD1), + }, + }, + [MT6735_POWER_DOMAIN_CONN] = { + .name = "conn", + .sta_mask = PWR_STATUS_CONN, + .ctl_offs = SPM_CONN_PWR_CON, + .pwr_sta_offs = SPM_PWR_STATUS, + .pwr_sta2nd_offs = SPM_PWR_STATUS_2ND, + .sram_pdn_bits = GENMASK(8, 8), + .sram_pdn_ack_bits = 0, + .bp_cfg = { + BUS_PROT_INFRA_UPDATE_TOPAXI(MT6735_TOP_AXI_PROT_EN_CONN), + }, + }, + [MT6735_POWER_DOMAIN_DIS] = { + .name = "dis", + .sta_mask = PWR_STATUS_DISP, + .ctl_offs = SPM_DIS_PWR_CON, + .pwr_sta_offs = SPM_PWR_STATUS, + .pwr_sta2nd_offs = SPM_PWR_STATUS_2ND, + .sram_pdn_bits = GENMASK(11, 8), + .sram_pdn_ack_bits = GENMASK(12, 12), + .bp_cfg = { + BUS_PROT_INFRA_UPDATE_TOPAXI(MT8173_TOP_AXI_PROT_EN_MM_M0), + }, + }, + [MT6735_POWER_DOMAIN_MFG] = { + .name = "mfg", + .sta_mask = PWR_STATUS_MFG, + .ctl_offs = SPM_MFG_PWR_CON, + .pwr_sta_offs = SPM_PWR_STATUS, + .pwr_sta2nd_offs = SPM_PWR_STATUS_2ND, + .sram_pdn_bits = GENMASK(11, 8), + .sram_pdn_ack_bits = GENMASK(12, 12), + .bp_cfg = { + BUS_PROT_INFRA_UPDATE_TOPAXI(MT8173_TOP_AXI_PROT_EN_MFG_S), + }, + }, + [MT6735_POWER_DOMAIN_ISP] = { + .name = "isp", + .sta_mask = PWR_STATUS_ISP, + .ctl_offs = SPM_ISP_PWR_CON, + .pwr_sta_offs = SPM_PWR_STATUS, + .pwr_sta2nd_offs = SPM_PWR_STATUS_2ND, + .sram_pdn_bits = GENMASK(11, 8), + .sram_pdn_ack_bits = GENMASK(13, 12), + }, + [MT6735_POWER_DOMAIN_VDE] = { + .name = "vde", + .sta_mask = PWR_STATUS_VDEC, + .ctl_offs = SPM_VDE_PWR_CON, + .pwr_sta_offs = SPM_PWR_STATUS, + .pwr_sta2nd_offs = SPM_PWR_STATUS_2ND, + .sram_pdn_bits = GENMASK(11, 8), + .sram_pdn_ack_bits = GENMASK(12, 12), + }, + [MT6735_POWER_DOMAIN_VEN] = { + .name = "ven", + .sta_mask = BIT(8), + .ctl_offs = SPM_VEN_PWR_CON, + .pwr_sta_offs = SPM_PWR_STATUS, + .pwr_sta2nd_offs = SPM_PWR_STATUS_2ND, + .sram_pdn_bits = GENMASK(11, 8), + .sram_pdn_ack_bits = GENMASK(15, 12), + }, +}; + +static const struct scpsys_soc_data mt6735_scpsys_data = { + .domains_data = scpsys_domain_data_mt6735, + .num_domains = ARRAY_SIZE(scpsys_domain_data_mt6735), +}; + +#endif /* __SOC_MEDIATEK_MT6735_PM_DOMAINS_H */ diff --git a/drivers/pmdomain/mediatek/mtk-pm-domains.c b/drivers/pmdomain/mediatek/mtk-pm-domains.c index 
3580913f25d3..b866b006af69 100644 --- a/drivers/pmdomain/mediatek/mtk-pm-domains.c +++ b/drivers/pmdomain/mediatek/mtk-pm-domains.c @@ -16,6 +16,7 @@ #include #include +#include "mt6735-pm-domains.h" #include "mt6795-pm-domains.h" #include "mt8167-pm-domains.h" #include "mt8173-pm-domains.h" @@ -608,6 +609,10 @@ static void scpsys_domain_cleanup(struct scpsys *scpsys) } static const struct of_device_id scpsys_of_match[] = { + { + .compatible = "mediatek,mt6735-power-controller", + .data = &mt6735_scpsys_data, + }, { .compatible = "mediatek,mt6795-power-controller", .data = &mt6795_scpsys_data, diff --git a/drivers/pmdomain/mediatek/mtk-pm-domains.h b/drivers/pmdomain/mediatek/mtk-pm-domains.h index aaba5e6b0536..2ac96804b985 100644 --- a/drivers/pmdomain/mediatek/mtk-pm-domains.h +++ b/drivers/pmdomain/mediatek/mtk-pm-domains.h @@ -21,6 +21,7 @@ #define SPM_ISP_PWR_CON 0x0238 #define SPM_DIS_PWR_CON 0x023c #define SPM_CONN_PWR_CON 0x0280 +#define SPM_MD1_PWR_CON 0x0284 #define SPM_VEN2_PWR_CON 0x0298 #define SPM_AUDIO_PWR_CON 0x029c #define SPM_MFG_2D_PWR_CON 0x02c0 @@ -30,6 +31,7 @@ #define SPM_PWR_STATUS 0x060c #define SPM_PWR_STATUS_2ND 0x0610 +#define PWR_STATUS_MD1 BIT(0) #define PWR_STATUS_CONN BIT(1) #define PWR_STATUS_DISP BIT(3) #define PWR_STATUS_MFG BIT(4) diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 6c6cccc848f4..9956e18c5ffa 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -434,6 +434,11 @@ #define MT7622_TOP_AXI_PROT_EN_WB (BIT(2) | BIT(6) | \ BIT(7) | BIT(8)) +#define MT6735_TOP_AXI_PROT_EN_CONN (BIT(2) | BIT(8)) +#define MT6735_TOP_AXI_PROT_EN_MD1 (BIT(24) | BIT(25) | \ + BIT(26) | BIT(27) | \ + BIT(28)) + #define INFRA_TOPAXI_PROTECTEN 0x0220 #define INFRA_TOPAXI_PROTECTSTA1 0x0228 #define INFRA_TOPAXI_PROTECTEN_SET 0x0260 -- cgit v1.2.3 From d811ac148f0afd2f3f7e1cd7f54de8da973ec5e3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 15 Oct 2024 15:56:35 +0200 Subject: virtchnl: fix m68k build. The kernel test robot reported a build failure on m68k in the intel driver due to the recent shapers-related changes. The mentioned arch has funny alignment properties, let's be explicit about the binary layout expectation introducing a padding field. Fixes: 608a5c05c39b ("virtchnl: support queue rate limit and quanta size configuration") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202410131710.71Wt6LKO-lkp@intel.com/ Reviewed-by: Alexander Lobakin Reviewed-by: Paul Menzel Reviewed-by: Jacob Keller Link: https://patch.msgid.link/e45d1c9f17356d431b03b419f60b8b763d2ff768.1729000481.git.pabeni@redhat.com Signed-off-by: Paolo Abeni --- include/linux/avf/virtchnl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 223e433c39fe..13a11f3c09b8 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -1499,6 +1499,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_queue_chunk); struct virtchnl_quanta_cfg { u16 quanta_size; + u16 pad; struct virtchnl_queue_chunk queue_select; }; -- cgit v1.2.3 From 2c50ec98fc6cab28df35e0a22a2bcc7957d9d0ab Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 2 Oct 2024 14:06:47 -0600 Subject: block: remove redundant passthrough check in blk_mq_need_time_stamp() Simply checking the rq_flags is enough to determine if accounting is being done for this request. 
Reviewed-by: Keith Busch Reviewed-by: Anuj Gupta Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 4fecf46ef681..59e9adf815a4 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -857,12 +857,6 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib); */ static inline bool blk_mq_need_time_stamp(struct request *rq) { - /* - * passthrough io doesn't use iostat accounting, cgroup stats - * and io scheduler functionalities. - */ - if (blk_rq_is_passthrough(rq)) - return false; return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED)); } -- cgit v1.2.3 From 9dfd9ea93aeab57d897bb7fc7c0707f26b0b9af8 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 3 Oct 2024 00:11:43 +0200 Subject: block: introduce add_disk_fwnode() Introduce add_disk_fwnode() as a replacement of device_add_disk() that permits to pass and attach a fwnode to disk dev. This variant can be useful for eMMC that might have the partition table for the disk defined in DT. A parser can later make use of the attached fwnode to parse the related table and init the hardcoded partition for the disk. device_add_disk() is converted to a simple wrapper of add_disk_fwnode() with the fwnode entry set as NULL. Signed-off-by: Christian Marangi Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241002221306.4403-4-ansuelsmth@gmail.com Signed-off-by: Jens Axboe --- block/genhd.c | 28 ++++++++++++++++++++++++---- include/linux/blkdev.h | 3 +++ 2 files changed, 27 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/block/genhd.c b/block/genhd.c index 1c05dd4c6980..bc30eee7ab16 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -383,16 +383,18 @@ int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode) } /** - * device_add_disk - add disk information to kernel list + * add_disk_fwnode - add disk information to kernel list with fwnode * @parent: parent device for the disk * @disk: per-device partitioning information * @groups: Additional per-device sysfs groups + * @fwnode: attached disk fwnode * * This function registers the partitioning information in @disk - * with the kernel. + * with the kernel. Also attach a fwnode to the disk device. */ -int __must_check device_add_disk(struct device *parent, struct gendisk *disk, - const struct attribute_group **groups) +int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, + struct fwnode_handle *fwnode) { struct device *ddev = disk_to_dev(disk); @@ -452,6 +454,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, ddev->parent = parent; ddev->groups = groups; dev_set_name(ddev, "%s", disk->disk_name); + if (fwnode) + device_set_node(ddev, fwnode); if (!(disk->flags & GENHD_FL_HIDDEN)) ddev->devt = MKDEV(disk->major, disk->first_minor); ret = device_add(ddev); @@ -553,6 +557,22 @@ out_exit_elevator: elevator_exit(disk->queue); return ret; } +EXPORT_SYMBOL_GPL(add_disk_fwnode); + +/** + * device_add_disk - add disk information to kernel list + * @parent: parent device for the disk + * @disk: per-device partitioning information + * @groups: Additional per-device sysfs groups + * + * This function registers the partitioning information in @disk + * with the kernel. 
+ */ +int __must_check device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups) +{ + return add_disk_fwnode(parent, disk, groups, NULL); +} EXPORT_SYMBOL(device_add_disk); static void blk_report_disk_dead(struct gendisk *disk, bool surprise) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50c3b959da28..a6aae750b4ac 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -725,6 +725,9 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) #define for_each_bio(_bio) \ for (; _bio; _bio = _bio->bi_next) +int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, + struct fwnode_handle *fwnode); int __must_check device_add_disk(struct device *parent, struct gendisk *disk, const struct attribute_group **groups); static inline int __must_check add_disk(struct gendisk *disk) -- cgit v1.2.3 From 110234da18ab482f6f583d28eff26b9569bf5622 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 7 Oct 2024 08:32:35 -0700 Subject: block: enable passthrough command statistics Applications using the passthrough interfaces for IO want to continue seeing the disk stats. These requests had been fenced off from this block layer feature. While the block layer doesn't necessarily know what a passthrough command does, we do know the data size and direction, which is enough to account for the command's stats. Since tracking these has the potential to produce unexpected results, the passthrough stats are locked behind a new queue flag that needs to be enabled with the /sys/block//queue/iostats_passthrough attribute. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241007153236.2818562-1-kbusch@meta.com Signed-off-by: Jens Axboe --- Documentation/ABI/stable/sysfs-block | 7 +++++++ block/blk-mq.c | 32 +++++++++++++++++++++++++++++++- block/blk-sysfs.c | 30 ++++++++++++++++++++++++++++++ include/linux/blkdev.h | 5 +++++ 4 files changed, 73 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index cea8856f798d..835361110715 100644 --- a/Documentation/ABI/stable/sysfs-block +++ b/Documentation/ABI/stable/sysfs-block @@ -424,6 +424,13 @@ Description: [RW] This file is used to control (on/off) the iostats accounting of the disk. +What: /sys/block//queue/iostats_passthrough +Date: October 2024 +Contact: linux-block@vger.kernel.org +Description: + [RW] This file is used to control (on/off) the iostats + accounting of the disk for passthrough commands. + What: /sys/block//queue/logical_block_size Date: May 2009 diff --git a/block/blk-mq.c b/block/blk-mq.c index f4f78c03f735..7d05a56e3639 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -988,13 +988,43 @@ static inline void blk_account_io_done(struct request *req, u64 now) } } +static inline bool blk_rq_passthrough_stats(struct request *req) +{ + struct bio *bio = req->bio; + + if (!blk_queue_passthrough_stat(req->q)) + return false; + + /* Requests without a bio do not transfer data. */ + if (!bio) + return false; + + /* + * Stats are accumulated in the bdev, so must have one attached to a + * bio to track stats. Most drivers do not set the bdev for passthrough + * requests, but nvme is one that will set it. + */ + if (!bio->bi_bdev) + return false; + + /* + * We don't know what a passthrough command does, but we know the + * payload size and data direction. 
Ensuring the size is aligned to the + * block size filters out most commands with payloads that don't + * represent sector access. + */ + if (blk_rq_bytes(req) & (bdev_logical_block_size(bio->bi_bdev) - 1)) + return false; + return true; +} + static inline void blk_account_io_start(struct request *req) { trace_block_io_start(req); if (!blk_queue_io_stat(req->q)) return; - if (blk_rq_is_passthrough(req)) + if (blk_rq_is_passthrough(req) && !blk_rq_passthrough_stats(req)) return; req->rq_flags |= RQF_IO_STAT; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 8717d43e0792..741b95dfdbf6 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -272,6 +272,34 @@ static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page) return queue_var_show(disk_nr_zones(disk), page); } +static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page) +{ + return queue_var_show(blk_queue_passthrough_stat(disk->queue), page); +} + +static ssize_t queue_iostats_passthrough_store(struct gendisk *disk, + const char *page, size_t count) +{ + struct queue_limits lim; + unsigned long ios; + ssize_t ret; + + ret = queue_var_store(&ios, page, count); + if (ret < 0) + return ret; + + lim = queue_limits_start_update(disk->queue); + if (ios) + lim.flags |= BLK_FLAG_IOSTATS_PASSTHROUGH; + else + lim.flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH; + + ret = queue_limits_commit_update(disk->queue, &lim); + if (ret) + return ret; + + return count; +} static ssize_t queue_nomerges_show(struct gendisk *disk, char *page) { return queue_var_show((blk_queue_nomerges(disk->queue) << 1) | @@ -460,6 +488,7 @@ QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones"); QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones"); QUEUE_RW_ENTRY(queue_nomerges, "nomerges"); +QUEUE_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough"); QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity"); QUEUE_RW_ENTRY(queue_poll, "io_poll"); QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay"); @@ -586,6 +615,7 @@ static struct attribute *queue_attrs[] = { &queue_max_open_zones_entry.attr, &queue_max_active_zones_entry.attr, &queue_nomerges_entry.attr, + &queue_iostats_passthrough_entry.attr, &queue_iostats_entry.attr, &queue_stable_writes_entry.attr, &queue_add_random_entry.attr, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a6aae750b4ac..6b78a68e0bd9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -349,6 +349,9 @@ typedef unsigned int __bitwise blk_flags_t; /* I/O topology is misaligned */ #define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1)) +/* passthrough command IO accounting */ +#define BLK_FLAG_IOSTATS_PASSTHROUGH ((__force blk_flags_t)(1u << 2)) + struct queue_limits { blk_features_t features; blk_flags_t flags; @@ -617,6 +620,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL)) #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) +#define blk_queue_passthrough_stat(q) \ + ((q)->limits.flags & BLK_FLAG_IOSTATS_PASSTHROUGH) #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) #define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA) #ifdef CONFIG_BLK_RQ_ALLOC_TIME -- cgit v1.2.3 From 59eaa01ce7a6cbc5c36b928f52888f99fca6b295 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Mon, 7 Oct 2024 12:24:17 -0600 Subject: ublk: support device recovery without I/O queueing ublk currently supports 
the following behaviors on ublk server exit: A: outstanding I/Os get errors, subsequently issued I/Os get errors B: outstanding I/Os get errors, subsequently issued I/Os queue C: outstanding I/Os get reissued, subsequently issued I/Os queue and the following behaviors for recovery of preexisting block devices by a future incarnation of the ublk server: 1: ublk devices stopped on ublk server exit (no recovery possible) 2: ublk devices are recoverable using start/end_recovery commands The userspace interface allows selection of combinations of these behaviors using flags specified at device creation time, namely: default behavior: A + 1 UBLK_F_USER_RECOVERY: B + 2 UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_REISSUE: C + 2 The behavior A + 2 is currently unsupported. Add support for this behavior under the new flag combination UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_FAIL_IO. Signed-off-by: Uday Shankar Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20241007182419.3263186-5-ushankar@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 78 ++++++++++++++++++++++++++++++++++--------- include/uapi/linux/ublk_cmd.h | 18 ++++++++++ 2 files changed, 81 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 0e75283e3bda..59951e7c2593 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -60,10 +60,12 @@ | UBLK_F_UNPRIVILEGED_DEV \ | UBLK_F_CMD_IOCTL_ENCODE \ | UBLK_F_USER_COPY \ - | UBLK_F_ZONED) + | UBLK_F_ZONED \ + | UBLK_F_USER_RECOVERY_FAIL_IO) #define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \ - | UBLK_F_USER_RECOVERY_REISSUE) + | UBLK_F_USER_RECOVERY_REISSUE \ + | UBLK_F_USER_RECOVERY_FAIL_IO) /* All UBLK_PARAM_TYPE_* should be included here */ #define UBLK_PARAM_TYPE_ALL \ @@ -146,6 +148,7 @@ struct ublk_queue { bool force_abort; bool timeout; bool canceling; + bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */ unsigned short nr_io_ready; /* how many ios setup */ spinlock_t cancel_lock; struct ublk_device *dev; @@ -690,7 +693,8 @@ static inline bool ublk_nosrv_should_reissue_outstanding(struct ublk_device *ub) */ static inline bool ublk_nosrv_dev_should_queue_io(struct ublk_device *ub) { - return ub->dev_info.flags & UBLK_F_USER_RECOVERY; + return (ub->dev_info.flags & UBLK_F_USER_RECOVERY) && + !(ub->dev_info.flags & UBLK_F_USER_RECOVERY_FAIL_IO); } /* @@ -700,7 +704,8 @@ static inline bool ublk_nosrv_dev_should_queue_io(struct ublk_device *ub) */ static inline bool ublk_nosrv_should_queue_io(struct ublk_queue *ubq) { - return ubq->flags & UBLK_F_USER_RECOVERY; + return (ubq->flags & UBLK_F_USER_RECOVERY) && + !(ubq->flags & UBLK_F_USER_RECOVERY_FAIL_IO); } /* @@ -714,6 +719,12 @@ static inline bool ublk_nosrv_should_stop_dev(struct ublk_device *ub) return !(ub->dev_info.flags & UBLK_F_USER_RECOVERY); } +static inline bool ublk_dev_in_recoverable_state(struct ublk_device *ub) +{ + return ub->dev_info.state == UBLK_S_DEV_QUIESCED || + ub->dev_info.state == UBLK_S_DEV_FAIL_IO; +} + static void ublk_free_disk(struct gendisk *disk) { struct ublk_device *ub = disk->private_data; @@ -1275,6 +1286,10 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq = bd->rq; blk_status_t res; + if (unlikely(ubq->fail_io)) { + return BLK_STS_TARGET; + } + /* fill iod to slot in io cmd buffer */ res = ublk_setup_iod(ubq, rq); if (unlikely(res != BLK_STS_OK)) @@ -1625,6 +1640,7 @@ static void ublk_nosrv_work(struct work_struct *work) { struct ublk_device 
*ub = container_of(work, struct ublk_device, nosrv_work); + int i; if (ublk_nosrv_should_stop_dev(ub)) { ublk_stop_dev(ub); @@ -1634,7 +1650,18 @@ static void ublk_nosrv_work(struct work_struct *work) mutex_lock(&ub->mutex); if (ub->dev_info.state != UBLK_S_DEV_LIVE) goto unlock; - __ublk_quiesce_dev(ub); + + if (ublk_nosrv_dev_should_queue_io(ub)) { + __ublk_quiesce_dev(ub); + } else { + blk_mq_quiesce_queue(ub->ub_disk->queue); + ub->dev_info.state = UBLK_S_DEV_FAIL_IO; + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + ublk_get_queue(ub, i)->fail_io = true; + } + blk_mq_unquiesce_queue(ub->ub_disk->queue); + } + unlock: mutex_unlock(&ub->mutex); ublk_cancel_dev(ub); @@ -2387,8 +2414,13 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) return -EPERM; /* forbid nonsense combinations of recovery flags */ - if ((info.flags & UBLK_F_USER_RECOVERY_REISSUE) && - !(info.flags & UBLK_F_USER_RECOVERY)) { + switch (info.flags & UBLK_F_ALL_RECOVERY_FLAGS) { + case 0: + case UBLK_F_USER_RECOVERY: + case (UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE): + case (UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO): + break; + default: pr_warn("%s: invalid recovery flags %llx\n", __func__, info.flags & UBLK_F_ALL_RECOVERY_FLAGS); return -EINVAL; @@ -2729,14 +2761,18 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub, * and related io_uring ctx is freed so file struct of /dev/ublkcX is * released. * + * and one of the following holds + * * (2) UBLK_S_DEV_QUIESCED is set, which means the quiesce_work: * (a)has quiesced request queue * (b)has requeued every inflight rqs whose io_flags is ACTIVE * (c)has requeued/aborted every inflight rqs whose io_flags is NOT ACTIVE * (d)has completed/camceled all ioucmds owned by ther dying process + * + * (3) UBLK_S_DEV_FAIL_IO is set, which means the queue is not + * quiesced, but all I/O is being immediately errored */ - if (test_bit(UB_STATE_OPEN, &ub->state) || - ub->dev_info.state != UBLK_S_DEV_QUIESCED) { + if (test_bit(UB_STATE_OPEN, &ub->state) || !ublk_dev_in_recoverable_state(ub)) { ret = -EBUSY; goto out_unlock; } @@ -2760,6 +2796,7 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub, const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); int ublksrv_pid = (int)header->data[0]; int ret = -EINVAL; + int i; pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n", __func__, ub->dev_info.nr_hw_queues, header->dev_id); @@ -2774,18 +2811,29 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub, if (ublk_nosrv_should_stop_dev(ub)) goto out_unlock; - if (ub->dev_info.state != UBLK_S_DEV_QUIESCED) { + if (!ublk_dev_in_recoverable_state(ub)) { ret = -EBUSY; goto out_unlock; } ub->dev_info.ublksrv_pid = ublksrv_pid; pr_devel("%s: new ublksrv_pid %d, dev id %d\n", __func__, ublksrv_pid, header->dev_id); - blk_mq_unquiesce_queue(ub->ub_disk->queue); - pr_devel("%s: queue unquiesced, dev id %d.\n", - __func__, header->dev_id); - blk_mq_kick_requeue_list(ub->ub_disk->queue); - ub->dev_info.state = UBLK_S_DEV_LIVE; + + if (ublk_nosrv_dev_should_queue_io(ub)) { + ub->dev_info.state = UBLK_S_DEV_LIVE; + blk_mq_unquiesce_queue(ub->ub_disk->queue); + pr_devel("%s: queue unquiesced, dev id %d.\n", + __func__, header->dev_id); + blk_mq_kick_requeue_list(ub->ub_disk->queue); + } else { + blk_mq_quiesce_queue(ub->ub_disk->queue); + ub->dev_info.state = UBLK_S_DEV_LIVE; + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + ublk_get_queue(ub, i)->fail_io = false; + } + 
blk_mq_unquiesce_queue(ub->ub_disk->queue); + } + ret = 0; out_unlock: mutex_unlock(&ub->mutex); diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 12873639ea96..a8bc98bb69fc 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -147,8 +147,18 @@ */ #define UBLK_F_NEED_GET_DATA (1UL << 2) +/* + * - Block devices are recoverable if ublk server exits and restarts + * - Outstanding I/O when ublk server exits is met with errors + * - I/O issued while there is no ublk server queues + */ #define UBLK_F_USER_RECOVERY (1UL << 3) +/* + * - Block devices are recoverable if ublk server exits and restarts + * - Outstanding I/O when ublk server exits is reissued + * - I/O issued while there is no ublk server queues + */ #define UBLK_F_USER_RECOVERY_REISSUE (1UL << 4) /* @@ -190,10 +200,18 @@ */ #define UBLK_F_ZONED (1ULL << 8) +/* + * - Block devices are recoverable if ublk server exits and restarts + * - Outstanding I/O when ublk server exits is met with errors + * - I/O issued while there is no ublk server is met with errors + */ +#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 #define UBLK_S_DEV_QUIESCED 2 +#define UBLK_S_DEV_FAIL_IO 3 /* shipped via sqe->cmd of io_uring command */ struct ublksrv_ctrl_cmd { -- cgit v1.2.3 From b21d948f4cc73e3296f2365c7afca721dd6893fa Mon Sep 17 00:00:00 2001 From: Greg Joyce Date: Thu, 29 Aug 2024 12:56:11 -0500 Subject: block: sed-opal: add ioctl IOC_OPAL_SET_SID_PW After a SED drive is provisioned, there is no way to change the SID password via the ioctl() interface. A new ioctl IOC_OPAL_SET_SID_PW will allow the password to be changed. The valid current password is required. Signed-off-by: Greg Joyce Reviewed-by: Daniel Wagner Link: https://lore.kernel.org/r/20240829175639.6478-2-gjoyce@linux.ibm.com Signed-off-by: Jens Axboe --- block/sed-opal.c | 26 ++++++++++++++++++++++++++ include/linux/sed-opal.h | 1 + include/uapi/linux/sed-opal.h | 1 + 3 files changed, 28 insertions(+) (limited to 'include') diff --git a/block/sed-opal.c b/block/sed-opal.c index 598fd3e7fcc8..5a28f23f7f22 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -3037,6 +3037,29 @@ static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) return ret; } +static int opal_set_new_sid_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) +{ + int ret; + struct opal_key *newkey = &opal_pw->new_user_pw.opal_key; + struct opal_key *oldkey = &opal_pw->session.opal_key; + + const struct opal_step pw_steps[] = { + { start_SIDASP_opal_session, oldkey }, + { set_sid_cpin_pin, newkey }, + { end_opal_session, } + }; + + if (!dev) + return -ENODEV; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev); + ret = execute_steps(dev, pw_steps, ARRAY_SIZE(pw_steps)); + mutex_unlock(&dev->dev_lock); + + return ret; +} + static int opal_activate_user(struct opal_dev *dev, struct opal_session_info *opal_session) { @@ -3286,6 +3309,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) case IOC_OPAL_DISCOVERY: ret = opal_get_discv(dev, p); break; + case IOC_OPAL_SET_SID_PW: + ret = opal_set_new_sid_pw(dev, p); + break; default: break; diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 2ac50822554e..80f33a93f944 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -52,6 +52,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_GET_GEOMETRY: case IOC_OPAL_DISCOVERY: case IOC_OPAL_REVERT_LSP: + 
case IOC_OPAL_SET_SID_PW: return true; } return false; diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index d3994b7716bc..9025dd5a4f0f 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -215,5 +215,6 @@ struct opal_revert_lsp { #define IOC_OPAL_GET_GEOMETRY _IOR('p', 238, struct opal_geometry) #define IOC_OPAL_DISCOVERY _IOW('p', 239, struct opal_discovery) #define IOC_OPAL_REVERT_LSP _IOW('p', 240, struct opal_revert_lsp) +#define IOC_OPAL_SET_SID_PW _IOW('p', 241, struct opal_new_pw) #endif /* _UAPI_SED_OPAL_H */ -- cgit v1.2.3 From 8cf8dfceebdaf282da8a836b2bb578808a12698c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 22 Oct 2024 15:41:34 +0200 Subject: seccomp: Stub for !HAVE_ARCH_SECCOMP_FILTER If we have CONFIG_SECCOMP but not CONFIG_HAVE_ARCH_SECCOMP_FILTER we get a compilation error: ../kernel/entry/common.c: In function 'syscall_trace_enter': ../kernel/entry/common.c:55:23: error: implicit declaration of function '__secure_computing' [-Werror=implicit-function-declaration] 55 | ret = __secure_computing(NULL); | ^~~~~~~~~~~~~~~~~~ This is because generic entry calls __secure_computing() unconditionally. Provide the needed stub similar to how current ARM does this by calling secure_computing_strict() in the absence of secure_computing(). This is similar to what is done for ARM in arch/arm/kernel/ptrace.c. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20241022-seccomp-compile-error-v2-1-c9f08a4f8ebb@linaro.org Signed-off-by: Kees Cook --- include/linux/seccomp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 709ad84809e1..341980599c71 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -32,6 +32,11 @@ static inline int secure_computing(void) } #else extern void secure_computing_strict(int this_syscall); +static inline int __secure_computing(const struct seccomp_data *sd) +{ + secure_computing_strict(sd->nr); + return 0; +} #endif extern long prctl_get_seccomp(void); -- cgit v1.2.3 From cfcdf395c21eeac4543d2b8fef9d29ae9e4559e9 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 8 Oct 2024 18:07:02 +0200 Subject: regulator: core: add callback to perform runtime init Provide an initialisation callback to handle runtime parameters. The idea is similar to the regulator_init() callback, but it provides regulator specific structures, instead of just the driver specific data. As an example, this allows the driver to amend the regulator constraints based on runtime parameters if necessary. 
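A minimal sketch of how a driver might wire up the new callback, assuming the core has already parsed init_data and attached the constraints by the time it runs; the foo_* names, the driver_data layout and the 1.2 V cap are hypothetical and only illustrate the "amend constraints at runtime" use case:

static int foo_regulator_init_cb(struct regulator_dev *rdev,
				 struct regulator_config *config)
{
	/* Hypothetical runtime parameter handed in via config->driver_data */
	struct foo_chip *chip = config->driver_data;

	/* Amend the parsed constraints: assume old silicon tops out at 1.2 V */
	if (chip->rev < 2 && rdev->constraints->max_uV > 1200000)
		rdev->constraints->max_uV = 1200000;

	return 0;
}

static const struct regulator_desc foo_buck_desc = {
	.name		= "foo-buck",
	.type		= REGULATOR_VOLTAGE,
	.owner		= THIS_MODULE,
	.ops		= &foo_buck_ops,
	.init_cb	= foo_regulator_init_cb,
};
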
Signed-off-by: Jerome Brunet Link: https://patch.msgid.link/20241008-regulator-ignored-data-v2-2-d1251e0ee507@baylibre.com Signed-off-by: Mark Brown --- drivers/regulator/core.c | 6 ++++++ include/linux/regulator/driver.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 24bb7f5b12e3..eecb05a0d08c 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5758,6 +5758,12 @@ regulator_register(struct device *dev, goto wash; } + if (regulator_desc->init_cb) { + ret = regulator_desc->init_cb(rdev, config); + if (ret < 0) + goto wash; + } + if ((rdev->supply_name && !rdev->supply) && (rdev->constraints->always_on || rdev->constraints->boot_on)) { diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index f230a472ccd3..d2f4427504f0 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -365,6 +365,8 @@ struct regulator_desc { int (*of_parse_cb)(struct device_node *, const struct regulator_desc *, struct regulator_config *); + int (*init_cb)(struct regulator_dev *, + struct regulator_config *); int id; unsigned int continuous_voltage_range:1; unsigned n_voltages; -- cgit v1.2.3 From 602ff58ae4fe4289b0ca71cba9fb82f7de92cd64 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 8 Oct 2024 18:07:03 +0200 Subject: regulator: core: remove machine init callback from config The machine specific regulator_init() appears to be unused. It does not allow a lot of interaction with the regulator framework, since nothing from the framework is passed along (desc, config, etc ...) Machine specific init may also be done with the added init_cb() in the regulator description, so remove regulator_init(). Signed-off-by: Jerome Brunet Link: https://patch.msgid.link/20241008-regulator-ignored-data-v2-3-d1251e0ee507@baylibre.com Signed-off-by: Mark Brown --- drivers/regulator/core.c | 7 ------- include/linux/regulator/machine.h | 3 +-- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index eecb05a0d08c..f8e36c9f5943 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5775,13 +5775,6 @@ regulator_register(struct device *dev, resolved_early = true; } - /* perform any regulator specific init */ - if (init_data && init_data->regulator_init) { - ret = init_data->regulator_init(rdev->reg_data); - if (ret < 0) - goto wash; - } - if (config->ena_gpiod) { ret = regulator_ena_gpio_request(rdev, config); if (ret != 0) { diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 0cd76d264727..d0d700ff337a 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -285,8 +285,7 @@ struct regulator_init_data { int num_consumer_supplies; struct regulator_consumer_supply *consumer_supplies; - /* optional regulator machine specific init */ - int (*regulator_init)(void *driver_data); + /* optional regulator machine specific data */ void *driver_data; /* core does not touch this */ }; -- cgit v1.2.3 From 51e32e897539663957f7a0950f66b48f8896efee Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Sat, 19 Oct 2024 14:16:00 +0300 Subject: clk: Provide devm_clk_bulk_get_all_enabled() helper Commit 265b07df758a ("clk: Provide managed helper to get and enable bulk clocks") added devm_clk_bulk_get_all_enable() function, but missed to return the number of clocks stored in the clk_bulk_data table referenced by the clks 
argument. Without knowing the number, it's not possible to iterate these clocks when needed, hence the argument is useless and could have been simply removed. Introduce devm_clk_bulk_get_all_enabled() variant, which is consistent with devm_clk_bulk_get_all() in terms of the returned value: > 0 if one or more clocks have been stored = 0 if there are no clocks < 0 if an error occurred Moreover, the naming is consistent with devm_clk_get_enabled(), i.e. use the past form of 'enable'. To reduce code duplication and improve patch readability, make devm_clk_bulk_get_all_enable() use the new helper, as suggested by Stephen Boyd. Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Manivannan Sadhasivam Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/20241019-clk_bulk_ena_fix-v4-1-57f108f64e70@collabora.com Signed-off-by: Stephen Boyd --- drivers/clk/clk-devres.c | 9 +++++---- include/linux/clk.h | 21 ++++++++++++++++----- 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index 82ae1f26e634..5368d92d9b39 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -218,8 +218,8 @@ static void devm_clk_bulk_release_all_enable(struct device *dev, void *res) clk_bulk_put_all(devres->num_clks, devres->clks); } -int __must_check devm_clk_bulk_get_all_enable(struct device *dev, - struct clk_bulk_data **clks) +int __must_check devm_clk_bulk_get_all_enabled(struct device *dev, + struct clk_bulk_data **clks) { struct clk_bulk_devres *devres; int ret; @@ -244,11 +244,12 @@ int __must_check devm_clk_bulk_get_all_enable(struct device *dev, } else { clk_bulk_put_all(devres->num_clks, devres->clks); devres_free(devres); + return ret; } - return ret; + return devres->num_clks; } -EXPORT_SYMBOL_GPL(devm_clk_bulk_get_all_enable); +EXPORT_SYMBOL_GPL(devm_clk_bulk_get_all_enabled); static int devm_clk_match(struct device *dev, void *res, void *data) { diff --git a/include/linux/clk.h b/include/linux/clk.h index 851a0f2cf42c..1dcee6d701e4 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -496,11 +496,13 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks); /** - * devm_clk_bulk_get_all_enable - Get and enable all clocks of the consumer (managed) + * devm_clk_bulk_get_all_enabled - Get and enable all clocks of the consumer (managed) * @dev: device for clock "consumer" * @clks: pointer to the clk_bulk_data table of consumer * - * Returns success (0) or negative errno. + * Returns a positive value for the number of clocks obtained while the + * clock references are stored in the clk_bulk_data table in @clks field. + * Returns 0 if there're none and a negative value if something failed. * * This helper function allows drivers to get all clocks of the * consumer and enables them in one operation with management. @@ -508,8 +510,8 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, * is unbound. */ -int __must_check devm_clk_bulk_get_all_enable(struct device *dev, - struct clk_bulk_data **clks); +int __must_check devm_clk_bulk_get_all_enabled(struct device *dev, + struct clk_bulk_data **clks); /** * devm_clk_get - lookup and obtain a managed reference to a clock producer. 
@@ -1034,7 +1036,7 @@ static inline int __must_check devm_clk_bulk_get_all(struct device *dev, return 0; } -static inline int __must_check devm_clk_bulk_get_all_enable(struct device *dev, +static inline int __must_check devm_clk_bulk_get_all_enabled(struct device *dev, struct clk_bulk_data **clks) { return 0; @@ -1136,6 +1138,15 @@ static inline void clk_restore_context(void) {} #endif +/* Deprecated. Use devm_clk_bulk_get_all_enabled() */ +static inline int __must_check +devm_clk_bulk_get_all_enable(struct device *dev, struct clk_bulk_data **clks) +{ + int ret = devm_clk_bulk_get_all_enabled(dev, clks); + + return ret > 0 ? 0 : ret; +} + /* clk_prepare_enable helps cases using clk_enable in non-atomic context. */ static inline int clk_prepare_enable(struct clk *clk) { -- cgit v1.2.3 From 7867cb6575ac2f93a2daccdf3df49e4f1a4594e4 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 11 Oct 2024 00:28:31 +0530 Subject: dt-bindings: clock: qcom: Add SA8775P video clock controller Add device tree bindings for the video clock controller on Qualcomm SA8775P platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/20241011-sa8775p-mm-v4-resend-patches-v5-1-4a9f17dc683a@quicinc.com Signed-off-by: Bjorn Andersson --- .../bindings/clock/qcom,sa8775p-videocc.yaml | 62 ++++++++++++++++++++++ include/dt-bindings/clock/qcom,sa8775p-videocc.h | 47 ++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml create mode 100644 include/dt-bindings/clock/qcom,sa8775p-videocc.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml new file mode 100644 index 000000000000..928131bff4c1 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sa8775p-videocc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Video Clock & Reset Controller on SA8775P + +maintainers: + - Taniya Das + +description: | + Qualcomm video clock control module provides the clocks, resets and power + domains on SA8775P. + + See also: include/dt-bindings/clock/qcom,sa8775p-videocc.h + +properties: + compatible: + enum: + - qcom,sa8775p-videocc + + clocks: + items: + - description: Video AHB clock from GCC + - description: Board XO source + - description: Board active XO source + - description: Sleep Clock source + + power-domains: + maxItems: 1 + description: MMCX power domain + +required: + - compatible + - clocks + - power-domains + - '#power-domain-cells' + +allOf: + - $ref: qcom,gcc.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + videocc: clock-controller@abf0000 { + compatible = "qcom,sa8775p-videocc"; + reg = <0x0abf0000 0x10000>; + clocks = <&gcc GCC_VIDEO_AHB_CLK>, + <&rpmhcc RPMH_CXO_CLK>, + <&rpmhcc RPMH_CXO_CLK_A>, + <&sleep_clk>; + power-domains = <&rpmhpd SA8775P_MMCX>; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... 
diff --git a/include/dt-bindings/clock/qcom,sa8775p-videocc.h b/include/dt-bindings/clock/qcom,sa8775p-videocc.h new file mode 100644 index 000000000000..e6325f68c317 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sa8775p-videocc.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_SA8775P_VIDEO_CC_H +#define _DT_BINDINGS_CLK_QCOM_SA8775P_VIDEO_CC_H + +/* VIDEO_CC clocks */ +#define VIDEO_CC_AHB_CLK 0 +#define VIDEO_CC_AHB_CLK_SRC 1 +#define VIDEO_CC_MVS0_CLK 2 +#define VIDEO_CC_MVS0_CLK_SRC 3 +#define VIDEO_CC_MVS0_DIV_CLK_SRC 4 +#define VIDEO_CC_MVS0C_CLK 5 +#define VIDEO_CC_MVS0C_DIV2_DIV_CLK_SRC 6 +#define VIDEO_CC_MVS1_CLK 7 +#define VIDEO_CC_MVS1_CLK_SRC 8 +#define VIDEO_CC_MVS1_DIV_CLK_SRC 9 +#define VIDEO_CC_MVS1C_CLK 10 +#define VIDEO_CC_MVS1C_DIV2_DIV_CLK_SRC 11 +#define VIDEO_CC_PLL_LOCK_MONITOR_CLK 12 +#define VIDEO_CC_SLEEP_CLK 13 +#define VIDEO_CC_SLEEP_CLK_SRC 14 +#define VIDEO_CC_SM_DIV_CLK_SRC 15 +#define VIDEO_CC_SM_OBS_CLK 16 +#define VIDEO_CC_XO_CLK 17 +#define VIDEO_CC_XO_CLK_SRC 18 +#define VIDEO_PLL0 19 +#define VIDEO_PLL1 20 + +/* VIDEO_CC power domains */ +#define VIDEO_CC_MVS0C_GDSC 0 +#define VIDEO_CC_MVS0_GDSC 1 +#define VIDEO_CC_MVS1C_GDSC 2 +#define VIDEO_CC_MVS1_GDSC 3 + +/* VIDEO_CC resets */ +#define VIDEO_CC_INTERFACE_BCR 0 +#define VIDEO_CC_MVS0_BCR 1 +#define VIDEO_CC_MVS0C_CLK_ARES 2 +#define VIDEO_CC_MVS0C_BCR 3 +#define VIDEO_CC_MVS1_BCR 4 +#define VIDEO_CC_MVS1C_CLK_ARES 5 +#define VIDEO_CC_MVS1C_BCR 6 + +#endif -- cgit v1.2.3 From 9b1873d2350b3f08463563603fdbfcea4f068e67 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 11 Oct 2024 00:28:33 +0530 Subject: dt-bindings: clock: qcom: Add SA8775P camera clock controller Add device tree bindings for the camera clock controller on Qualcomm SA8775P platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/20241011-sa8775p-mm-v4-resend-patches-v5-3-4a9f17dc683a@quicinc.com Signed-off-by: Bjorn Andersson --- .../bindings/clock/qcom,sa8775p-camcc.yaml | 62 ++++++++++++ include/dt-bindings/clock/qcom,sa8775p-camcc.h | 108 +++++++++++++++++++++ 2 files changed, 170 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml create mode 100644 include/dt-bindings/clock/qcom,sa8775p-camcc.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml new file mode 100644 index 000000000000..36a60d8f5ae3 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sa8775p-camcc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Camera Clock & Reset Controller on SA8775P + +maintainers: + - Taniya Das + +description: | + Qualcomm camera clock control module provides the clocks, resets and power + domains on SA8775p. 
+ + See also: include/dt-bindings/clock/qcom,sa8775p-camcc.h + +properties: + compatible: + enum: + - qcom,sa8775p-camcc + + clocks: + items: + - description: Camera AHB clock from GCC + - description: Board XO source + - description: Board active XO source + - description: Sleep clock source + + power-domains: + maxItems: 1 + description: MMCX power domain + +required: + - compatible + - clocks + - power-domains + - '#power-domain-cells' + +allOf: + - $ref: qcom,gcc.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + clock-controller@ade0000 { + compatible = "qcom,sa8775p-camcc"; + reg = <0x0ade0000 0x20000>; + clocks = <&gcc GCC_CAMERA_AHB_CLK>, + <&rpmhcc RPMH_CXO_CLK>, + <&rpmhcc RPMH_CXO_CLK_A>, + <&sleep_clk>; + power-domains = <&rpmhpd SA8775P_MMCX>; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... diff --git a/include/dt-bindings/clock/qcom,sa8775p-camcc.h b/include/dt-bindings/clock/qcom,sa8775p-camcc.h new file mode 100644 index 000000000000..38531acd699f --- /dev/null +++ b/include/dt-bindings/clock/qcom,sa8775p-camcc.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_SA8775P_CAM_CC_H +#define _DT_BINDINGS_CLK_QCOM_SA8775P_CAM_CC_H + +/* CAM_CC clocks */ +#define CAM_CC_CAMNOC_AXI_CLK 0 +#define CAM_CC_CAMNOC_AXI_CLK_SRC 1 +#define CAM_CC_CAMNOC_DCD_XO_CLK 2 +#define CAM_CC_CAMNOC_XO_CLK 3 +#define CAM_CC_CCI_0_CLK 4 +#define CAM_CC_CCI_0_CLK_SRC 5 +#define CAM_CC_CCI_1_CLK 6 +#define CAM_CC_CCI_1_CLK_SRC 7 +#define CAM_CC_CCI_2_CLK 8 +#define CAM_CC_CCI_2_CLK_SRC 9 +#define CAM_CC_CCI_3_CLK 10 +#define CAM_CC_CCI_3_CLK_SRC 11 +#define CAM_CC_CORE_AHB_CLK 12 +#define CAM_CC_CPAS_AHB_CLK 13 +#define CAM_CC_CPAS_FAST_AHB_CLK 14 +#define CAM_CC_CPAS_IFE_0_CLK 15 +#define CAM_CC_CPAS_IFE_1_CLK 16 +#define CAM_CC_CPAS_IFE_LITE_CLK 17 +#define CAM_CC_CPAS_IPE_CLK 18 +#define CAM_CC_CPAS_SFE_LITE_0_CLK 19 +#define CAM_CC_CPAS_SFE_LITE_1_CLK 20 +#define CAM_CC_CPHY_RX_CLK_SRC 21 +#define CAM_CC_CSI0PHYTIMER_CLK 22 +#define CAM_CC_CSI0PHYTIMER_CLK_SRC 23 +#define CAM_CC_CSI1PHYTIMER_CLK 24 +#define CAM_CC_CSI1PHYTIMER_CLK_SRC 25 +#define CAM_CC_CSI2PHYTIMER_CLK 26 +#define CAM_CC_CSI2PHYTIMER_CLK_SRC 27 +#define CAM_CC_CSI3PHYTIMER_CLK 28 +#define CAM_CC_CSI3PHYTIMER_CLK_SRC 29 +#define CAM_CC_CSID_CLK 30 +#define CAM_CC_CSID_CLK_SRC 31 +#define CAM_CC_CSID_CSIPHY_RX_CLK 32 +#define CAM_CC_CSIPHY0_CLK 33 +#define CAM_CC_CSIPHY1_CLK 34 +#define CAM_CC_CSIPHY2_CLK 35 +#define CAM_CC_CSIPHY3_CLK 36 +#define CAM_CC_FAST_AHB_CLK_SRC 37 +#define CAM_CC_GDSC_CLK 38 +#define CAM_CC_ICP_AHB_CLK 39 +#define CAM_CC_ICP_CLK 40 +#define CAM_CC_ICP_CLK_SRC 41 +#define CAM_CC_IFE_0_CLK 42 +#define CAM_CC_IFE_0_CLK_SRC 43 +#define CAM_CC_IFE_0_FAST_AHB_CLK 44 +#define CAM_CC_IFE_1_CLK 45 +#define CAM_CC_IFE_1_CLK_SRC 46 +#define CAM_CC_IFE_1_FAST_AHB_CLK 47 +#define CAM_CC_IFE_LITE_AHB_CLK 48 +#define CAM_CC_IFE_LITE_CLK 49 +#define CAM_CC_IFE_LITE_CLK_SRC 50 +#define CAM_CC_IFE_LITE_CPHY_RX_CLK 51 +#define CAM_CC_IFE_LITE_CSID_CLK 52 +#define CAM_CC_IFE_LITE_CSID_CLK_SRC 53 +#define CAM_CC_IPE_AHB_CLK 54 +#define CAM_CC_IPE_CLK 55 +#define CAM_CC_IPE_CLK_SRC 56 +#define CAM_CC_IPE_FAST_AHB_CLK 57 +#define CAM_CC_MCLK0_CLK 58 +#define CAM_CC_MCLK0_CLK_SRC 59 +#define CAM_CC_MCLK1_CLK 60 +#define CAM_CC_MCLK1_CLK_SRC 61 +#define CAM_CC_MCLK2_CLK 62 +#define CAM_CC_MCLK2_CLK_SRC 63 
+#define CAM_CC_MCLK3_CLK 64 +#define CAM_CC_MCLK3_CLK_SRC 65 +#define CAM_CC_PLL0 66 +#define CAM_CC_PLL0_OUT_EVEN 67 +#define CAM_CC_PLL0_OUT_ODD 68 +#define CAM_CC_PLL2 69 +#define CAM_CC_PLL3 70 +#define CAM_CC_PLL3_OUT_EVEN 71 +#define CAM_CC_PLL4 72 +#define CAM_CC_PLL4_OUT_EVEN 73 +#define CAM_CC_PLL5 74 +#define CAM_CC_PLL5_OUT_EVEN 75 +#define CAM_CC_SFE_LITE_0_CLK 76 +#define CAM_CC_SFE_LITE_0_FAST_AHB_CLK 77 +#define CAM_CC_SFE_LITE_1_CLK 78 +#define CAM_CC_SFE_LITE_1_FAST_AHB_CLK 79 +#define CAM_CC_SLEEP_CLK 80 +#define CAM_CC_SLEEP_CLK_SRC 81 +#define CAM_CC_SLOW_AHB_CLK_SRC 82 +#define CAM_CC_SM_OBS_CLK 83 +#define CAM_CC_XO_CLK_SRC 84 +#define CAM_CC_QDSS_DEBUG_XO_CLK 85 + +/* CAM_CC power domains */ +#define CAM_CC_TITAN_TOP_GDSC 0 + +/* CAM_CC resets */ +#define CAM_CC_ICP_BCR 0 +#define CAM_CC_IFE_0_BCR 1 +#define CAM_CC_IFE_1_BCR 2 +#define CAM_CC_IPE_0_BCR 3 +#define CAM_CC_SFE_LITE_0_BCR 4 +#define CAM_CC_SFE_LITE_1_BCR 5 + +#endif -- cgit v1.2.3 From 33b5cd95d801cab8c634c822e6877470f4209612 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 11 Oct 2024 00:28:35 +0530 Subject: dt-bindings: clock: qcom: Add SA8775P display clock controllers Add device tree bindings for the display clock controllers on Qualcomm SA8775P platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/20241011-sa8775p-mm-v4-resend-patches-v5-5-4a9f17dc683a@quicinc.com Signed-off-by: Bjorn Andersson --- .../bindings/clock/qcom,sa8775p-dispcc.yaml | 79 ++++++++++++++++++++ include/dt-bindings/clock/qcom,sa8775p-dispcc.h | 87 ++++++++++++++++++++++ 2 files changed, 166 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,sa8775p-dispcc.yaml create mode 100644 include/dt-bindings/clock/qcom,sa8775p-dispcc.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/qcom,sa8775p-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sa8775p-dispcc.yaml new file mode 100644 index 000000000000..ce61755e62d4 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sa8775p-dispcc.yaml @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sa8775p-dispcc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Display Clock & Reset Controller on SA8775P + +maintainers: + - Taniya Das + +description: | + Qualcomm display clock control module provides the clocks, resets and power + domains on SA8775P. 
+ + See also: include/dt-bindings/clock/qcom,sa8775p-dispcc.h + +properties: + compatible: + enum: + - qcom,sa8775p-dispcc0 + - qcom,sa8775p-dispcc1 + + clocks: + items: + - description: GCC AHB clock source + - description: Board XO source + - description: Board XO_AO source + - description: Sleep clock source + - description: Link clock from DP0 PHY + - description: VCO DIV clock from DP0 PHY + - description: Link clock from DP1 PHY + - description: VCO DIV clock from DP1 PHY + - description: Byte clock from DSI0 PHY + - description: Pixel clock from DSI0 PHY + - description: Byte clock from DSI1 PHY + - description: Pixel clock from DSI1 PHY + + power-domains: + maxItems: 1 + description: MMCX power domain + +required: + - compatible + - clocks + - power-domains + - '#power-domain-cells' + +allOf: + - $ref: qcom,gcc.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + clock-controller@af00000 { + compatible = "qcom,sa8775p-dispcc0"; + reg = <0x0af00000 0x20000>; + clocks = <&gcc GCC_DISP_AHB_CLK>, + <&rpmhcc RPMH_CXO_CLK>, + <&rpmhcc RPMH_CXO_CLK_A>, + <&sleep_clk>, + <&dp_phy0 0>, + <&dp_phy0 1>, + <&dp_phy1 2>, + <&dp_phy1 3>, + <&dsi_phy0 0>, + <&dsi_phy0 1>, + <&dsi_phy1 2>, + <&dsi_phy1 3>; + power-domains = <&rpmhpd SA8775P_MMCX>; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... diff --git a/include/dt-bindings/clock/qcom,sa8775p-dispcc.h b/include/dt-bindings/clock/qcom,sa8775p-dispcc.h new file mode 100644 index 000000000000..e2049e510658 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sa8775p-dispcc.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_SA8775P_DISP_CC_H +#define _DT_BINDINGS_CLK_QCOM_SA8775P_DISP_CC_H + +/* DISP_CC_0/1 clocks */ +#define MDSS_DISP_CC_MDSS_AHB1_CLK 0 +#define MDSS_DISP_CC_MDSS_AHB_CLK 1 +#define MDSS_DISP_CC_MDSS_AHB_CLK_SRC 2 +#define MDSS_DISP_CC_MDSS_BYTE0_CLK 3 +#define MDSS_DISP_CC_MDSS_BYTE0_CLK_SRC 4 +#define MDSS_DISP_CC_MDSS_BYTE0_DIV_CLK_SRC 5 +#define MDSS_DISP_CC_MDSS_BYTE0_INTF_CLK 6 +#define MDSS_DISP_CC_MDSS_BYTE1_CLK 7 +#define MDSS_DISP_CC_MDSS_BYTE1_CLK_SRC 8 +#define MDSS_DISP_CC_MDSS_BYTE1_DIV_CLK_SRC 9 +#define MDSS_DISP_CC_MDSS_BYTE1_INTF_CLK 10 +#define MDSS_DISP_CC_MDSS_DPTX0_AUX_CLK 11 +#define MDSS_DISP_CC_MDSS_DPTX0_AUX_CLK_SRC 12 +#define MDSS_DISP_CC_MDSS_DPTX0_CRYPTO_CLK 13 +#define MDSS_DISP_CC_MDSS_DPTX0_CRYPTO_CLK_SRC 14 +#define MDSS_DISP_CC_MDSS_DPTX0_LINK_CLK 15 +#define MDSS_DISP_CC_MDSS_DPTX0_LINK_CLK_SRC 16 +#define MDSS_DISP_CC_MDSS_DPTX0_LINK_DIV_CLK_SRC 17 +#define MDSS_DISP_CC_MDSS_DPTX0_LINK_INTF_CLK 18 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL0_CLK 19 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL0_CLK_SRC 20 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL1_CLK 21 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL1_CLK_SRC 22 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL2_CLK 23 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL2_CLK_SRC 24 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL3_CLK 25 +#define MDSS_DISP_CC_MDSS_DPTX0_PIXEL3_CLK_SRC 26 +#define MDSS_DISP_CC_MDSS_DPTX0_USB_ROUTER_LINK_INTF_CLK 27 +#define MDSS_DISP_CC_MDSS_DPTX1_AUX_CLK 28 +#define MDSS_DISP_CC_MDSS_DPTX1_AUX_CLK_SRC 29 +#define MDSS_DISP_CC_MDSS_DPTX1_CRYPTO_CLK 30 +#define MDSS_DISP_CC_MDSS_DPTX1_CRYPTO_CLK_SRC 31 +#define MDSS_DISP_CC_MDSS_DPTX1_LINK_CLK 32 +#define MDSS_DISP_CC_MDSS_DPTX1_LINK_CLK_SRC 33 +#define MDSS_DISP_CC_MDSS_DPTX1_LINK_DIV_CLK_SRC 34 
+#define MDSS_DISP_CC_MDSS_DPTX1_LINK_INTF_CLK 35 +#define MDSS_DISP_CC_MDSS_DPTX1_PIXEL0_CLK 36 +#define MDSS_DISP_CC_MDSS_DPTX1_PIXEL0_CLK_SRC 37 +#define MDSS_DISP_CC_MDSS_DPTX1_PIXEL1_CLK 38 +#define MDSS_DISP_CC_MDSS_DPTX1_PIXEL1_CLK_SRC 39 +#define MDSS_DISP_CC_MDSS_DPTX1_USB_ROUTER_LINK_INTF_CLK 40 +#define MDSS_DISP_CC_MDSS_ESC0_CLK 41 +#define MDSS_DISP_CC_MDSS_ESC0_CLK_SRC 42 +#define MDSS_DISP_CC_MDSS_ESC1_CLK 43 +#define MDSS_DISP_CC_MDSS_ESC1_CLK_SRC 44 +#define MDSS_DISP_CC_MDSS_MDP1_CLK 45 +#define MDSS_DISP_CC_MDSS_MDP_CLK 46 +#define MDSS_DISP_CC_MDSS_MDP_CLK_SRC 47 +#define MDSS_DISP_CC_MDSS_MDP_LUT1_CLK 48 +#define MDSS_DISP_CC_MDSS_MDP_LUT_CLK 49 +#define MDSS_DISP_CC_MDSS_NON_GDSC_AHB_CLK 50 +#define MDSS_DISP_CC_MDSS_PCLK0_CLK 51 +#define MDSS_DISP_CC_MDSS_PCLK0_CLK_SRC 52 +#define MDSS_DISP_CC_MDSS_PCLK1_CLK 53 +#define MDSS_DISP_CC_MDSS_PCLK1_CLK_SRC 54 +#define MDSS_DISP_CC_MDSS_PLL_LOCK_MONITOR_CLK 55 +#define MDSS_DISP_CC_MDSS_RSCC_AHB_CLK 56 +#define MDSS_DISP_CC_MDSS_RSCC_VSYNC_CLK 57 +#define MDSS_DISP_CC_MDSS_VSYNC1_CLK 58 +#define MDSS_DISP_CC_MDSS_VSYNC_CLK 59 +#define MDSS_DISP_CC_MDSS_VSYNC_CLK_SRC 60 +#define MDSS_DISP_CC_PLL0 61 +#define MDSS_DISP_CC_PLL1 62 +#define MDSS_DISP_CC_SLEEP_CLK 63 +#define MDSS_DISP_CC_SLEEP_CLK_SRC 64 +#define MDSS_DISP_CC_SM_OBS_CLK 65 +#define MDSS_DISP_CC_XO_CLK 66 +#define MDSS_DISP_CC_XO_CLK_SRC 67 + +/* DISP_CC_0/1 power domains */ +#define MDSS_DISP_CC_MDSS_CORE_GDSC 0 +#define MDSS_DISP_CC_MDSS_CORE_INT2_GDSC 1 + +/* DISP_CC_0/1 resets */ +#define MDSS_DISP_CC_MDSS_CORE_BCR 0 +#define MDSS_DISP_CC_MDSS_RSCC_BCR 1 + +#endif -- cgit v1.2.3 From dbc81e680a0f007cf062963c40f145074aacab2d Mon Sep 17 00:00:00 2001 From: Lijuan Gao Date: Tue, 22 Oct 2024 16:54:30 +0800 Subject: dt-bindings: arm: qcom,ids: add SoC ID for QCS615 Add the ID for the Qualcomm QCS615 SoC. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lijuan Gao Link: https://lore.kernel.org/r/20241022-add_initial_support_for_qcs615-v4-2-0a551c6dd342@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index 8332f8d82f96..73a69fc535f6 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -278,6 +278,7 @@ #define QCOM_ID_IPQ5321 650 #define QCOM_ID_QCS8300 674 #define QCOM_ID_QCS8275 675 +#define QCOM_ID_QCS615 680 /* * The board type and revision information, used by Qualcomm bootloaders and -- cgit v1.2.3 From 3b3214acd7f2dd506924cd57ce6e380cdaede423 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 18 Oct 2024 19:32:36 +0300 Subject: dt-bindings: arm: qcom,ids: add SoC ID for SAR2130P and SAR1130P Add the IDs for Qualcomm SAR2130P and SAR1130P platforms. 
Signed-off-by: Dmitry Baryshkov Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241018-sar2130p-socinfo-v1-1-b1e97ea963fe@linaro.org Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index 73a69fc535f6..1a53ca2df523 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -255,6 +255,7 @@ #define QCOM_ID_IPQ9510 521 #define QCOM_ID_QRB4210 523 #define QCOM_ID_QRB2210 524 +#define QCOM_ID_SAR2130P 525 #define QCOM_ID_SM8475 530 #define QCOM_ID_SM8475P 531 #define QCOM_ID_SA8775P 534 @@ -264,6 +265,7 @@ #define QCOM_ID_X1E80100 555 #define QCOM_ID_SM8650 557 #define QCOM_ID_SM4450 568 +#define QCOM_ID_SAR1130P 579 #define QCOM_ID_QDU1010 587 #define QCOM_ID_QRU1032 588 #define QCOM_ID_QRU1052 589 -- cgit v1.2.3 From 44059790a5cb9258ae6137387e4c39b717fd2ced Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Oct 2024 07:53:04 +0200 Subject: kfifo: don't include dma-mapping.h in kfifo.h Nothing in kfifo.h directly needs dma-mapping.h, only two macros use DMA_MAPPING_ERROR when actually instantiated. Drop the dma-mapping.h include to reduce include bloat. Add an explicity include to drivers/mailbox/omap-mailbox.c as that file uses __raw_readl and __raw_writel through a complicated include chain involving Fixes: d52b761e4b1a ("kfifo: add kfifo_dma_out_prepare_mapped()") Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241023055317.313234-1-hch@lst.de Signed-off-by: Greg Kroah-Hartman --- drivers/mailbox/omap-mailbox.c | 1 + include/linux/kfifo.h | 1 - samples/kfifo/dma-example.c | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/mailbox/omap-mailbox.c b/drivers/mailbox/omap-mailbox.c index 6797770474a5..680243751d62 100644 --- a/drivers/mailbox/omap-mailbox.c +++ b/drivers/mailbox/omap-mailbox.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 564868bdce89..fd743d4c4b4b 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -37,7 +37,6 @@ */ #include -#include #include #include #include diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c index 48df719dac8c..8076ac410161 100644 --- a/samples/kfifo/dma-example.c +++ b/samples/kfifo/dma-example.c @@ -9,6 +9,7 @@ #include #include #include +#include /* * This module shows how to handle fifo dma operations. -- cgit v1.2.3 From 83c289e81e88d01e55d6d56531502ed7b4886a05 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 17 Oct 2024 19:19:34 +0300 Subject: net/sched: act_api: unexport tcf_action_dump_1() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This isn't used outside act_api.c, but is called by tcf_dump_walker() prior to its definition. So move it upwards and make it static. Simultaneously, reorder the variable declarations so that they follow the networking "reverse Christmas tree" coding style. 
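For readers unfamiliar with the convention, "reverse Christmas tree" simply means ordering local variable declarations from the longest line down to the shortest. In the hunk below, the locals of tcf_action_dump_1() move from:

	int err = -EINVAL;
	unsigned char *b = skb_tail_pointer(skb);
	struct nlattr *nest;
	u32 flags;

to:

	unsigned char *b = skb_tail_pointer(skb);
	struct nlattr *nest;
	int err = -EINVAL;
	u32 flags;
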
Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241017161934.3599046-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni --- include/net/act_api.h | 1 - net/sched/act_api.c | 89 +++++++++++++++++++++++++-------------------------- 2 files changed, 44 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 77ee0c657e2c..404df8557f6a 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -219,7 +219,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref, bool terse); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); -int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); static inline void tcf_action_update_bstats(struct tc_action *a, struct sk_buff *skb) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 5bbfb83ed600..c3f8dd7f8e65 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -504,6 +504,50 @@ nla_put_failure: return -1; } +static int +tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nest; + int err = -EINVAL; + u32 flags; + + if (tcf_action_dump_terse(skb, a, false)) + goto nla_put_failure; + + if (a->hw_stats != TCA_ACT_HW_STATS_ANY && + nla_put_bitfield32(skb, TCA_ACT_HW_STATS, + a->hw_stats, TCA_ACT_HW_STATS_ANY)) + goto nla_put_failure; + + if (a->used_hw_stats_valid && + nla_put_bitfield32(skb, TCA_ACT_USED_HW_STATS, + a->used_hw_stats, TCA_ACT_HW_STATS_ANY)) + goto nla_put_failure; + + flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK; + if (flags && + nla_put_bitfield32(skb, TCA_ACT_FLAGS, + flags, flags)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count)) + goto nla_put_failure; + + nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + err = tcf_action_dump_old(skb, a, bind, ref); + if (err > 0) { + nla_nest_end(skb, nest); + return err; + } + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct netlink_callback *cb) { @@ -1190,51 +1234,6 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) return a->ops->dump(skb, a, bind, ref); } -int -tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) -{ - int err = -EINVAL; - unsigned char *b = skb_tail_pointer(skb); - struct nlattr *nest; - u32 flags; - - if (tcf_action_dump_terse(skb, a, false)) - goto nla_put_failure; - - if (a->hw_stats != TCA_ACT_HW_STATS_ANY && - nla_put_bitfield32(skb, TCA_ACT_HW_STATS, - a->hw_stats, TCA_ACT_HW_STATS_ANY)) - goto nla_put_failure; - - if (a->used_hw_stats_valid && - nla_put_bitfield32(skb, TCA_ACT_USED_HW_STATS, - a->used_hw_stats, TCA_ACT_HW_STATS_ANY)) - goto nla_put_failure; - - flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK; - if (flags && - nla_put_bitfield32(skb, TCA_ACT_FLAGS, - flags, flags)) - goto nla_put_failure; - - if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count)) - goto nla_put_failure; - - nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - err = tcf_action_dump_old(skb, a, bind, ref); - if (err > 0) { - nla_nest_end(skb, nest); - return err; - } - -nla_put_failure: - nlmsg_trim(skb, b); - 
return -1; -} -EXPORT_SYMBOL(tcf_action_dump_1); - int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref, bool terse) { -- cgit v1.2.3 From c972c1c41d9b20fb38b54e77dcee763e27e715a9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 18:41:00 -0700 Subject: ipv4: Switch inet_addr_hash() to less predictable hash. Recently, commit 4a0ec2aa0704 ("ipv6: switch inet6_addr_hash() to less predictable hash") and commit 4daf4dc275f1 ("ipv6: switch inet6_acaddr_hash() to less predictable hash") hardened IPv6 address hash functions. inet_addr_hash() is also highly predictable, and a malicious use could abuse a specific bucket. Let's follow the change on IPv4 by using jhash_1word(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241018014100.93776-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- include/net/ip.h | 5 +++++ net/ipv4/devinet.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 4be0a6a603b2..0e548c1f2a0e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -690,6 +690,11 @@ static inline unsigned int ipv4_addr_hash(__be32 ip) return (__force unsigned int) ip; } +static inline u32 __ipv4_addr_hash(const __be32 ip, const u32 initval) +{ + return jhash_1word((__force u32)ip, initval); +} + static inline u32 ipv4_portaddr_hash(const struct net *net, __be32 saddr, unsigned int port) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 0ff9c0abfaa0..5f859d01cbbe 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -121,7 +121,7 @@ struct inet_fill_args { static u32 inet_addr_hash(const struct net *net, __be32 addr) { - u32 val = (__force u32) addr ^ net_hash_mix(net); + u32 val = __ipv4_addr_hash(addr, net_hash_mix(net)); return hash_32(val, IN4_ADDR_HSIZE_SHIFT); } -- cgit v1.2.3 From e44ef3f66c5472c2cbc6957c684d7279c26b0db1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Oct 2024 05:21:08 +0000 Subject: netpoll: remove ndo_netpoll_setup() second argument npinfo is not used in any of the ndo_netpoll_setup() methods. 
Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241018052108.2610827-1-edumazet@google.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/team/team_core.c | 3 +-- include/linux/netdevice.h | 3 +-- net/8021q/vlan_dev.c | 2 +- net/bridge/br_device.c | 2 +- net/core/netpoll.c | 2 +- net/dsa/user.c | 3 +-- 8 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b1bffd8e9a95..3928287f5865 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1476,7 +1476,7 @@ static void bond_netpoll_cleanup(struct net_device *bond_dev) slave_disable_netpoll(slave); } -static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) +static int bond_netpoll_setup(struct net_device *dev) { struct bonding *bond = netdev_priv(dev); struct list_head *iter; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index cf18e66de142..edbd5afcec41 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1117,7 +1117,7 @@ static void macvlan_dev_poll_controller(struct net_device *dev) return; } -static int macvlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) +static int macvlan_dev_netpoll_setup(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *real_dev = vlan->lowerdev; diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 18191d5a8bd4..a1b27b69f010 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -1946,8 +1946,7 @@ static void team_netpoll_cleanup(struct net_device *dev) mutex_unlock(&team->lock); } -static int team_netpoll_setup(struct net_device *dev, - struct netpoll_info *npifo) +static int team_netpoll_setup(struct net_device *dev) { struct team *team = netdev_priv(dev); struct team_port *port; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8feaca12655e..86a0b7eb9461 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1425,8 +1425,7 @@ struct net_device_ops { __be16 proto, u16 vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*ndo_poll_controller)(struct net_device *dev); - int (*ndo_netpoll_setup)(struct net_device *dev, - struct netpoll_info *info); + int (*ndo_netpoll_setup)(struct net_device *dev); void (*ndo_netpoll_cleanup)(struct net_device *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 458040e8a0e0..91d134961357 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -725,7 +725,7 @@ static void vlan_dev_poll_controller(struct net_device *dev) return; } -static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) +static int vlan_dev_netpoll_setup(struct net_device *dev) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 26b79feb385d..0ab4613aa07a 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -328,7 +328,7 @@ int br_netpoll_enable(struct net_bridge_port *p) return __br_netpoll_enable(p); } -static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) +static int br_netpoll_setup(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p; diff --git a/net/core/netpoll.c b/net/core/netpoll.c 
index aa49b92e9194..94b7f07a952f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -641,7 +641,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) ops = ndev->netdev_ops; if (ops->ndo_netpoll_setup) { - err = ops->ndo_netpoll_setup(ndev, npinfo); + err = ops->ndo_netpoll_setup(ndev); if (err) goto free_npinfo; } diff --git a/net/dsa/user.c b/net/dsa/user.c index 64f660d2334b..91a1fa5f8ab0 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -1308,8 +1308,7 @@ static int dsa_user_set_pauseparam(struct net_device *dev, } #ifdef CONFIG_NET_POLL_CONTROLLER -static int dsa_user_netpoll_setup(struct net_device *dev, - struct netpoll_info *ni) +static int dsa_user_netpoll_setup(struct net_device *dev) { struct net_device *conduit = dsa_user_to_conduit(dev); struct dsa_user_priv *p = netdev_priv(dev); -- cgit v1.2.3 From 7cfc1b1fa8673fe386304194d4cc2c8fe555bbf9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Oct 2024 05:23:10 +0000 Subject: net: netdev_tx_sent_queue() small optimization Change smp_mb() imediately following a set_bit() with smp_mb__after_atomic(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20241018052310.2612084-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 86a0b7eb9461..bbd30f3c5d29 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3517,7 +3517,7 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, * because in netdev_tx_completed_queue we update the dql_completed * before checking the XOFF flag. */ - smp_mb(); + smp_mb__after_atomic(); /* check again in case another CPU has just made room avail */ if (unlikely(dql_avail(&dev_queue->dql) >= 0)) -- cgit v1.2.3 From e55f45b0cda71ac2e9b4c6dee8852dc8d6f22ef0 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 23 Oct 2024 10:35:43 +0200 Subject: regulator: doc: add missing documentation for init_cb Add comment documenting introduced init_cb. This solves the following warning when building the kernel documentation: ./include/linux/regulator/driver.h:435: warning: Function parameter or struct member 'init_cb' not described in 'regulator_desc' Fixes: cfcdf395c21e ("regulator: core: add callback to perform runtime init") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/lkml/20241023155120.6c4fea20@canb.auug.org.au/ Signed-off-by: Jerome Brunet Link: https://patch.msgid.link/20241023-regulator-doc-fixup-v1-1-ec018742ad73@baylibre.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index d2f4427504f0..5b66caf1695d 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -269,6 +269,11 @@ enum regulator_type { * config but it cannot store it for later usage. * Callback should return 0 on success or negative ERRNO * indicating failure. + * @init_cb: Optional callback called after the parsing of init_data. + * Allows the regulator to perform runtime init if necessary, + * such as synching the regulator and the parsed constraints. + * Callback should return 0 on success or negative ERRNO + * indicating failure. * @id: Numerical identifier for the regulator. * @ops: Regulator operations table. * @irq: Interrupt number for the regulator. 
-- cgit v1.2.3 From d1bc2d5cca434e7c854fd16ef021c16d74293b60 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 23 Oct 2024 10:35:44 +0200 Subject: regulator: doc: remove documentation comment for regulator_init Remove documentation comment related to regulator_init callback. This solves the following warning when building the kernel documentation: ./include/linux/regulator/machine.h:290: warning: Excess struct member 'regulator_init' description in 'regulator_init_data' Fixes: 602ff58ae4fe ("regulator: core: remove machine init callback from config") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/lkml/20241023155257.0fa7211d@canb.auug.org.au/ Signed-off-by: Jerome Brunet Link: https://patch.msgid.link/20241023-regulator-doc-fixup-v1-2-ec018742ad73@baylibre.com Signed-off-by: Mark Brown --- include/linux/regulator/machine.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index d0d700ff337a..b3db09a7429b 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -273,8 +273,6 @@ struct regulator_consumer_supply { * be usable. * @num_consumer_supplies: Number of consumer device supplies. * @consumer_supplies: Consumer device supply configuration. - * - * @regulator_init: Callback invoked when the regulator has been registered. * @driver_data: Data passed to regulator_init. */ struct regulator_init_data { -- cgit v1.2.3 From c7ae6551533e7c7bf903a2d259044778f4b103ce Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Sun, 20 Oct 2024 23:59:36 +0000 Subject: ASoC: soc-pcm: remove dpcm_xxx flags dpcm_xxx flags are no longer needed. Let's remove it. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/875xpm9vh3.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc.h | 5 ----- sound/soc/soc-pcm.c | 33 --------------------------------- 2 files changed, 38 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 8a1db45988ba..3e72317e2c20 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -820,11 +820,6 @@ struct snd_soc_dai_link { /* This DAI link can route to other DAI links at runtime (Frontend)*/ unsigned int dynamic:1; - /* REMOVE ME */ - /* DPCM capture and Playback support */ - unsigned int dpcm_capture:1; - unsigned int dpcm_playback:1; - /* DPCM used FE & BE merged format */ unsigned int dpcm_merged_format:1; /* DPCM used FE & BE merged channel */ diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index e2c5300df0f2..5142d1abbc7b 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2844,39 +2844,6 @@ static int soc_get_playback_capture(struct snd_soc_pcm_runtime *rtd, has_capture = 1; } - /* - * REMOVE ME - * - * dpcm_xxx flag will be removed soon, Indicates warning if dpcm_xxx flag was used - * as availability limitation - */ - if (has_playback && has_capture) { - if ( dai_link->dpcm_playback && - !dai_link->dpcm_capture && - !dai_link->playback_only) { - dev_warn(rtd->card->dev, - "both playback/capture are available," - " but not using playback_only flag (%s)\n", - dai_link->stream_name); - dev_warn(rtd->card->dev, - "dpcm_playback/capture are no longer needed," - " please use playback/capture_only instead\n"); - has_capture = 0; - } - - if (!dai_link->dpcm_playback && - dai_link->dpcm_capture && - !dai_link->capture_only) { - dev_warn(rtd->card->dev, - "both playback/capture are available," - " but not using capture_only flag (%s)\n", 
- dai_link->stream_name); - dev_warn(rtd->card->dev, - "dpcm_playback/capture are no longer needed," - " please use playback/capture_only instead\n"); - has_playback = 0; - } - } } else { struct snd_soc_dai *codec_dai; -- cgit v1.2.3 From 50a191a8a12b33dfad3b27c6ba4e76c5ba39db73 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 10 Aug 2024 19:00:35 +0200 Subject: sysctl: update comments to new registration APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysctl registration APIs do not need a terminating table entry anymore and with commit acc154691fc7 ("sysctl: Warn on an empty procname element") even emit warnings if such a sentinel entry is supplied. While at it also remove the mention of "table->de" which was removed in commit 3fbfa98112fc ("[PATCH] sysctl: remove the proc_dir_entry member for the sysctl tables") back in 2007. Signed-off-by: Thomas Weißschuh Reviewed-by: Kees Cook Signed-off-by: Joel Granados --- include/linux/sysctl.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 202855befa8b..40a6ac6c9713 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -90,9 +90,7 @@ int proc_do_static_key(const struct ctl_table *table, int write, void *buffer, /* * Register a set of sysctl names by calling register_sysctl - * with an initialised array of struct ctl_table's. An entry with - * NULL procname terminates the table. table->de will be - * set up by the registration and need not be initialised in advance. + * with an initialised array of struct ctl_table's. * * sysctl names can be mirrored automatically under /proc/sys. The * procname supplied controls /proc naming. @@ -133,7 +131,7 @@ static inline void *proc_sys_poll_event(struct ctl_table_poll *poll) /* A sysctl table is an array of struct ctl_table: */ struct ctl_table { - const char *procname; /* Text ID for /proc/sys, or zero */ + const char *procname; /* Text ID for /proc/sys */ void *data; int maxlen; umode_t mode; -- cgit v1.2.3 From 074a8b54dacc1920f54381f3661ecee6786b0c21 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 7 Oct 2024 15:00:45 +0300 Subject: wifi: mac80211: Add support to indicate that a new interface is to be added Add support to indicate to the driver that an interface is about to be added so that the driver could prepare its resources early if it needs so. Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20241007144851.e0e8563e1c30.Ifccc96a46a347eb15752caefc9f4eff31f75ed47@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 8 ++++++++ net/mac80211/cfg.c | 18 ++++++++++++++++++ net/mac80211/driver-ops.h | 12 ++++++++++++ net/mac80211/trace.h | 19 +++++++++++++++++++ 4 files changed, 57 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 333e0fae6796..0b8df8ec5a3b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4444,6 +4444,12 @@ struct ieee80211_prep_tx_info { * if the requested TID-To-Link mapping can be accepted or not. * If it's not accepted the driver may suggest a preferred mapping and * modify @ttlm parameter with the suggested TID-to-Link mapping. + * @prep_add_interface: prepare for interface addition. This can be used by + * drivers to prepare for the addition of a new interface, e.g., allocate + * the needed resources etc. 
This callback doesn't guarantee that an + * interface with the specified type would be added, and thus drivers that + * implement this callback need to handle such cases. The type is the full + * &enum nl80211_iftype. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4828,6 +4834,8 @@ struct ieee80211_ops { enum ieee80211_neg_ttlm_res (*can_neg_ttlm)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_neg_ttlm *ttlm); + void (*prep_add_interface)(struct ieee80211_hw *hw, + enum nl80211_iftype type); }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 847304a3a29a..ce9558cd1576 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -194,6 +194,24 @@ static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, } } + /* Let the driver know that an interface is going to be added. + * Indicate so only for interface types that will be added to the + * driver. + */ + switch (type) { + case NL80211_IFTYPE_AP_VLAN: + break; + case NL80211_IFTYPE_MONITOR: + if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) || + !(params->flags & MONITOR_FLAG_ACTIVE)) + break; + fallthrough; + default: + drv_prep_add_interface(local, + ieee80211_vif_type_p2p(&sdata->vif)); + break; + } + return wdev; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index d382d9729e85..48bc2da728c0 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1728,4 +1728,16 @@ drv_can_neg_ttlm(struct ieee80211_local *local, return res; } + +static inline void +drv_prep_add_interface(struct ieee80211_local *local, + enum nl80211_iftype type) +{ + trace_drv_prep_add_interface(local, type); + if (local->ops->prep_add_interface) + local->ops->prep_add_interface(&local->hw, type); + + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index dc498cd8cd91..e6f0ce8e5d43 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -3154,6 +3154,25 @@ TRACE_EVENT(drv_neg_ttlm_res, LOCAL_PR_ARG, VIF_PR_ARG, __entry->res ) ); + +TRACE_EVENT(drv_prep_add_interface, + TP_PROTO(struct ieee80211_local *local, + enum nl80211_iftype type), + + TP_ARGS(local, type), + TP_STRUCT__entry(LOCAL_ENTRY + __field(u32, type) + ), + + TP_fast_assign(LOCAL_ASSIGN; + __entry->type = type; + ), + + TP_printk(LOCAL_PR_FMT " type: %u\n ", + LOCAL_PR_ARG, __entry->type + ) +); + #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 62262dd00c319195f2e14022903b7ebbb53119bc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 15:00:46 +0300 Subject: wifi: cfg80211: disallow SMPS in AP mode In practice, userspace hasn't been able to set this for many years, and mac80211 has already rejected it (which is now no longer needed), so reject SMPS mode (other than "OFF" to be a bit more compatible) in AP mode. Also remove the parameter from the AP settings struct. 
Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20241007144851.fe1fc46484cf.I8676fb52b818a4bedeb9c25b901e1396277ffc0b@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/quantenna/qtnfmac/commands.c | 2 +- include/net/cfg80211.h | 2 -- net/mac80211/cfg.c | 3 --- net/wireless/nl80211.c | 30 +++-------------------- 4 files changed, 4 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 9540ad6196d7..956c5763662f 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -257,7 +257,7 @@ int qtnf_cmd_send_start_ap(struct qtnf_vif *vif, cmd->beacon_interval = cpu_to_le16(s->beacon_interval); cmd->hidden_ssid = qlink_hidden_ssid_nl2q(s->hidden_ssid); cmd->inactivity_timeout = cpu_to_le16(s->inactivity_timeout); - cmd->smps_mode = s->smps_mode; + cmd->smps_mode = NL80211_SMPS_OFF; cmd->p2p_ctwindow = s->p2p_ctwindow; cmd->p2p_opp_ps = s->p2p_opp_ps; cmd->pbss = s->pbss; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 69ec1eb41a09..c8ce5c2e14f4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1460,7 +1460,6 @@ struct cfg80211_unsol_bcast_probe_resp { * @crypto: crypto settings * @privacy: the BSS uses privacy * @auth_type: Authentication type (algorithm) - * @smps_mode: SMPS mode * @inactivity_timeout: time in seconds to determine station's inactivity. * @p2p_ctwindow: P2P CT Window * @p2p_opp_ps: P2P opportunistic PS @@ -1498,7 +1497,6 @@ struct cfg80211_ap_settings { struct cfg80211_crypto_settings crypto; bool privacy; enum nl80211_auth_type auth_type; - enum nl80211_smps_mode smps_mode; int inactivity_timeout; u8 p2p_ctwindow; bool p2p_opp_ps; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ce9558cd1576..548b9bbdac04 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1312,9 +1312,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (old) return -EALREADY; - if (params->smps_mode != NL80211_SMPS_OFF) - return -EOPNOTSUPP; - link->smps_mode = IEEE80211_SMPS_OFF; link->needed_rx_chains = sdata->local->rx_chains; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4e3609176880..fb35c03af34c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6227,33 +6227,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) goto out; } - if (info->attrs[NL80211_ATTR_SMPS_MODE]) { - params->smps_mode = - nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]); - switch (params->smps_mode) { - case NL80211_SMPS_OFF: - break; - case NL80211_SMPS_STATIC: - if (!(rdev->wiphy.features & - NL80211_FEATURE_STATIC_SMPS)) { - err = -EINVAL; - goto out; - } - break; - case NL80211_SMPS_DYNAMIC: - if (!(rdev->wiphy.features & - NL80211_FEATURE_DYNAMIC_SMPS)) { - err = -EINVAL; - goto out; - } - break; - default: - err = -EINVAL; - goto out; - } - } else { - params->smps_mode = NL80211_SMPS_OFF; - } + if (info->attrs[NL80211_ATTR_SMPS_MODE] && + nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]) != NL80211_SMPS_OFF) + return -EOPNOTSUPP; params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) { -- cgit v1.2.3 From 9c5f2c7eeb585834f8dadb552b4fd811dd2dee6f Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 7 Oct 2024 15:00:47 +0300 Subject: wifi: mac80211: rename 
IEEE80211_CHANCTX_CHANGE_MIN_WIDTH The name is misleading, this actually indicates that ieee80211_chanctx_conf::min_def was updated. Rename it to IEEE80211_CHANCTX_CHANGE_MIN_DEF. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20241007144851.726b5f12ae0c.I3bd9e594c9d2735183ec049a4c7224bd0a9599c9@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 4 ++-- include/net/mac80211.h | 4 ++-- net/mac80211/chan.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index a327893c6dce..11cd1a8fdd9e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -5067,7 +5067,7 @@ void iwl_mvm_change_chanctx(struct ieee80211_hw *hw, (changed & ~(IEEE80211_CHANCTX_CHANGE_WIDTH | IEEE80211_CHANCTX_CHANGE_RX_CHAINS | IEEE80211_CHANCTX_CHANGE_RADAR | - IEEE80211_CHANCTX_CHANGE_MIN_WIDTH)), + IEEE80211_CHANCTX_CHANGE_MIN_DEF)), "Cannot change PHY. Ref=%d, changed=0x%X\n", phy_ctxt->ref, changed)) return; @@ -5075,7 +5075,7 @@ void iwl_mvm_change_chanctx(struct ieee80211_hw *hw, guard(mvm)(mvm); /* we are only changing the min_width, may be a noop */ - if (changed == IEEE80211_CHANCTX_CHANGE_MIN_WIDTH) { + if (changed == IEEE80211_CHANCTX_CHANGE_MIN_DEF) { if (phy_ctxt->width == def->width) return; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0b8df8ec5a3b..c42ad5a0c303 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -213,7 +213,7 @@ struct ieee80211_low_level_stats { * @IEEE80211_CHANCTX_CHANGE_RADAR: radar detection flag changed * @IEEE80211_CHANCTX_CHANGE_CHANNEL: switched to another operating channel, * this is used only with channel switching with CSA - * @IEEE80211_CHANCTX_CHANGE_MIN_WIDTH: The min required channel width changed + * @IEEE80211_CHANCTX_CHANGE_MIN_DEF: The min chandef changed * @IEEE80211_CHANCTX_CHANGE_AP: The AP channel definition changed, so (wider * bandwidth) OFDMA settings need to be changed * @IEEE80211_CHANCTX_CHANGE_PUNCTURING: The punctured channel(s) bitmap @@ -224,7 +224,7 @@ enum ieee80211_chanctx_change { IEEE80211_CHANCTX_CHANGE_RX_CHAINS = BIT(1), IEEE80211_CHANCTX_CHANGE_RADAR = BIT(2), IEEE80211_CHANCTX_CHANGE_CHANNEL = BIT(3), - IEEE80211_CHANCTX_CHANGE_MIN_WIDTH = BIT(4), + IEEE80211_CHANCTX_CHANGE_MIN_DEF = BIT(4), IEEE80211_CHANCTX_CHANGE_AP = BIT(5), IEEE80211_CHANCTX_CHANGE_PUNCTURING = BIT(6), }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index a155e418d26b..3f7df45b0431 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -409,7 +409,7 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, if (!ctx->driver_present) return 0; - return IEEE80211_CHANCTX_CHANGE_MIN_WIDTH; + return IEEE80211_CHANCTX_CHANGE_MIN_DEF; } static void ieee80211_chan_bw_change(struct ieee80211_local *local, -- cgit v1.2.3 From e21dd758cf4c51d508e6665b653c5836103d1027 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 7 Oct 2024 15:00:48 +0300 Subject: wifi: mac80211: make bss_param_ch_cnt available for the low level driver Drivers may need to track this. Make it available for them, and maintain the value when beacons are received. When link X receives a beacon, iterate the RNR elements and update all the links with their respective data. 
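As a rough illustration of how a low level driver can consume the two new
fields described below (hypothetical helper, not part of this patch; foo_* is
an invented name):

	/* hypothetical low-level driver helper */
	static bool foo_bss_params_from_own_beacon(struct ieee80211_bss_conf *link_conf)
	{
		/*
		 * bss_param_ch_cnt_link_id records which link's beacon last
		 * updated bss_param_ch_cnt; if it matches this link's own
		 * id, the link is still hearing its own beacons.
		 */
		return link_conf->bss_param_ch_cnt_link_id == link_conf->link_id;
	}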
Track the link id that updated the data so that each link can know whether the update came from its own beacon or from another link. In case, the update came from the link's own beacon, always update the updater link id. The purpose is to let the low level driver know if a link is losing its beacons. If link X is losing its beacons, it can still track the bss_param_ch_cnt and know where the update came from. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20241007144851.e2d8d1a722ad.I04b883daba2cd48e5730659eb62ca1614c899cbb@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 15 ++++++++ net/mac80211/ieee80211_i.h | 2 - net/mac80211/mlme.c | 93 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 106 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c42ad5a0c303..20562676e9cd 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -740,6 +740,19 @@ struct ieee80211_parsed_tpe { * @eht_80mhz_full_bw_ul_mumimo: in AP-mode, does this BSS support the * reception of an EHT TB PPDU on an RU that spans the entire PPDU * bandwidth + * @bss_param_ch_cnt: in BSS-mode, the BSS params change count. This + * information is the latest known value. It can come from this link's + * beacon or from a beacon sent by another link. + * @bss_param_ch_cnt_link_id: in BSS-mode, the link_id to which the beacon + * that updated &bss_param_ch_cnt belongs. E.g. if link 1 doesn't hear + * its beacons, and link 2 sent a beacon with an RNR element that updated + * link 1's BSS params change count, then, link 1's + * bss_param_ch_cnt_link_id will be 2. That means that link 1 knows that + * link 2 was the link that updated its bss_param_ch_cnt value. + * In case link 1 hears its beacon again, bss_param_ch_cnt_link_id will + * be updated to 1, even if bss_param_ch_cnt didn't change. This allows + * the link to know that it heard the latest value from its own beacon + * (as opposed to hearing its value from another link's beacon). 
*/ struct ieee80211_bss_conf { struct ieee80211_vif *vif; @@ -834,6 +847,8 @@ struct ieee80211_bss_conf { bool eht_su_beamformee; bool eht_mu_beamformer; bool eht_80mhz_full_bw_ul_mumimo; + u8 bss_param_ch_cnt; + u8 bss_param_ch_cnt_link_id; }; /** diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index afb867dc6b24..d884d05826d3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1015,8 +1015,6 @@ struct ieee80211_link_data_managed { int wmm_last_param_set; int mu_edca_last_param_set; - - u8 bss_param_ch_cnt; }; struct ieee80211_link_data_ap { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0303972c23e4..93a11a59339c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2643,6 +2643,89 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, &ifmgd->csa_connection_drop_work); } +struct sta_bss_param_ch_cnt_data { + struct ieee80211_sub_if_data *sdata; + u8 reporting_link_id; + u8 mld_id; +}; + +static enum cfg80211_rnr_iter_ret +ieee80211_sta_bss_param_ch_cnt_iter(void *_data, u8 type, + const struct ieee80211_neighbor_ap_info *info, + const u8 *tbtt_info, u8 tbtt_info_len) +{ + struct sta_bss_param_ch_cnt_data *data = _data; + struct ieee80211_sub_if_data *sdata = data->sdata; + const struct ieee80211_tbtt_info_ge_11 *ti; + u8 bss_param_ch_cnt; + int link_id; + + if (type != IEEE80211_TBTT_INFO_TYPE_TBTT) + return RNR_ITER_CONTINUE; + + if (tbtt_info_len < sizeof(*ti)) + return RNR_ITER_CONTINUE; + + ti = (const void *)tbtt_info; + + if (ti->mld_params.mld_id != data->mld_id) + return RNR_ITER_CONTINUE; + + link_id = le16_get_bits(ti->mld_params.params, + IEEE80211_RNR_MLD_PARAMS_LINK_ID); + bss_param_ch_cnt = + le16_get_bits(ti->mld_params.params, + IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT); + + if (bss_param_ch_cnt != 255 && + link_id < ARRAY_SIZE(sdata->link)) { + struct ieee80211_link_data *link = + sdata_dereference(sdata->link[link_id], sdata); + + if (link && link->conf->bss_param_ch_cnt != bss_param_ch_cnt) { + link->conf->bss_param_ch_cnt = bss_param_ch_cnt; + link->conf->bss_param_ch_cnt_link_id = + data->reporting_link_id; + } + } + + return RNR_ITER_CONTINUE; +} + +static void +ieee80211_mgd_update_bss_param_ch_cnt(struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *bss_conf, + struct ieee802_11_elems *elems) +{ + struct sta_bss_param_ch_cnt_data data = { + .reporting_link_id = bss_conf->link_id, + .sdata = sdata, + }; + int bss_param_ch_cnt; + + if (!elems->ml_basic) + return; + + data.mld_id = ieee80211_mle_get_mld_id((const void *)elems->ml_basic); + + cfg80211_iter_rnr(elems->ie_start, elems->total_len, + ieee80211_sta_bss_param_ch_cnt_iter, &data); + + bss_param_ch_cnt = + ieee80211_mle_get_bss_param_ch_cnt((const void *)elems->ml_basic); + + /* + * Update bss_param_ch_cnt_link_id even if bss_param_ch_cnt + * didn't change to indicate that we got a beacon on our own + * link. 
+ */ + if (bss_param_ch_cnt >= 0 && bss_param_ch_cnt != 255) { + bss_conf->bss_param_ch_cnt = bss_param_ch_cnt; + bss_conf->bss_param_ch_cnt_link_id = + bss_conf->link_id; + } +} + static bool ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, @@ -4667,7 +4750,8 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, ret = false; goto out; } - link->u.mgd.bss_param_ch_cnt = bss_param_ch_cnt; + bss_conf->bss_param_ch_cnt = bss_param_ch_cnt; + bss_conf->bss_param_ch_cnt_link_id = link_id; } } else if (elems->parse_error & IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC || !elems->prof || @@ -4677,6 +4761,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, } else { const u8 *ptr = elems->prof->variable + elems->prof->sta_info_len - 1; + int bss_param_ch_cnt; /* * During parsing, we validated that these fields exist, @@ -4684,8 +4769,10 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, */ capab_info = get_unaligned_le16(ptr); assoc_data->link[link_id].status = get_unaligned_le16(ptr + 2); - link->u.mgd.bss_param_ch_cnt = + bss_param_ch_cnt = ieee80211_mle_basic_sta_prof_bss_param_ch_cnt(elems->prof); + bss_conf->bss_param_ch_cnt = bss_param_ch_cnt; + bss_conf->bss_param_ch_cnt_link_id = link_id; if (assoc_data->link[link_id].status != WLAN_STATUS_SUCCESS) { link_info(link, "association response status code=%u\n", @@ -6913,6 +7000,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, /* note that after this elems->ml_basic can no longer be used fully */ ieee80211_mgd_check_cross_link_csa(sdata, rx_status->link_id, elems); + ieee80211_mgd_update_bss_param_ch_cnt(sdata, bss_conf, elems); + if (!link->u.mgd.disable_wmm_tracking && ieee80211_sta_wmm_params(local, link, elems->wmm_param, elems->wmm_param_len, -- cgit v1.2.3 From 88b67e91e2928c3311f3658d1270b40708b0de00 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 15:00:54 +0300 Subject: wifi: mac80211: call rate_control_rate_update() for link STA In order to update the right link information, call the update rate_control_rate_update() with the right link_sta, and then pass that through to the driver's sta_rc_update() method. The software rate control still doesn't support it, but that'll be skipped by not having a rate control ref. Since it now operates on a link sta, rename the driver method. 
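For drivers that only care about the (deflink) station, the conversion is
mechanical, as the driver diffs below show; roughly (foo_* is a placeholder
driver name):

	static void foo_link_sta_rc_update(struct ieee80211_hw *hw,
					   struct ieee80211_vif *vif,
					   struct ieee80211_link_sta *link_sta,
					   u32 changed)
	{
		struct ieee80211_sta *sta = link_sta->sta;	/* as before */

		/* existing per-station rate handling goes here */
	}

	/* in struct ieee80211_ops: .link_sta_rc_update = foo_link_sta_rc_update, */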
Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20241007144851.5851b6b5fd41.Ibdf50d96afa4b761dd9b9dfd54a1147e77a75329@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/mac.c | 5 +++-- drivers/net/wireless/ath/ath11k/mac.c | 5 +++-- drivers/net/wireless/ath/ath12k/mac.c | 5 +++-- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 6 ++++-- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 5 +++-- drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 +- drivers/net/wireless/mediatek/mt76/mt7915/main.c | 5 +++-- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 5 +++-- drivers/net/wireless/realtek/rtw88/mac80211.c | 6 ++++-- drivers/net/wireless/realtek/rtw89/mac80211.c | 6 ++++-- drivers/net/wireless/ti/wlcore/main.c | 5 +++-- drivers/net/wireless/virtual/mac80211_hwsim.c | 8 ++++---- include/net/mac80211.h | 12 ++++++------ net/mac80211/chan.c | 2 +- net/mac80211/driver-ops.c | 15 ++++++++------- net/mac80211/driver-ops.h | 6 +++--- net/mac80211/ibss.c | 3 ++- net/mac80211/mesh_plink.c | 3 ++- net/mac80211/rate.c | 8 ++++---- net/mac80211/rate.h | 3 +-- net/mac80211/rx.c | 4 ++-- net/mac80211/tdls.c | 3 ++- net/mac80211/trace.h | 15 +++++++++------ net/mac80211/vht.c | 3 +-- 25 files changed, 80 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 646e1737d4c4..41ab83c3d3f7 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -8507,9 +8507,10 @@ exit: static void ath10k_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, + struct ieee80211_link_sta *link_sta, u32 changed) { + struct ieee80211_sta *sta = link_sta->sta; struct ath10k *ar = hw->priv; struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; @@ -9450,7 +9451,7 @@ static const struct ieee80211_ops ath10k_ops = { .reconfig_complete = ath10k_reconfig_complete, .get_survey = ath10k_get_survey, .set_bitrate_mask = ath10k_mac_op_set_bitrate_mask, - .sta_rc_update = ath10k_sta_rc_update, + .link_sta_rc_update = ath10k_sta_rc_update, .offset_tsf = ath10k_offset_tsf, .ampdu_action = ath10k_ampdu_action, .get_et_sset_count = ath10k_debug_get_et_sset_count, diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index f8068d2e848c..e6acbff06749 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -5079,9 +5079,10 @@ static void ath11k_mac_op_sta_set_4addr(struct ieee80211_hw *hw, static void ath11k_mac_op_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, + struct ieee80211_link_sta *link_sta, u32 changed) { + struct ieee80211_sta *sta = link_sta->sta; struct ath11k *ar = hw->priv; struct ath11k_sta *arsta = ath11k_sta_to_arsta(sta); struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif); @@ -9708,7 +9709,7 @@ static const struct ieee80211_ops ath11k_ops = { .sta_state = ath11k_mac_op_sta_state, .sta_set_4addr = ath11k_mac_op_sta_set_4addr, .sta_set_txpwr = ath11k_mac_op_sta_set_txpwr, - .sta_rc_update = ath11k_mac_op_sta_rc_update, + .link_sta_rc_update = ath11k_mac_op_sta_rc_update, .conf_tx = ath11k_mac_op_conf_tx, .set_antenna = ath11k_mac_op_set_antenna, .get_antenna = ath11k_mac_op_get_antenna, diff --git a/drivers/net/wireless/ath/ath12k/mac.c 
b/drivers/net/wireless/ath/ath12k/mac.c index 137394c36460..acf5628adda5 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4737,9 +4737,10 @@ out: static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, + struct ieee80211_link_sta *link_sta, u32 changed) { + struct ieee80211_sta *sta = link_sta->sta; struct ath12k *ar; struct ath12k_sta *arsta = ath12k_sta_to_arsta(sta); struct ath12k_vif *arvif = ath12k_vif_to_arvif(vif); @@ -8681,7 +8682,7 @@ static const struct ieee80211_ops ath12k_ops = { .set_rekey_data = ath12k_mac_op_set_rekey_data, .sta_state = ath12k_mac_op_sta_state, .sta_set_txpwr = ath12k_mac_op_sta_set_txpwr, - .sta_rc_update = ath12k_mac_op_sta_rc_update, + .link_sta_rc_update = ath12k_mac_op_sta_rc_update, .conf_tx = ath12k_mac_op_conf_tx, .set_antenna = ath12k_mac_op_set_antenna, .get_antenna = ath12k_mac_op_get_antenna, diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index 8a03bcc2789e..57094bd45d98 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1357,8 +1357,10 @@ static int ath9k_htc_sta_remove(struct ieee80211_hw *hw, static void ath9k_htc_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, u32 changed) + struct ieee80211_link_sta *link_sta, + u32 changed) { + struct ieee80211_sta *sta = link_sta->sta; struct ath9k_htc_sta *ista = (struct ath9k_htc_sta *) sta->drv_priv; if (!(changed & IEEE80211_RC_SUPP_RATES_CHANGED)) @@ -1883,7 +1885,7 @@ struct ieee80211_ops ath9k_htc_ops = { .sta_add = ath9k_htc_sta_add, .sta_remove = ath9k_htc_sta_remove, .conf_tx = ath9k_htc_conf_tx, - .sta_rc_update = ath9k_htc_sta_rc_update, + .link_sta_rc_update = ath9k_htc_sta_rc_update, .bss_info_changed = ath9k_htc_bss_info_changed, .set_key = ath9k_htc_set_key, .get_tsf = ath9k_htc_get_tsf, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 11cd1a8fdd9e..3721d6349cc5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4236,8 +4236,9 @@ int iwl_mvm_mac_set_rts_threshold(struct ieee80211_hw *hw, u32 value) } void iwl_mvm_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, u32 changed) + struct ieee80211_link_sta *link_sta, u32 changed) { + struct ieee80211_sta *sta = link_sta->sta; struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); if (changed & (IEEE80211_RC_BW_CHANGED | @@ -6562,7 +6563,7 @@ const struct ieee80211_ops iwl_mvm_hw_ops = { .allow_buffered_frames = iwl_mvm_mac_allow_buffered_frames, .release_buffered_frames = iwl_mvm_mac_release_buffered_frames, .set_rts_threshold = iwl_mvm_mac_set_rts_threshold, - .sta_rc_update = iwl_mvm_sta_rc_update, + .link_sta_rc_update = iwl_mvm_sta_rc_update, .conf_tx = iwl_mvm_mac_conf_tx, .mgd_prepare_tx = iwl_mvm_mac_mgd_prepare_tx, .mgd_complete_tx = iwl_mvm_mac_mgd_complete_tx, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index f2378e0fb2fb..7de6c96646ca 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -1401,7 +1401,7 @@ const struct ieee80211_ops iwl_mvm_mld_hw_ops = { .allow_buffered_frames = iwl_mvm_mac_allow_buffered_frames, 
.release_buffered_frames = iwl_mvm_mac_release_buffered_frames, .set_rts_threshold = iwl_mvm_mac_set_rts_threshold, - .sta_rc_update = iwl_mvm_sta_rc_update, + .link_sta_rc_update = iwl_mvm_sta_rc_update, .conf_tx = iwl_mvm_mld_mac_conf_tx, .mgd_prepare_tx = iwl_mvm_mac_mgd_prepare_tx, .mgd_complete_tx = iwl_mvm_mac_mgd_complete_tx, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index ef07cff203b0..6246ffce7cf8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -2914,7 +2914,7 @@ iwl_mvm_mac_release_buffered_frames(struct ieee80211_hw *hw, bool more_data); int iwl_mvm_mac_set_rts_threshold(struct ieee80211_hw *hw, u32 value); void iwl_mvm_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, u32 changed); + struct ieee80211_link_sta *link_sta, u32 changed); void iwl_mvm_mac_mgd_prepare_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_prep_tx_info *info); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c index d75e8dea1fbd..c6f498fc81ff 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c @@ -1163,9 +1163,10 @@ static void mt7915_sta_rc_work(void *data, struct ieee80211_sta *sta) static void mt7915_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, + struct ieee80211_link_sta *link_sta, u32 changed) { + struct ieee80211_sta *sta = link_sta->sta; struct mt7915_phy *phy = mt7915_hw_phy(hw); struct mt7915_dev *dev = phy->dev; struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv; @@ -1709,7 +1710,7 @@ const struct ieee80211_ops mt7915_ops = { .stop_ap = mt7915_stop_ap, .sta_state = mt76_sta_state, .sta_pre_rcu_remove = mt76_sta_pre_rcu_remove, - .sta_rc_update = mt7915_sta_rc_update, + .link_sta_rc_update = mt7915_sta_rc_update, .set_key = mt7915_set_key, .ampdu_action = mt7915_ampdu_action, .set_rts_threshold = mt7915_set_rts_threshold, diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 39f071ece35e..2b34ae5e0cb5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -1060,9 +1060,10 @@ static void mt7996_sta_rc_work(void *data, struct ieee80211_sta *sta) static void mt7996_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, + struct ieee80211_link_sta *link_sta, u32 changed) { + struct ieee80211_sta *sta = link_sta->sta; struct mt7996_phy *phy = mt7996_hw_phy(hw); struct mt7996_dev *dev = phy->dev; @@ -1472,7 +1473,7 @@ const struct ieee80211_ops mt7996_ops = { .sta_add = mt7996_sta_add, .sta_remove = mt7996_sta_remove, .sta_pre_rcu_remove = mt76_sta_pre_rcu_remove, - .sta_rc_update = mt7996_sta_rc_update, + .link_sta_rc_update = mt7996_sta_rc_update, .set_key = mt7996_set_key, .ampdu_action = mt7996_ampdu_action, .set_rts_threshold = mt7996_set_rts_threshold, diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c index b39e90fb66b4..026fbf4ad9cc 100644 --- a/drivers/net/wireless/realtek/rtw88/mac80211.c +++ b/drivers/net/wireless/realtek/rtw88/mac80211.c @@ -928,8 +928,10 @@ static int rtw_ops_set_sar_specs(struct ieee80211_hw *hw, static void rtw_ops_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif 
*vif, - struct ieee80211_sta *sta, u32 changed) + struct ieee80211_link_sta *link_sta, + u32 changed) { + struct ieee80211_sta *sta = link_sta->sta; struct rtw_dev *rtwdev = hw->priv; struct rtw_sta_info *si = (struct rtw_sta_info *)sta->drv_priv; @@ -973,7 +975,7 @@ const struct ieee80211_ops rtw_ops = { .reconfig_complete = rtw_reconfig_complete, .hw_scan = rtw_ops_hw_scan, .cancel_hw_scan = rtw_ops_cancel_hw_scan, - .sta_rc_update = rtw_ops_sta_rc_update, + .link_sta_rc_update = rtw_ops_sta_rc_update, .set_sar_specs = rtw_ops_set_sar_specs, #ifdef CONFIG_PM .suspend = rtw_ops_suspend, diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c index 1ee63a85308f..3f33c3a2ae7d 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -1290,8 +1290,10 @@ out: static void rtw89_ops_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, u32 changed) + struct ieee80211_link_sta *link_sta, + u32 changed) { + struct ieee80211_sta *sta = link_sta->sta; struct rtw89_dev *rtwdev = hw->priv; rtw89_phy_ra_update_sta(rtwdev, sta, changed); @@ -1593,7 +1595,7 @@ const struct ieee80211_ops rtw89_ops = { .remain_on_channel = rtw89_ops_remain_on_channel, .cancel_remain_on_channel = rtw89_ops_cancel_remain_on_channel, .set_sar_specs = rtw89_ops_set_sar_specs, - .sta_rc_update = rtw89_ops_sta_rc_update, + .link_sta_rc_update = rtw89_ops_sta_rc_update, .set_tid_config = rtw89_ops_set_tid_config, #ifdef CONFIG_PM .suspend = rtw89_ops_suspend, diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 0c77b8524160..986b07bfa0ee 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5789,9 +5789,10 @@ static int wlcore_op_cancel_remain_on_channel(struct ieee80211_hw *hw, static void wlcore_op_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, + struct ieee80211_link_sta *link_sta, u32 changed) { + struct ieee80211_sta *sta = link_sta->sta; struct wl12xx_vif *wlvif = wl12xx_vif_to_data(vif); wl1271_debug(DEBUG_MAC80211, "mac80211 sta_rc_update"); @@ -6052,7 +6053,7 @@ static const struct ieee80211_ops wl1271_ops = { .assign_vif_chanctx = wlcore_op_assign_vif_chanctx, .unassign_vif_chanctx = wlcore_op_unassign_vif_chanctx, .switch_vif_chanctx = wlcore_op_switch_vif_chanctx, - .sta_rc_update = wlcore_op_sta_rc_update, + .link_sta_rc_update = wlcore_op_sta_rc_update, .sta_statistics = wlcore_op_sta_statistics, .get_expected_throughput = wlcore_op_get_expected_throughput, CFG80211_TESTMODE_CMD(wl1271_tm_cmd) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index f0e528abb1b4..fe3e2dc1626b 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -2594,10 +2594,11 @@ static void mac80211_hwsim_link_info_changed(struct ieee80211_hw *hw, static void mac80211_hwsim_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, + struct ieee80211_link_sta *link_sta, u32 changed) { struct mac80211_hwsim_data *data = hw->priv; + struct ieee80211_sta *sta = link_sta->sta; u32 bw = U32_MAX; int link_id; @@ -2607,7 +2608,6 @@ mac80211_hwsim_sta_rc_update(struct ieee80211_hw *hw, link_id++) { enum nl80211_chan_width confbw = NL80211_CHAN_WIDTH_20_NOHT; struct ieee80211_bss_conf *vif_conf; - struct ieee80211_link_sta 
*link_sta; link_sta = rcu_dereference(sta->link[link_id]); @@ -2659,7 +2659,7 @@ static int mac80211_hwsim_sta_add(struct ieee80211_hw *hw, hwsim_check_magic(vif); hwsim_set_sta_magic(sta); - mac80211_hwsim_sta_rc_update(hw, vif, sta, 0); + mac80211_hwsim_sta_rc_update(hw, vif, &sta->deflink, 0); if (sta->valid_links) { WARN(hweight16(sta->valid_links) > 1, @@ -3961,7 +3961,7 @@ out: .link_info_changed = mac80211_hwsim_link_info_changed, \ .tx_last_beacon = mac80211_hwsim_tx_last_beacon, \ .sta_notify = mac80211_hwsim_sta_notify, \ - .sta_rc_update = mac80211_hwsim_sta_rc_update, \ + .link_sta_rc_update = mac80211_hwsim_sta_rc_update, \ .conf_tx = mac80211_hwsim_conf_tx, \ .get_survey = mac80211_hwsim_get_survey, \ CFG80211_TESTMODE_CMD(mac80211_hwsim_testmode_cmd) \ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 20562676e9cd..7a6edc04e212 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4090,8 +4090,8 @@ struct ieee80211_prep_tx_info { * in @sta_state. * The callback can sleep. * - * @sta_rc_update: Notifies the driver of changes to the bitrates that can be - * used to transmit to the station. The changes are advertised with bits + * @link_sta_rc_update: Notifies the driver of changes to the bitrates that can + * be used to transmit to the station. The changes are advertised with bits * from &enum ieee80211_rate_control_changed and the values are reflected * in the station data. This callback should only be used when the driver * uses hardware rate control (%IEEE80211_HW_HAS_RATE_CONTROL) since @@ -4581,10 +4581,10 @@ struct ieee80211_ops { void (*sta_pre_rcu_remove)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta); - void (*sta_rc_update)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - u32 changed); + void (*link_sta_rc_update)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_link_sta *link_sta, + u32 changed); void (*sta_rate_tbl_update)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index b355e9af268d..2d7ec557cbd6 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -467,7 +467,7 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local, continue; link_sta->pub->bandwidth = new_sta_bw; - rate_control_rate_update(local, sband, sta, link_id, + rate_control_rate_update(local, sband, link_sta, IEEE80211_RC_BW_CHANGED); } } diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index fe868b521622..84d048339113 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -181,9 +181,10 @@ int drv_sta_set_txpwr(struct ieee80211_local *local, return ret; } -void drv_sta_rc_update(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, u32 changed) +void drv_link_sta_rc_update(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_link_sta *link_sta, + u32 changed) { sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -193,10 +194,10 @@ void drv_sta_rc_update(struct ieee80211_local *local, (sdata->vif.type != NL80211_IFTYPE_ADHOC && sdata->vif.type != NL80211_IFTYPE_MESH_POINT)); - trace_drv_sta_rc_update(local, sdata, sta, changed); - if (local->ops->sta_rc_update) - local->ops->sta_rc_update(&local->hw, &sdata->vif, - sta, changed); + trace_drv_link_sta_rc_update(local, sdata, link_sta, changed); + if (local->ops->link_sta_rc_update) + 
local->ops->link_sta_rc_update(&local->hw, &sdata->vif, + link_sta, changed); trace_drv_return_void(local); } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 48bc2da728c0..edd1e4d4ad9d 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -594,9 +594,9 @@ int drv_sta_set_txpwr(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct sta_info *sta); -void drv_sta_rc_update(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, u32 changed); +void drv_link_sta_rc_update(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_link_sta *link_sta, u32 changed); static inline void drv_sta_rate_tbl_update(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 08fac295ad5b..a1b4178deccf 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1072,7 +1072,8 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, if (sta->sta.deflink.rx_nss != rx_nss) changed |= IEEE80211_RC_NSS_CHANGED; - drv_sta_rc_update(local, sdata, &sta->sta, changed); + drv_link_sta_rc_update(local, sdata, &sta->sta.deflink, + changed); } rcu_read_unlock(); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index be0700d64549..6ea35c88dc48 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -486,10 +486,11 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, sta->sta.deflink.bandwidth = IEEE80211_STA_RX_BW_20; } + /* FIXME: this check is wrong without SW rate control */ if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) rate_control_rate_init(&sta->deflink); else - rate_control_rate_update(local, sband, sta, 0, changed); + rate_control_rate_update(local, sband, &sta->deflink, changed); out: spin_unlock_bh(&sta->mesh->plink_lock); } diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 23b4f1af37e0..63fe58311a77 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -113,16 +113,15 @@ void rate_control_tx_status(struct ieee80211_local *local, void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_supported_band *sband, - struct sta_info *sta, unsigned int link_id, + struct link_sta_info *link_sta, u32 changed) { struct rate_control_ref *ref = local->rate_ctrl; + struct sta_info *sta = link_sta->sta; struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; struct ieee80211_chanctx_conf *chanctx_conf; - WARN_ON(link_id != 0); - if (ref && ref->ops->rate_update) { rcu_read_lock(); @@ -140,7 +139,8 @@ void rate_control_rate_update(struct ieee80211_local *local, } if (sta->uploaded) - drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); + drv_link_sta_rc_update(local, sta->sdata, link_sta->pub, + changed); } int ieee80211_rate_control_register(const struct rate_control_ops *ops) diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 8d3c8903b4ae..673aa9efe30b 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -33,8 +33,7 @@ void rate_control_rate_init(struct link_sta_info *link_sta); void rate_control_rate_init_all_links(struct sta_info *sta); void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_supported_band *sband, - struct sta_info *sta, - unsigned int link_id, + struct link_sta_info *link_sta, u32 changed); static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c 
index 694b43091fec..d7b294221d43 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3568,7 +3568,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sband = rx->local->hw.wiphy->bands[status->band]; - rate_control_rate_update(local, sband, rx->sta, 0, + rate_control_rate_update(local, sband, rx->link_sta, IEEE80211_RC_SMPS_CHANGED); cfg80211_sta_opmode_change_notify(sdata->dev, rx->sta->addr, @@ -3605,7 +3605,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) ieee80211_sta_rx_bw_to_chan_width(rx->link_sta); sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED; - rate_control_rate_update(local, sband, rx->sta, 0, + rate_control_rate_update(local, sband, rx->link_sta, IEEE80211_RC_BW_CHANGED); cfg80211_sta_opmode_change_notify(sdata->dev, rx->sta->addr, diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index f07b40916485..2f92e7c7f203 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1342,7 +1342,8 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata, bw = min(bw, ieee80211_sta_cap_rx_bw(&sta->deflink)); if (bw != sta->sta.deflink.bandwidth) { sta->sta.deflink.bandwidth = bw; - rate_control_rate_update(local, sband, sta, 0, + rate_control_rate_update(local, sband, + &sta->deflink, IEEE80211_RC_BW_CHANGED); /* * if a TDLS peer BW was updated, we need to diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index e6f0ce8e5d43..7a4985fc2b16 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -939,31 +939,34 @@ TRACE_EVENT(drv_sta_set_txpwr, ) ); -TRACE_EVENT(drv_sta_rc_update, +TRACE_EVENT(drv_link_sta_rc_update, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, + struct ieee80211_link_sta *link_sta, u32 changed), - TP_ARGS(local, sdata, sta, changed), + TP_ARGS(local, sdata, link_sta, changed), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY __field(u32, changed) + __field(u32, link_id) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - STA_ASSIGN; + STA_NAMED_ASSIGN(link_sta->sta); __entry->changed = changed; + __entry->link_id = link_sta->link_id; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " changed: 0x%x", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->changed + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " (link %d) changed: 0x%x", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->link_id, + __entry->changed ) ); diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index eafe47bf201a..0d439cefb445 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -766,8 +766,7 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, if (changed > 0) { ieee80211_recalc_min_chandef(sdata, link_sta->link_id); - rate_control_rate_update(local, sband, link_sta->sta, - link_sta->link_id, changed); + rate_control_rate_update(local, sband, link_sta, changed); } } -- cgit v1.2.3 From 751e7489c1d74b94ffffbed619d8fd724eeff4ee Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Oct 2024 15:00:58 +0300 Subject: wifi: mac80211: expose ieee80211_chan_width_to_rx_bw() to drivers Drivers might need to also do this calculation, no point in them duplicating the code. Since it's so simple, just make it an inline. 
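A driver can then map a channel definition straight to the corresponding
station RX bandwidth, e.g. (hypothetical use, assuming a chanctx conf "ctx"
is at hand):

	enum ieee80211_sta_rx_bandwidth bw =
		ieee80211_chan_width_to_rx_bw(ctx->def.width);

rather than carrying a private copy of the switch statement.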
Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20241007144851.af003cb4a088.I8b5d29504b726caae24af6013c65b3daebe842a2@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 27 +++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 2 -- net/mac80211/vht.c | 22 ---------------------- 3 files changed, 27 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7a6edc04e212..32bdda90a4ac 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7704,6 +7704,33 @@ void ieee80211_set_active_links_async(struct ieee80211_vif *vif, */ void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif); +/** + * ieee80211_chan_width_to_rx_bw - convert channel width to STA RX bandwidth + * @width: the channel width value to convert + * Return: the STA RX bandwidth value for the channel width + */ +static inline enum ieee80211_sta_rx_bandwidth +ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width) +{ + switch (width) { + default: + WARN_ON_ONCE(1); + fallthrough; + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + return IEEE80211_STA_RX_BW_20; + case NL80211_CHAN_WIDTH_40: + return IEEE80211_STA_RX_BW_40; + case NL80211_CHAN_WIDTH_80: + return IEEE80211_STA_RX_BW_80; + case NL80211_CHAN_WIDTH_160: + case NL80211_CHAN_WIDTH_80P80: + return IEEE80211_STA_RX_BW_160; + case NL80211_CHAN_WIDTH_320: + return IEEE80211_STA_RX_BW_320; + } +} + /* for older drivers - let's not document these ... */ int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 45987add530f..d20c2e796703 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2194,8 +2194,6 @@ ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta) return _ieee80211_sta_cur_vht_bw(link_sta, NULL); } void ieee80211_sta_init_nss(struct link_sta_info *link_sta); -enum ieee80211_sta_rx_bandwidth -ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width); enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct link_sta_info *link_sta); void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 0d439cefb445..6a20fa099190 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -479,28 +479,6 @@ ieee80211_sta_rx_bw_to_chan_width(struct link_sta_info *link_sta) } } -enum ieee80211_sta_rx_bandwidth -ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width) -{ - switch (width) { - case NL80211_CHAN_WIDTH_20_NOHT: - case NL80211_CHAN_WIDTH_20: - return IEEE80211_STA_RX_BW_20; - case NL80211_CHAN_WIDTH_40: - return IEEE80211_STA_RX_BW_40; - case NL80211_CHAN_WIDTH_80: - return IEEE80211_STA_RX_BW_80; - case NL80211_CHAN_WIDTH_160: - case NL80211_CHAN_WIDTH_80P80: - return IEEE80211_STA_RX_BW_160; - case NL80211_CHAN_WIDTH_320: - return IEEE80211_STA_RX_BW_320; - default: - WARN_ON_ONCE(1); - return IEEE80211_STA_RX_BW_20; - } -} - /* FIXME: rename/move - this deals with everything not just VHT */ enum ieee80211_sta_rx_bandwidth _ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta, -- cgit v1.2.3 From 3607798ad9bdef35ad08489a8239390fccaac6b5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:42 +0200 Subject: wifi: cfg80211: add option for vif allowed radios This allows users to prevent a vif from affecting radios other than the configured ones. 
This can be useful in cases where e.g. an AP is running on one radio, and triggering a scan on another radio should not disturb it. Changing the allowed radios list for a vif is supported, but only while it is down. While it is possible to achieve the same by always explicitly specifying a frequency list for scan requests and ensuring that the wrong channel/band is never accidentally set on an unrelated interface, this change makes multi-radio wiphy setups a lot easier to deal with for CLI users. By itself, this patch only enforces the radio mask for scanning requests and remain-on-channel. Follow-up changes build on this to limit configured frequencies. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/eefcb218780f71a1549875d149f1196486762756.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 14 +++++++++++ include/uapi/linux/nl80211.h | 5 ++++ net/wireless/core.c | 2 ++ net/wireless/nl80211.c | 60 ++++++++++++++++++++++++++++++++++++++------ net/wireless/scan.c | 10 +++++--- net/wireless/util.c | 29 +++++++++++++++++++++ 6 files changed, 109 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c8ce5c2e14f4..95d05e67e69a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6221,6 +6221,7 @@ enum ieee80211_ap_reg_power { * entered. * @links.cac_time_ms: CAC time in ms * @valid_links: bitmap describing what elements of @links are valid + * @radio_mask: Bitmask of radios that this interface is allowed to operate on. */ struct wireless_dev { struct wiphy *wiphy; @@ -6333,6 +6334,8 @@ struct wireless_dev { unsigned int cac_time_ms; } links[IEEE80211_MLD_MAX_NUM_LINKS]; u16 valid_links; + + u32 radio_mask; }; static inline const u8 *wdev_address(struct wireless_dev *wdev) @@ -6518,6 +6521,17 @@ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan) bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio, const struct cfg80211_chan_def *chandef); +/** + * cfg80211_wdev_channel_allowed - Check if the wdev may use the channel + * + * @wdev: the wireless device + * @chan: channel to check + * + * Return: whether or not the wdev may use the channel + */ +bool cfg80211_wdev_channel_allowed(struct wireless_dev *wdev, + struct ieee80211_channel *chan); + /** * ieee80211_get_response_rate - get basic rate for a given rate * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f97f5adc8d51..d31ccee99cc7 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2868,6 +2868,9 @@ enum nl80211_commands { * nested item, it contains attributes defined in * &enum nl80211_if_combination_attrs. * + * @NL80211_ATTR_VIF_RADIO_MASK: Bitmask of allowed radios (u32). + * A value of 0 means all radios. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3416,6 +3419,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_RADIOS, NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS, + NL80211_ATTR_VIF_RADIO_MASK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/core.c b/net/wireless/core.c index 4c8d8f167409..93d62a1d3a45 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1430,6 +1430,8 @@ void cfg80211_init_wdev(struct wireless_dev *wdev) /* allow mac80211 to determine the timeout */ wdev->ps_timeout = -1; + wdev->radio_mask = BIT(wdev->wiphy->n_radio) - 1; + if ((wdev->iftype == NL80211_IFTYPE_STATION || wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fb35c03af34c..a330347dd7a3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -829,6 +829,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MLO_TTLM_DLINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8), [NL80211_ATTR_MLO_TTLM_ULINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8), [NL80211_ATTR_ASSOC_SPP_AMSDU] = { .type = NLA_FLAG }, + [NL80211_ATTR_VIF_RADIO_MASK] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -3996,7 +3997,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->devlist_generation ^ (cfg80211_rdev_list_generation << 2)) || - nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr)) + nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr) || + nla_put_u32(msg, NL80211_ATTR_VIF_RADIO_MASK, wdev->radio_mask)) goto nla_put_failure; if (rdev->ops->get_channel && !wdev->valid_links) { @@ -4312,6 +4314,29 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev, return -EOPNOTSUPP; } +static int nl80211_parse_vif_radio_mask(struct genl_info *info, + u32 *radio_mask) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct nlattr *attr = info->attrs[NL80211_ATTR_VIF_RADIO_MASK]; + u32 mask, allowed; + + if (!attr) { + *radio_mask = 0; + return 0; + } + + allowed = BIT(rdev->wiphy.n_radio) - 1; + mask = nla_get_u32(attr); + if (mask & ~allowed) + return -EINVAL; + if (!mask) + mask = allowed; + *radio_mask = mask; + + return 1; +} + static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -4319,6 +4344,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) int err; enum nl80211_iftype otype, ntype; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + u32 radio_mask = 0; bool change = false; memset(¶ms, 0, sizeof(params)); @@ -4332,8 +4359,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_MESH_ID]) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - if (ntype != NL80211_IFTYPE_MESH_POINT) return -EINVAL; if (otype != NL80211_IFTYPE_MESH_POINT) @@ -4364,6 +4389,12 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (err > 0) change = true; + err = nl80211_parse_vif_radio_mask(info, &radio_mask); + if (err < 0) + return err; + if (err && netif_running(dev)) + return -EBUSY; + if (change) err = cfg80211_change_iface(rdev, 
dev, ntype, ¶ms); else @@ -4372,11 +4403,11 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (!err && params.use_4addr != -1) dev->ieee80211_ptr->use_4addr = params.use_4addr; - if (change && !err) { - struct wireless_dev *wdev = dev->ieee80211_ptr; + if (radio_mask) + wdev->radio_mask = radio_mask; + if (change && !err) nl80211_notify_iface(rdev, wdev, NL80211_CMD_SET_INTERFACE); - } return err; } @@ -4387,6 +4418,7 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) struct vif_params params; struct wireless_dev *wdev; struct sk_buff *msg; + u32 radio_mask; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; @@ -4424,6 +4456,10 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) if (err < 0) return err; + err = nl80211_parse_vif_radio_mask(info, &radio_mask); + if (err < 0) + return err; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -4465,6 +4501,9 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) break; } + if (radio_mask) + wdev->radio_mask = radio_mask; + if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 0) { nlmsg_free(msg); @@ -9156,6 +9195,9 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev, lockdep_assert_wiphy(wdev->wiphy); + if (!cfg80211_wdev_channel_allowed(wdev, chan)) + return false; + if (!cfg80211_beaconing_iface_active(wdev)) return true; @@ -9368,7 +9410,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } /* ignore disabled channels */ - if (chan->flags & IEEE80211_CHAN_DISABLED) + if (chan->flags & IEEE80211_CHAN_DISABLED || + !cfg80211_wdev_channel_allowed(wdev, chan)) continue; request->channels[i] = chan; @@ -9388,7 +9431,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) chan = &wiphy->bands[band]->channels[j]; - if (chan->flags & IEEE80211_CHAN_DISABLED) + if (chan->flags & IEEE80211_CHAN_DISABLED || + !cfg80211_wdev_channel_allowed(wdev, chan)) continue; request->channels[i] = chan; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 8ba618f4734f..8e3d46bf4836 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -956,7 +956,8 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) struct ieee80211_channel *chan = ieee80211_get_channel(&rdev->wiphy, ap->center_freq); - if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) + if (!chan || chan->flags & IEEE80211_CHAN_DISABLED || + !cfg80211_wdev_channel_allowed(rdev_req->wdev, chan)) continue; for (i = 0; i < rdev_req->n_channels; i++) { @@ -3515,9 +3516,12 @@ int cfg80211_wext_siwscan(struct net_device *dev, continue; for (j = 0; j < wiphy->bands[band]->n_channels; j++) { + struct ieee80211_channel *chan; + /* ignore disabled channels */ - if (wiphy->bands[band]->channels[j].flags & - IEEE80211_CHAN_DISABLED) + chan = &wiphy->bands[band]->channels[j]; + if (chan->flags & IEEE80211_CHAN_DISABLED || + !cfg80211_wdev_channel_allowed(creq->wdev, chan)) continue; /* If we have a wireless request structure and the diff --git a/net/wireless/util.c b/net/wireless/util.c index 93a9c32418a6..040d62051eb9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2923,3 +2923,32 @@ bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio, return true; } EXPORT_SYMBOL(cfg80211_radio_chandef_valid); + +bool cfg80211_wdev_channel_allowed(struct wireless_dev *wdev, + struct 
ieee80211_channel *chan) +{ + struct wiphy *wiphy = wdev->wiphy; + const struct wiphy_radio *radio; + struct cfg80211_chan_def chandef; + u32 radio_mask; + int i; + + radio_mask = wdev->radio_mask; + if (!wiphy->n_radio || radio_mask == BIT(wiphy->n_radio) - 1) + return true; + + cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20); + for (i = 0; i < wiphy->n_radio; i++) { + if (!(radio_mask & BIT(i))) + continue; + + radio = &wiphy->radio[i]; + if (!cfg80211_radio_chandef_valid(radio, &chandef)) + continue; + + return true; + } + + return false; +} +EXPORT_SYMBOL(cfg80211_wdev_channel_allowed); -- cgit v1.2.3 From ebda716ea4da03326ac4d0a71604d18aa8a2e695 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:45 +0200 Subject: wifi: cfg80211: report per wiphy radio antenna mask With multi-radio devices, each radio typically gets a fixed set of antennas. In order to be able to disable specific antennas for some radios, user space needs to know which antenna mask bits are assigned to which radio. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/e0a26afa2c88eaa188ec96ec6d17ecac4e827641.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ include/uapi/linux/nl80211.h | 3 +++ net/wireless/nl80211.c | 5 +++++ 3 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 95d05e67e69a..3100733f3e23 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5434,6 +5434,8 @@ struct wiphy_radio_freq_range { * @iface_combinations: Valid interface combinations array, should not * list single interface types. * @n_iface_combinations: number of entries in @iface_combinations array. + * + * @antenna_mask: bitmask of antennas connected to this radio. */ struct wiphy_radio { const struct wiphy_radio_freq_range *freq_range; @@ -5441,6 +5443,8 @@ struct wiphy_radio { const struct ieee80211_iface_combination *iface_combinations; int n_iface_combinations; + + u32 antenna_mask; }; #define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d31ccee99cc7..1b8827f920ff 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -8036,6 +8036,8 @@ enum nl80211_ap_settings_flags { * @NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION: Supported interface * combination for this radio. Attribute may be present multiple times * and contains attributes defined in &enum nl80211_if_combination_attrs. + * @NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK: bitmask (u32) of antennas + * connected to this radio. 
* * @__NL80211_WIPHY_RADIO_ATTR_LAST: Internal * @NL80211_WIPHY_RADIO_ATTR_MAX: Highest attribute @@ -8046,6 +8048,7 @@ enum nl80211_wiphy_radio_attrs { NL80211_WIPHY_RADIO_ATTR_INDEX, NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE, NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION, + NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK, /* keep last */ __NL80211_WIPHY_RADIO_ATTR_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a330347dd7a3..aa78f18dd454 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2431,6 +2431,11 @@ static int nl80211_put_radio(struct wiphy *wiphy, struct sk_buff *msg, int idx) if (nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_INDEX, idx)) goto nla_put_failure; + if (r->antenna_mask && + nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK, + r->antenna_mask)) + goto nla_put_failure; + for (i = 0; i < r->n_freq_range; i++) { const struct wiphy_radio_freq_range *range = &r->freq_range[i]; -- cgit v1.2.3 From 006a97ceb6732c861c0e2fa3b6a34512caac9354 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:46 +0200 Subject: wifi: mac80211: remove status->ampdu_delimiter_crc This was never used by any driver, so remove it to free up some space. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/e6fee6eed49b105261830db1c74f13841fb9616c.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 +----- net/mac80211/rx.c | 7 +------ 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 32bdda90a4ac..d4f23224eff9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1463,8 +1463,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_AMPDU_IS_LAST: this subframe is the last subframe of the A-MPDU * @RX_FLAG_AMPDU_DELIM_CRC_ERROR: A delimiter CRC error has been detected * on this subframe - * @RX_FLAG_AMPDU_DELIM_CRC_KNOWN: The delimiter CRC field is known (the CRC - * is stored in the @ampdu_delimiter_crc field) * @RX_FLAG_MIC_STRIPPED: The mic was stripped of this packet. Decryption was * done by the hardware * @RX_FLAG_ONLY_MONITOR: Report frame only to monitor interfaces without @@ -1536,7 +1534,7 @@ enum mac80211_rx_flags { RX_FLAG_AMPDU_LAST_KNOWN = BIT(12), RX_FLAG_AMPDU_IS_LAST = BIT(13), RX_FLAG_AMPDU_DELIM_CRC_ERROR = BIT(14), - RX_FLAG_AMPDU_DELIM_CRC_KNOWN = BIT(15), + /* one free bit at 15 */ RX_FLAG_MACTIME = BIT(16) | BIT(17), RX_FLAG_MACTIME_PLCP_START = 1 << 16, RX_FLAG_MACTIME_START = 2 << 16, @@ -1633,7 +1631,6 @@ enum mac80211_rx_encoding { * @rx_flags: internal RX flags for mac80211 * @ampdu_reference: A-MPDU reference number, must be a different value for * each A-MPDU but the same for each subframe within one A-MPDU - * @ampdu_delimiter_crc: A-MPDU delimiter CRC * @zero_length_psdu_type: radiotap type of the 0-length PSDU * @link_valid: if the link which is identified by @link_id is valid. This flag * is set only when connection is MLO. 
@@ -1671,7 +1668,6 @@ struct ieee80211_rx_status { s8 signal; u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; - u8 ampdu_delimiter_crc; u8 zero_length_psdu_type; u8 link_valid:1, link_id:4; }; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d7b294221d43..a34523bbd156 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -508,18 +508,13 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, flags |= IEEE80211_RADIOTAP_AMPDU_IS_LAST; if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_ERROR) flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR; - if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN) - flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN; if (status->flag & RX_FLAG_AMPDU_EOF_BIT_KNOWN) flags |= IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN; if (status->flag & RX_FLAG_AMPDU_EOF_BIT) flags |= IEEE80211_RADIOTAP_AMPDU_EOF; put_unaligned_le16(flags, pos); pos += 2; - if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN) - *pos++ = status->ampdu_delimiter_crc; - else - *pos++ = 0; + *pos++ = 0; *pos++ = 0; } -- cgit v1.2.3 From 9c4f830927750a2bf9fd9426a5257f0fdce3b662 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:47 +0200 Subject: wifi: cfg80211: pass net_device to .set_monitor_channel Preparation for allowing multiple monitor interfaces with different channels on a multi-radio wiphy. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/35fa652dbfebf93343f8b9a08fdef0467a2a02dc.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/wil6210/cfg80211.c | 1 + drivers/net/wireless/marvell/libertas/cfg.c | 1 + drivers/net/wireless/microchip/wilc1000/cfg80211.c | 3 ++- include/net/cfg80211.h | 1 + net/mac80211/cfg.c | 1 + net/wireless/chan.c | 3 ++- net/wireless/core.h | 1 + net/wireless/nl80211.c | 2 +- net/wireless/rdev-ops.h | 5 +++-- net/wireless/trace.h | 10 ++++++---- net/wireless/wext-compat.c | 2 +- 11 files changed, 20 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index e8f1d30a8d73..a1a0a9223e74 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -1493,6 +1493,7 @@ out: } static int wil_cfg80211_set_channel(struct wiphy *wiphy, + struct net_device *dev, struct cfg80211_chan_def *chandef) { struct wil6210_priv *wil = wiphy_to_wil(wiphy); diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index afe9bcd3ad46..2e2c193716d9 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -486,6 +486,7 @@ static int lbs_add_wps_enrollee_tlv(u8 *tlv, const u8 *ie, size_t ie_len) */ static int lbs_cfg_set_monitor_channel(struct wiphy *wiphy, + struct net_device *dev, struct cfg80211_chan_def *chandef) { struct lbs_private *priv = wiphy_priv(wiphy); diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index b0dae6f7c633..e96736cc7259 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -231,6 +231,7 @@ struct wilc_vif *wilc_get_wl_to_vif(struct wilc *wl) } static int set_channel(struct wiphy *wiphy, + struct net_device *dev, struct cfg80211_chan_def *chandef) { struct wilc *wl = wiphy_priv(wiphy); @@ -1424,7 +1425,7 @@ static int start_ap(struct wiphy *wiphy, struct net_device *dev, struct wilc_vif *vif = netdev_priv(dev); int ret; 
- ret = set_channel(wiphy, &settings->chandef); + ret = set_channel(wiphy, dev, &settings->chandef); if (ret != 0) netdev_err(dev, "Error in setting channel\n"); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3100733f3e23..5feb93ba0400 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4694,6 +4694,7 @@ struct cfg80211_ops { struct ieee80211_channel *chan); int (*set_monitor_channel)(struct wiphy *wiphy, + struct net_device *dev, struct cfg80211_chan_def *chandef); int (*scan)(struct wiphy *wiphy, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ecc138869b4b..492349d6f7bb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -897,6 +897,7 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_set_monitor_channel(struct wiphy *wiphy, + struct net_device *dev, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = wiphy_priv(wiphy); diff --git a/net/wireless/chan.c b/net/wireless/chan.c index afd86f7c66ce..40b6375a5de4 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -1628,6 +1628,7 @@ bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, EXPORT_SYMBOL(cfg80211_reg_check_beaconing); int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct cfg80211_chan_def *chandef) { if (!rdev->ops->set_monitor_channel) @@ -1635,7 +1636,7 @@ int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, if (!cfg80211_has_monitors_only(rdev)) return -EBUSY; - return rdev_set_monitor_channel(rdev, chandef); + return rdev_set_monitor_channel(rdev, dev, chandef); } bool cfg80211_any_usable_channels(struct wiphy *wiphy, diff --git a/net/wireless/core.h b/net/wireless/core.h index 3b3e3cd7027a..4c45f994a8c0 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -516,6 +516,7 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start) } int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct cfg80211_chan_def *chandef); int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index aa78f18dd454..84015f56e93a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3567,7 +3567,7 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_MESH_POINT: return cfg80211_set_mesh_channel(rdev, wdev, &chandef); case NL80211_IFTYPE_MONITOR: - return cfg80211_set_monitor_channel(rdev, &chandef); + return cfg80211_set_monitor_channel(rdev, dev, &chandef); default: break; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index f5adbf6b5c84..adb6105bbb7d 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -445,11 +445,12 @@ rdev_libertas_set_mesh_channel(struct cfg80211_registered_device *rdev, static inline int rdev_set_monitor_channel(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct cfg80211_chan_def *chandef) { int ret; - trace_rdev_set_monitor_channel(&rdev->wiphy, chandef); - ret = rdev->ops->set_monitor_channel(&rdev->wiphy, chandef); + trace_rdev_set_monitor_channel(&rdev->wiphy, dev, chandef); + ret = rdev->ops->set_monitor_channel(&rdev->wiphy, dev, chandef); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 97c21b627791..d5c9bb614fa6 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1318,19 +1318,21 
@@ TRACE_EVENT(rdev_libertas_set_mesh_channel, ); TRACE_EVENT(rdev_set_monitor_channel, - TP_PROTO(struct wiphy *wiphy, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_chan_def *chandef), - TP_ARGS(wiphy, chandef), + TP_ARGS(wiphy, netdev, chandef), TP_STRUCT__entry( WIPHY_ENTRY + NETDEV_ENTRY CHAN_DEF_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; + NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); ), - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, - WIPHY_PR_ARG, CHAN_DEF_PR_ARG) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) ); TRACE_EVENT(rdev_auth, diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 0c8d3797a02e..90d5c0592667 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -821,7 +821,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, ret = -EINVAL; break; } - ret = cfg80211_set_monitor_channel(rdev, &chandef); + ret = cfg80211_set_monitor_channel(rdev, dev, &chandef); break; case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wextfreq); -- cgit v1.2.3 From 9d40f7e32774279be7e5a7a278d7a290872b2f81 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:48 +0200 Subject: wifi: mac80211: add flag to opt out of virtual monitor support This is useful for multi-radio devices that are capable of monitoring on multiple channels simultaneously. When this flag is set, each monitor interface is passed to the driver individually and can have a configured channel. The vif mac address for non-active monitor interfaces is cleared, in order to allow the driver to tell them apart from active ones. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/3c55505ee0cf0a5f141fbcb30d1e8be8d9f40373.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/cfg.c | 44 ++++++++++++++++++++++++++++---------------- net/mac80211/chan.c | 14 +++++++++++++- net/mac80211/debugfs.c | 1 + net/mac80211/driver-ops.c | 1 + net/mac80211/iface.c | 22 +++++++++++++++++----- net/mac80211/rx.c | 3 +++ net/mac80211/tx.c | 6 ++++-- net/mac80211/util.c | 14 ++++++++++---- 9 files changed, 83 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d4f23224eff9..b4f246cdcca4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2694,6 +2694,11 @@ struct ieee80211_txq { * a virtual monitor interface when monitor interfaces are the only * active interfaces. * + * @IEEE80211_HW_NO_VIRTUAL_MONITOR: The driver would like to be informed + * of any monitor interface, as well as their configured channel. + * This is useful for supporting multiple monitor interfaces on different + * channels. + * * @IEEE80211_HW_NO_AUTO_VIF: The driver would like for no wlanX to * be created. It is expected user-space will create vifs as * desired (and thus have them named as desired).
@@ -2853,6 +2858,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_DYNAMIC_PS, IEEE80211_HW_MFP_CAPABLE, IEEE80211_HW_WANT_MONITOR_VIF, + IEEE80211_HW_NO_VIRTUAL_MONITOR, IEEE80211_HW_NO_AUTO_VIF, IEEE80211_HW_SW_CRYPTO_CONTROL, IEEE80211_HW_SUPPORT_FAST_XMIT, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 492349d6f7bb..6c0b228523cb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -105,8 +105,11 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, } /* also validate MU-MIMO change */ - monitor_sdata = wiphy_dereference(local->hw.wiphy, - local->monitor_sdata); + if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) + monitor_sdata = sdata; + else + monitor_sdata = wiphy_dereference(local->hw.wiphy, + local->monitor_sdata); if (!monitor_sdata && (params->vht_mumimo_groups || params->vht_mumimo_follow_addr)) @@ -114,7 +117,9 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, /* apply all changes now - no failures allowed */ - if (monitor_sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + if (monitor_sdata && + (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) || + ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))) ieee80211_set_mu_mimo_follow(monitor_sdata, params); if (params->flags) { @@ -907,22 +912,25 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, lockdep_assert_wiphy(local->hw.wiphy); - if (cfg80211_chandef_identical(&local->monitor_chanreq.oper, - &chanreq.oper)) - return 0; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { + if (cfg80211_chandef_identical(&local->monitor_chanreq.oper, + &chanreq.oper)) + return 0; - sdata = wiphy_dereference(local->hw.wiphy, - local->monitor_sdata); - if (!sdata) - goto done; + sdata = wiphy_dereference(wiphy, local->monitor_sdata); + if (!sdata) + goto done; + } - if (cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper, + if (rcu_access_pointer(sdata->deflink.conf->chanctx_conf) && + cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper, &chanreq.oper)) return 0; ieee80211_link_release_channel(&sdata->deflink); ret = ieee80211_link_use_channel(&sdata->deflink, &chanreq, - IEEE80211_CHANCTX_EXCLUSIVE); + IEEE80211_CHANCTX_SHARED); if (ret) return ret; done: @@ -3084,7 +3092,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) return -EOPNOTSUPP; @@ -3118,7 +3127,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, local->user_power_level = user_power_level; list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { has_monitor = true; continue; } @@ -3139,7 +3149,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, } } list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_MONITOR) + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) continue; for (int link_id = 0; @@ -4342,7 +4353,8 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, if (chanctx_conf) { *chandef = link->conf->chanreq.oper; ret = 0; - } else if (local->open_count > 0 && + } else 
if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR) && + local->open_count > 0 && local->open_count == local->monitors && sdata->vif.type == NL80211_IFTYPE_MONITOR) { *chandef = local->monitor_chanreq.oper; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 996965005d49..a442cb667520 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -347,6 +347,10 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: continue; + case NL80211_IFTYPE_MONITOR: + WARN_ON_ONCE(!ieee80211_hw_check(&local->hw, + NO_VIRTUAL_MONITOR)); + fallthrough; case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_OCB: @@ -355,7 +359,6 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: - case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: WARN_ON_ONCE(1); @@ -964,6 +967,10 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, if (!link->sdata->u.mgd.associated) continue; break; + case NL80211_IFTYPE_MONITOR: + if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) + continue; + break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: @@ -976,6 +983,11 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, if (rcu_access_pointer(link->conf->chanctx_conf) != &chanctx->conf) continue; + if (link->sdata->vif.type == NL80211_IFTYPE_MONITOR) { + rx_chains_dynamic = rx_chains_static = local->rx_chains; + break; + } + switch (link->smps_mode) { default: WARN_ONCE(1, "Invalid SMPS mode %d\n", diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 02b5476a4376..b777240c924e 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -456,6 +456,7 @@ static const char *hw_flag_names[] = { FLAG(SUPPORTS_DYNAMIC_PS), FLAG(MFP_CAPABLE), FLAG(WANT_MONITOR_VIF), + FLAG(NO_VIRTUAL_MONITOR), FLAG(NO_AUTO_VIF), FLAG(SW_CRYPTO_CONTROL), FLAG(SUPPORT_FAST_XMIT), diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 84d048339113..299d38e9e863 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -65,6 +65,7 @@ int drv_add_interface(struct ieee80211_local *local, if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR) && !(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)))) return -EINVAL; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7a99fa057cd9..683cc50cc266 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -279,8 +279,13 @@ static int _ieee80211_change_mac(struct ieee80211_sub_if_data *sdata, ret = eth_mac_addr(sdata->dev, sa); if (ret == 0) { - memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN); - ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr); + if (check_dup) { + memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN); + ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr); + } else { + memset(sdata->vif.addr, 0, ETH_ALEN); + memset(sdata->vif.bss_conf.addr, 0, ETH_ALEN); + } } /* Regardless of eth_mac_addr() return we still want to add the @@ -699,9 +704,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do ieee80211_recalc_idle(local); ieee80211_recalc_offload(local); - if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) + if 
(!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) break; + ieee80211_link_release_channel(&sdata->deflink); fallthrough; default: if (!going_down) @@ -1131,7 +1138,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); - if (local->monitor_sdata) + if (local->monitor_sdata || + ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) return 0; sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, GFP_KERNEL); @@ -1193,6 +1201,9 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; + if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) + return; + ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); @@ -1328,7 +1339,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) break; } - if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { + if ((sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) || + ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { res = drv_add_interface(local, sdata); if (res) goto err_stop; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a34523bbd156..d032bfb00ade 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -840,6 +840,9 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, bool last_monitor = list_is_last(&sdata->u.mntr.list, &local->mon_list); + if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) + ieee80211_handle_mu_mimo_mon(sdata, origskb, rtap_space); + if (!monskb) monskb = ieee80211_make_monitor_skb(local, &origskb, rate, rtap_space, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f10379bef9fb..a24636bda679 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1763,7 +1763,8 @@ static bool __ieee80211_tx(struct ieee80211_local *local, switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { + if ((sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) || + ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { vif = &sdata->vif; break; } @@ -3952,7 +3953,8 @@ begin: switch (tx.sdata->vif.type) { case NL80211_IFTYPE_MONITOR: - if (tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { + if ((tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) || + ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { vif = &tx.sdata->vif; break; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index bd93de637f94..a4e1301cc999 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -756,7 +756,8 @@ static void __iterate_interfaces(struct ieee80211_local *local, lockdep_is_held(&local->hw.wiphy->mtx)) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: - if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) + if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) continue; break; case NL80211_IFTYPE_AP_VLAN: @@ -1873,8 +1874,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) } list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) + continue; if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && - sdata->vif.type != NL80211_IFTYPE_MONITOR && ieee80211_sdata_running(sdata)) { res = drv_add_interface(local, sdata); if (WARN_ON(res)) @@ -1887,11 +1890,14 @@ int ieee80211_reconfig(struct ieee80211_local *local) */ if (res) { list_for_each_entry_continue_reverse(sdata, &local->interfaces, - list) + list) { + if (sdata->vif.type == 
NL80211_IFTYPE_MONITOR && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) + continue; if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && - sdata->vif.type != NL80211_IFTYPE_MONITOR && ieee80211_sdata_running(sdata)) drv_remove_interface(local, sdata); + } ieee80211_handle_reconfig_failure(local); return res; } -- cgit v1.2.3 From a77e527b470cc38754c730bce1483711f643bb60 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:49 +0200 Subject: wifi: cfg80211: add monitor SKIP_TX flag This can be used to indicate that the user is not interested in receiving locally sent packets on the monitor interface. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/f0c20f832eadd36c71fba9a2a16ba57d78389b6c.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 2 ++ net/wireless/nl80211.c | 1 + 3 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5feb93ba0400..8f9853b1a5d1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2267,6 +2267,7 @@ static inline int cfg80211_get_station(struct net_device *dev, * @MONITOR_FLAG_OTHER_BSS: disable BSSID filtering * @MONITOR_FLAG_COOK_FRAMES: report frames after processing * @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address + * @MONITOR_FLAG_SKIP_TX: do not pass locally transmitted frames */ enum monitor_flags { MONITOR_FLAG_CHANGED = BIT(__NL80211_MNTR_FLAG_INVALID), @@ -2276,6 +2277,7 @@ enum monitor_flags { MONITOR_FLAG_OTHER_BSS = BIT(NL80211_MNTR_FLAG_OTHER_BSS), MONITOR_FLAG_COOK_FRAMES = BIT(NL80211_MNTR_FLAG_COOK_FRAMES), MONITOR_FLAG_ACTIVE = BIT(NL80211_MNTR_FLAG_ACTIVE), + MONITOR_FLAG_SKIP_TX = BIT(NL80211_MNTR_FLAG_SKIP_TX), }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1b8827f920ff..6d11437596b9 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4703,6 +4703,7 @@ enum nl80211_survey_info { * overrides all other flags. * @NL80211_MNTR_FLAG_ACTIVE: use the configured MAC address * and ACK incoming unicast packets. + * @NL80211_MNTR_FLAG_SKIP_TX: do not pass local tx packets * * @__NL80211_MNTR_FLAG_AFTER_LAST: internal use * @NL80211_MNTR_FLAG_MAX: highest possible monitor flag @@ -4715,6 +4716,7 @@ enum nl80211_mntr_flags { NL80211_MNTR_FLAG_OTHER_BSS, NL80211_MNTR_FLAG_COOK_FRAMES, NL80211_MNTR_FLAG_ACTIVE, + NL80211_MNTR_FLAG_SKIP_TX, /* keep last */ __NL80211_MNTR_FLAG_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 84015f56e93a..4a8c3b6d49d1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4206,6 +4206,7 @@ static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = { [NL80211_MNTR_FLAG_OTHER_BSS] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_COOK_FRAMES] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_ACTIVE] = { .type = NLA_FLAG }, + [NL80211_MNTR_FLAG_SKIP_TX] = { .type = NLA_FLAG }, }; static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) -- cgit v1.2.3 From 8dc6d81c6b2acc434b00c4585f0594739031c4e4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 22 Oct 2024 15:18:34 +0200 Subject: debugfs: add small file operations for most files As struct file_operations is really big, but (most) debugfs files only use simple_open, read, write and perhaps seek, and don't need anything else, this wastes a lot of space for NULL pointers. 
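For illustration only (not part of this patch): a minimal sketch of what a converted caller could look like with the short fops introduced below; the file name, read helper and parent dentry are hypothetical, and only read/llseek are wired up because open falls back to simple_open().

    #include <linux/debugfs.h>
    #include <linux/fs.h>

    /* Hypothetical debugfs file that only needs read and llseek. */
    static ssize_t foo_stats_read(struct file *file, char __user *buf,
                                  size_t count, loff_t *ppos)
    {
            static const char msg[] = "example\n";

            return simple_read_from_buffer(buf, count, ppos, msg, sizeof(msg) - 1);
    }

    /* No open/release: debugfs uses simple_open() for short fops. */
    static const struct debugfs_short_fops foo_stats_fops = {
            .read   = foo_stats_read,
            .llseek = default_llseek,
    };

    static void foo_debugfs_init(struct dentry *parent)
    {
            /* _Generic() in debugfs_create_file() picks the short-fops variant. */
            debugfs_create_file("foo_stats", 0444, parent, NULL, &foo_stats_fops);
    }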
Add a struct debugfs_short_fops and some bookkeeping code in debugfs so that users can use that with debugfs_create_file() using _Generic to figure out which function to use. Converting mac80211 to use it where possible saves quite a bit of space: 1010127 205064 1220 1216411 128f9b net/mac80211/mac80211.ko (before) 981199 205064 1220 1187483 121e9b net/mac80211/mac80211.ko (after) ------- -28928 = ~28KiB With a marginal space cost in debugfs: 8701 550 16 9267 2433 fs/debugfs/inode.o (before) 25233 325 32 25590 63f6 fs/debugfs/file.o (before) 8914 558 16 9488 2510 fs/debugfs/inode.o (after) 25380 325 32 25737 6489 fs/debugfs/file.o (after) --------------- +360 +8 (All on x86-64) A simple spatch suggests there are more than 300 instances, not even counting the ones hidden in macros like in mac80211, that could be trivially converted, for additional savings of about 240 bytes for each. Reviewed-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20241022151838.26f9925fb959.Ia80b55e934bbfc45ce0df42a3233d34b35508046@changeid Signed-off-by: Johannes Berg --- fs/debugfs/file.c | 100 +++++++++++++++++++++++++++++++++--------------- fs/debugfs/inode.c | 63 +++++++++++++----------------- fs/debugfs/internal.h | 6 +++ include/linux/debugfs.h | 62 ++++++++++++++++++++++++++++-- 4 files changed, 160 insertions(+), 71 deletions(-) (limited to 'include') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 67299e8b734e..47dc96dfe386 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -100,8 +100,16 @@ int debugfs_file_get(struct dentry *dentry) if (!fsd) return -ENOMEM; - fsd->real_fops = (void *)((unsigned long)d_fsd & - ~DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); + if ((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_SHORT_FOPS_BIT) { + fsd->real_fops = NULL; + fsd->short_fops = (void *)((unsigned long)d_fsd & + ~(DEBUGFS_FSDATA_IS_REAL_FOPS_BIT | + DEBUGFS_FSDATA_IS_SHORT_FOPS_BIT)); + } else { + fsd->real_fops = (void *)((unsigned long)d_fsd & + ~DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); + fsd->short_fops = NULL; + } refcount_set(&fsd->active_users, 1); init_completion(&fsd->active_users_drained); INIT_LIST_HEAD(&fsd->cancellations); @@ -241,9 +249,10 @@ static int debugfs_locked_down(struct inode *inode, { if ((inode->i_mode & 07777 & ~0444) == 0 && !(filp->f_mode & FMODE_WRITE) && - !real_fops->unlocked_ioctl && - !real_fops->compat_ioctl && - !real_fops->mmap) + (!real_fops || + (!real_fops->unlocked_ioctl && + !real_fops->compat_ioctl && + !real_fops->mmap))) return 0; if (security_locked_down(LOCKDOWN_DEBUGFS)) @@ -316,19 +325,38 @@ static ret_type full_proxy_ ## name(proto) \ return r; \ } -FULL_PROXY_FUNC(llseek, loff_t, filp, - PROTO(struct file *filp, loff_t offset, int whence), - ARGS(filp, offset, whence)); +#define FULL_PROXY_FUNC_BOTH(name, ret_type, filp, proto, args) \ +static ret_type full_proxy_ ## name(proto) \ +{ \ + struct dentry *dentry = F_DENTRY(filp); \ + struct debugfs_fsdata *fsd; \ + ret_type r; \ + \ + r = debugfs_file_get(dentry); \ + if (unlikely(r)) \ + return r; \ + fsd = dentry->d_fsdata; \ + if (fsd->real_fops) \ + r = fsd->real_fops->name(args); \ + else \ + r = fsd->short_fops->name(args); \ + debugfs_file_put(dentry); \ + return r; \ +} + +FULL_PROXY_FUNC_BOTH(llseek, loff_t, filp, + PROTO(struct file *filp, loff_t offset, int whence), + ARGS(filp, offset, whence)); -FULL_PROXY_FUNC(read, ssize_t, filp, - PROTO(struct file *filp, char __user *buf, size_t size, - loff_t *ppos), - ARGS(filp, buf, size, ppos)); +FULL_PROXY_FUNC_BOTH(read, ssize_t, filp, + PROTO(struct file *filp, 
char __user *buf, size_t size, + loff_t *ppos), + ARGS(filp, buf, size, ppos)); -FULL_PROXY_FUNC(write, ssize_t, filp, - PROTO(struct file *filp, const char __user *buf, size_t size, - loff_t *ppos), - ARGS(filp, buf, size, ppos)); +FULL_PROXY_FUNC_BOTH(write, ssize_t, filp, + PROTO(struct file *filp, const char __user *buf, + size_t size, loff_t *ppos), + ARGS(filp, buf, size, ppos)); FULL_PROXY_FUNC(unlocked_ioctl, long, filp, PROTO(struct file *filp, unsigned int cmd, unsigned long arg), @@ -363,7 +391,7 @@ static int full_proxy_release(struct inode *inode, struct file *filp) * not to leak any resources. Releasers must not assume that * ->i_private is still being meaningful here. */ - if (real_fops->release) + if (real_fops && real_fops->release) r = real_fops->release(inode, filp); replace_fops(filp, d_inode(dentry)->i_fop); @@ -373,39 +401,48 @@ static int full_proxy_release(struct inode *inode, struct file *filp) } static void __full_proxy_fops_init(struct file_operations *proxy_fops, - const struct file_operations *real_fops) + struct debugfs_fsdata *fsd) { proxy_fops->release = full_proxy_release; - if (real_fops->llseek) + + if ((fsd->real_fops && fsd->real_fops->llseek) || + (fsd->short_fops && fsd->short_fops->llseek)) proxy_fops->llseek = full_proxy_llseek; - if (real_fops->read) + + if ((fsd->real_fops && fsd->real_fops->read) || + (fsd->short_fops && fsd->short_fops->read)) proxy_fops->read = full_proxy_read; - if (real_fops->write) + + if ((fsd->real_fops && fsd->real_fops->write) || + (fsd->short_fops && fsd->short_fops->write)) proxy_fops->write = full_proxy_write; - if (real_fops->poll) + + if (fsd->real_fops && fsd->real_fops->poll) proxy_fops->poll = full_proxy_poll; - if (real_fops->unlocked_ioctl) + + if (fsd->real_fops && fsd->real_fops->unlocked_ioctl) proxy_fops->unlocked_ioctl = full_proxy_unlocked_ioctl; } static int full_proxy_open(struct inode *inode, struct file *filp) { struct dentry *dentry = F_DENTRY(filp); - const struct file_operations *real_fops = NULL; + const struct file_operations *real_fops; struct file_operations *proxy_fops = NULL; + struct debugfs_fsdata *fsd; int r; r = debugfs_file_get(dentry); if (r) return r == -EIO ? 
-ENOENT : r; - real_fops = debugfs_real_fops(filp); - + fsd = dentry->d_fsdata; + real_fops = fsd->real_fops; r = debugfs_locked_down(inode, filp, real_fops); if (r) goto out; - if (!fops_get(real_fops)) { + if (real_fops && !fops_get(real_fops)) { #ifdef CONFIG_MODULES if (real_fops->owner && real_fops->owner->state == MODULE_STATE_GOING) { @@ -426,11 +463,14 @@ static int full_proxy_open(struct inode *inode, struct file *filp) r = -ENOMEM; goto free_proxy; } - __full_proxy_fops_init(proxy_fops, real_fops); + __full_proxy_fops_init(proxy_fops, fsd); replace_fops(filp, proxy_fops); - if (real_fops->open) { - r = real_fops->open(inode, filp); + if (!real_fops || real_fops->open) { + if (real_fops) + r = real_fops->open(inode, filp); + else + r = simple_open(inode, filp); if (r) { replace_fops(filp, d_inode(dentry)->i_fop); goto free_proxy; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 66d9b3b4c588..38a9c7eb97e6 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -412,7 +412,7 @@ static struct dentry *end_creating(struct dentry *dentry) static struct dentry *__debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *proxy_fops, - const struct file_operations *real_fops) + const void *real_fops) { struct dentry *dentry; struct inode *inode; @@ -450,49 +450,38 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, return end_creating(dentry); } -/** - * debugfs_create_file - create a file in the debugfs filesystem - * @name: a pointer to a string containing the name of the file to create. - * @mode: the permission that the file should have. - * @parent: a pointer to the parent dentry for this file. This should be a - * directory dentry if set. If this parameter is NULL, then the - * file will be created in the root of the debugfs filesystem. - * @data: a pointer to something that the caller will want to get to later - * on. The inode.i_private pointer will point to this value on - * the open() call. - * @fops: a pointer to a struct file_operations that should be used for - * this file. - * - * This is the basic "create a file" function for debugfs. It allows for a - * wide range of flexibility in creating a file, or a directory (if you want - * to create a directory, the debugfs_create_dir() function is - * recommended to be used instead.) - * - * This function will return a pointer to a dentry if it succeeds. This - * pointer must be passed to the debugfs_remove() function when the file is - * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be - * returned. - * - * If debugfs is not enabled in the kernel, the value -%ENODEV will be - * returned. - * - * NOTE: it's expected that most callers should _ignore_ the errors returned - * by this function. Other debugfs functions handle the fact that the "dentry" - * passed to them could be an error and they don't crash in that case. - * Drivers should generally work fine even if debugfs fails to init anyway. 
- */ -struct dentry *debugfs_create_file(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops) +struct dentry *debugfs_create_file_full(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) { + if (WARN_ON((unsigned long)fops & + (DEBUGFS_FSDATA_IS_SHORT_FOPS_BIT | + DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) + return ERR_PTR(-EINVAL); return __debugfs_create_file(name, mode, parent, data, fops ? &debugfs_full_proxy_file_operations : &debugfs_noop_file_operations, fops); } -EXPORT_SYMBOL_GPL(debugfs_create_file); +EXPORT_SYMBOL_GPL(debugfs_create_file_full); + +struct dentry *debugfs_create_file_short(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct debugfs_short_fops *fops) +{ + if (WARN_ON((unsigned long)fops & + (DEBUGFS_FSDATA_IS_SHORT_FOPS_BIT | + DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) + return ERR_PTR(-EINVAL); + + return __debugfs_create_file(name, mode, parent, data, + fops ? &debugfs_full_proxy_file_operations : + &debugfs_noop_file_operations, + (const void *)((unsigned long)fops | + DEBUGFS_FSDATA_IS_SHORT_FOPS_BIT)); +} +EXPORT_SYMBOL_GPL(debugfs_create_file_short); /** * debugfs_create_file_unsafe - create a file in the debugfs filesystem diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h index dae80c2a469e..a3edfa4f0d8e 100644 --- a/fs/debugfs/internal.h +++ b/fs/debugfs/internal.h @@ -18,6 +18,7 @@ extern const struct file_operations debugfs_full_proxy_file_operations; struct debugfs_fsdata { const struct file_operations *real_fops; + const struct debugfs_short_fops *short_fops; union { /* automount_fn is used when real_fops is NULL */ debugfs_automount_t automount; @@ -39,6 +40,11 @@ struct debugfs_fsdata { * pointer gets its lowest bit set. */ #define DEBUGFS_FSDATA_IS_REAL_FOPS_BIT BIT(0) +/* + * A dentry's ->d_fsdata, when pointing to real fops, is with + * short fops instead of full fops. + */ +#define DEBUGFS_FSDATA_IS_SHORT_FOPS_BIT BIT(1) /* Access BITS */ #define DEBUGFS_ALLOW_API BIT(0) diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 0928a6c8ae1e..59444b495d49 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -71,9 +71,63 @@ typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); struct dentry *debugfs_lookup(const char *name, struct dentry *parent); -struct dentry *debugfs_create_file(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops); +struct debugfs_short_fops { + ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); + ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); + loff_t (*llseek) (struct file *, loff_t, int); +}; + +struct dentry *debugfs_create_file_full(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); +struct dentry *debugfs_create_file_short(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct debugfs_short_fops *fops); + +/** + * debugfs_create_file - create a file in the debugfs filesystem + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have. + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is NULL, then the + * file will be created in the root of the debugfs filesystem. 
+ * @data: a pointer to something that the caller will want to get to later + * on. The inode.i_private pointer will point to this value on + * the open() call. + * @fops: a pointer to a struct file_operations or struct debugfs_short_fops that + * should be used for this file. + * + * This is the basic "create a file" function for debugfs. It allows for a + * wide range of flexibility in creating a file, or a directory (if you want + * to create a directory, the debugfs_create_dir() function is + * recommended to be used instead.) + * + * This function will return a pointer to a dentry if it succeeds. This + * pointer must be passed to the debugfs_remove() function when the file is + * to be removed (no automatic cleanup happens if your module is unloaded, + * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be + * returned. + * + * If debugfs is not enabled in the kernel, the value -%ENODEV will be + * returned. + * + * If fops points to a struct debugfs_short_fops, then simple_open() will be + * used for the open, and only read/write/llseek are supported and are proxied, + * so no module reference or release are needed. + * + * NOTE: it's expected that most callers should _ignore_ the errors returned + * by this function. Other debugfs functions handle the fact that the "dentry" + * passed to them could be an error and they don't crash in that case. + * Drivers should generally work fine even if debugfs fails to init anyway. + */ +#define debugfs_create_file(name, mode, parent, data, fops) \ + _Generic(fops, \ + const struct file_operations *: debugfs_create_file_full, \ + const struct debugfs_short_fops *: debugfs_create_file_short, \ + struct file_operations *: debugfs_create_file_full, \ + struct debugfs_short_fops *: debugfs_create_file_short) \ + (name, mode, parent, data, fops) + struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); @@ -207,7 +261,7 @@ static inline struct dentry *debugfs_lookup(const char *name, static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, - const struct file_operations *fops) + const void *fops) { return ERR_PTR(-ENODEV); } -- cgit v1.2.3 From 26ff1fb02991e1260481185bb5ccab1ee498d5e4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Oct 2024 09:19:29 -0700 Subject: rcu: Delete unused rcu_gp_might_be_stalled() function The rcu_gp_might_be_stalled() function is no longer used, so this commit removes it. Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes (Google) Signed-off-by: Frederic Weisbecker --- include/linux/rcutiny.h | 1 - include/linux/rcutree.h | 1 - kernel/rcu/tree_stall.h | 30 ------------------------------ 3 files changed, 32 deletions(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 0ee270b3f5ed..fe42315f667f 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -165,7 +165,6 @@ static inline bool rcu_inkernel_boot_has_ended(void) { return true; } static inline bool rcu_is_watching(void) { return true; } static inline void rcu_momentary_eqs(void) { } static inline void kfree_rcu_scheduler_running(void) { } -static inline bool rcu_gp_might_be_stalled(void) { return false; } /* Avoid RCU read-side critical sections leaking across. 
*/ static inline void rcu_all_qs(void) { barrier(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 90a684f94776..27d86d912781 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -40,7 +40,6 @@ void kvfree_rcu_barrier(void); void rcu_barrier(void); void rcu_momentary_eqs(void); void kfree_rcu_scheduler_running(void); -bool rcu_gp_might_be_stalled(void); struct rcu_gp_oldstate { unsigned long rgos_norm; diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 4432db6d0b99..d7cdd535e50b 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -76,36 +76,6 @@ int rcu_jiffies_till_stall_check(void) } EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check); -/** - * rcu_gp_might_be_stalled - Is it likely that the grace period is stalled? - * - * Returns @true if the current grace period is sufficiently old that - * it is reasonable to assume that it might be stalled. This can be - * useful when deciding whether to allocate memory to enable RCU-mediated - * freeing on the one hand or just invoking synchronize_rcu() on the other. - * The latter is preferable when the grace period is stalled. - * - * Note that sampling of the .gp_start and .gp_seq fields must be done - * carefully to avoid false positives at the beginnings and ends of - * grace periods. - */ -bool rcu_gp_might_be_stalled(void) -{ - unsigned long d = rcu_jiffies_till_stall_check() / RCU_STALL_MIGHT_DIV; - unsigned long j = jiffies; - - if (d < RCU_STALL_MIGHT_MIN) - d = RCU_STALL_MIGHT_MIN; - smp_mb(); // jiffies before .gp_seq to avoid false positives. - if (!rcu_gp_in_progress()) - return false; - // Long delays at this point avoids false positive, but a delay - // of ULONG_MAX/4 jiffies voids your no-false-positive warranty. - smp_mb(); // .gp_seq before second .gp_start - // And ditto here. - return !time_before(j, READ_ONCE(rcu_state.gp_start) + d); -} - /* Don't do RCU CPU stall warnings during long sysrq printouts. */ void rcu_sysrq_start(void) { -- cgit v1.2.3 From b3aba04883de872488e5dabda199427b2bfa0395 Mon Sep 17 00:00:00 2001 From: Manikanta Mylavarapu Date: Tue, 20 Aug 2024 11:26:16 +0530 Subject: dt-bindings: clock: qcom: gcc-ipq5332: remove q6 bring up clock macros Q6 firmware takes care of bringup clocks, so remove them. 
Signed-off-by: Manikanta Mylavarapu Reviewed-by: Krzysztof Kozlowski Signed-off-by: Gokul Sriram Palanisamy Link: https://lore.kernel.org/r/20240820055618.267554-4-quic_gokulsri@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,ipq5332-gcc.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,ipq5332-gcc.h b/include/dt-bindings/clock/qcom,ipq5332-gcc.h index 8a405a0a96d0..da9b507c30bf 100644 --- a/include/dt-bindings/clock/qcom,ipq5332-gcc.h +++ b/include/dt-bindings/clock/qcom,ipq5332-gcc.h @@ -96,15 +96,7 @@ #define GCC_PCNOC_BFDCD_CLK_SRC 87 #define GCC_PCNOC_LPASS_CLK 88 #define GCC_PRNG_AHB_CLK 89 -#define GCC_Q6_AHB_CLK 90 -#define GCC_Q6_AHB_S_CLK 91 -#define GCC_Q6_AXIM_CLK 92 #define GCC_Q6_AXIM_CLK_SRC 93 -#define GCC_Q6_AXIS_CLK 94 -#define GCC_Q6_TSCTR_1TO2_CLK 95 -#define GCC_Q6SS_ATBM_CLK 96 -#define GCC_Q6SS_PCLKDBG_CLK 97 -#define GCC_Q6SS_TRIG_CLK 98 #define GCC_QDSS_AT_CLK 99 #define GCC_QDSS_AT_CLK_SRC 100 #define GCC_QDSS_CFG_AHB_CLK 101 @@ -134,7 +126,6 @@ #define GCC_SNOC_PCIE3_2LANE_S_CLK 125 #define GCC_SNOC_USB_CLK 126 #define GCC_SYS_NOC_AT_CLK 127 -#define GCC_SYS_NOC_WCSS_AHB_CLK 128 #define GCC_SYSTEM_NOC_BFDCD_CLK_SRC 129 #define GCC_UNIPHY0_AHB_CLK 130 #define GCC_UNIPHY0_SYS_CLK 131 @@ -155,17 +146,6 @@ #define GCC_USB0_PIPE_CLK 146 #define GCC_USB0_SLEEP_CLK 147 #define GCC_WCSS_AHB_CLK_SRC 148 -#define GCC_WCSS_AXIM_CLK 149 -#define GCC_WCSS_AXIS_CLK 150 -#define GCC_WCSS_DBG_IFC_APB_BDG_CLK 151 -#define GCC_WCSS_DBG_IFC_APB_CLK 152 -#define GCC_WCSS_DBG_IFC_ATB_BDG_CLK 153 -#define GCC_WCSS_DBG_IFC_ATB_CLK 154 -#define GCC_WCSS_DBG_IFC_NTS_BDG_CLK 155 -#define GCC_WCSS_DBG_IFC_NTS_CLK 156 -#define GCC_WCSS_ECAHB_CLK 157 -#define GCC_WCSS_MST_ASYNC_BDG_CLK 158 -#define GCC_WCSS_SLV_ASYNC_BDG_CLK 159 #define GCC_XO_CLK 160 #define GCC_XO_CLK_SRC 161 #define GCC_XO_DIV4_CLK 162 -- cgit v1.2.3 From da040d56031976144740bddba942485999f6a16f Mon Sep 17 00:00:00 2001 From: Manikanta Mylavarapu Date: Tue, 20 Aug 2024 11:26:17 +0530 Subject: dt-bindings: clock: qcom: gcc-ipq9574: remove q6 bring up clock macros Q6 firmware takes care of bringup clocks, so remove them. 
Signed-off-by: Manikanta Mylavarapu Reviewed-by: Krzysztof Kozlowski Signed-off-by: Gokul Sriram Palanisamy Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240820055618.267554-5-quic_gokulsri@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,ipq9574-gcc.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,ipq9574-gcc.h b/include/dt-bindings/clock/qcom,ipq9574-gcc.h index 52123c5a09fa..f238aa4794a8 100644 --- a/include/dt-bindings/clock/qcom,ipq9574-gcc.h +++ b/include/dt-bindings/clock/qcom,ipq9574-gcc.h @@ -132,16 +132,8 @@ #define GCC_NSSNOC_SNOC_1_CLK 123 #define GCC_QDSS_ETR_USB_CLK 124 #define WCSS_AHB_CLK_SRC 125 -#define GCC_Q6_AHB_CLK 126 -#define GCC_Q6_AHB_S_CLK 127 -#define GCC_WCSS_ECAHB_CLK 128 -#define GCC_WCSS_ACMT_CLK 129 -#define GCC_SYS_NOC_WCSS_AHB_CLK 130 #define WCSS_AXI_M_CLK_SRC 131 -#define GCC_ANOC_WCSS_AXI_M_CLK 132 #define QDSS_AT_CLK_SRC 133 -#define GCC_Q6SS_ATBM_CLK 134 -#define GCC_WCSS_DBG_IFC_ATB_CLK 135 #define GCC_NSSNOC_ATB_CLK 136 #define GCC_QDSS_AT_CLK 137 #define GCC_SYS_NOC_AT_CLK 138 @@ -154,27 +146,18 @@ #define QDSS_TRACECLKIN_CLK_SRC 145 #define GCC_QDSS_TRACECLKIN_CLK 146 #define QDSS_TSCTR_CLK_SRC 147 -#define GCC_Q6_TSCTR_1TO2_CLK 148 -#define GCC_WCSS_DBG_IFC_NTS_CLK 149 #define GCC_QDSS_TSCTR_DIV2_CLK 150 #define GCC_QDSS_TS_CLK 151 #define GCC_QDSS_TSCTR_DIV4_CLK 152 #define GCC_NSS_TS_CLK 153 #define GCC_QDSS_TSCTR_DIV8_CLK 154 #define GCC_QDSS_TSCTR_DIV16_CLK 155 -#define GCC_Q6SS_PCLKDBG_CLK 156 -#define GCC_Q6SS_TRIG_CLK 157 -#define GCC_WCSS_DBG_IFC_APB_CLK 158 -#define GCC_WCSS_DBG_IFC_DAPBUS_CLK 159 #define GCC_QDSS_DAP_CLK 160 #define GCC_QDSS_APB2JTAG_CLK 161 #define GCC_QDSS_TSCTR_DIV3_CLK 162 #define QPIC_IO_MACRO_CLK_SRC 163 #define GCC_QPIC_IO_MACRO_CLK 164 #define Q6_AXI_CLK_SRC 165 -#define GCC_Q6_AXIM_CLK 166 -#define GCC_WCSS_Q6_TBU_CLK 167 -#define GCC_MEM_NOC_Q6_AXI_CLK 168 #define Q6_AXIM2_CLK_SRC 169 #define NSSNOC_MEMNOC_BFDCD_CLK_SRC 170 #define GCC_NSSNOC_MEMNOC_CLK 171 @@ -199,7 +182,6 @@ #define GCC_UNIPHY2_SYS_CLK 190 #define GCC_CMN_12GPLL_SYS_CLK 191 #define GCC_NSSNOC_XO_DCD_CLK 192 -#define GCC_Q6SS_BOOT_CLK 193 #define UNIPHY_SYS_CLK_SRC 194 #define NSS_TS_CLK_SRC 195 #define GCC_ANOC_PCIE0_1LANE_M_CLK 196 -- cgit v1.2.3 From da09a9e0c3eab164af950be44ee6bdea8527c3e5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 18 Oct 2024 22:22:51 +0200 Subject: uprobe: Add data pointer to consumer handlers Adding data pointer to both entry and exit consumer handlers and all its users. The functionality itself is coming in following change. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Oleg Nesterov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20241018202252.693462-2-jolsa@kernel.org --- include/linux/uprobes.h | 4 ++-- kernel/events/uprobes.c | 4 ++-- kernel/trace/bpf_trace.c | 6 ++++-- kernel/trace/trace_uprobe.c | 12 ++++++++---- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 2 +- 5 files changed, 17 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 2b294bf1881f..bb265a632b91 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -37,10 +37,10 @@ struct uprobe_consumer { * for the current process. If filter() is omitted or returns true, * UPROBE_HANDLER_REMOVE is effectively ignored. 
*/ - int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); + int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data); int (*ret_handler)(struct uprobe_consumer *self, unsigned long func, - struct pt_regs *regs); + struct pt_regs *regs, __u64 *data); bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm); struct list_head cons_node; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 2a0059464383..6b44c386a5df 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -2090,7 +2090,7 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) int rc = 0; if (uc->handler) { - rc = uc->handler(uc, regs); + rc = uc->handler(uc, regs, NULL); WARN(rc & ~UPROBE_HANDLER_MASK, "bad rc=0x%x from %ps()\n", rc, uc->handler); } @@ -2128,7 +2128,7 @@ handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs) rcu_read_lock_trace(); list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) { if (uc->ret_handler) - uc->ret_handler(uc, ri->func, regs); + uc->ret_handler(uc, ri->func, regs, NULL); } rcu_read_unlock_trace(); } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index a582cd25ca87..fdab7ecd8dfa 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3244,7 +3244,8 @@ uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm) } static int -uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) +uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs, + __u64 *data) { struct bpf_uprobe *uprobe; @@ -3253,7 +3254,8 @@ uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) } static int -uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) +uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs, + __u64 *data) { struct bpf_uprobe *uprobe; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index c40531d2cbad..5895eabe3581 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -89,9 +89,11 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev) static int register_uprobe_event(struct trace_uprobe *tu); static int unregister_uprobe_event(struct trace_uprobe *tu); -static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); +static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs, + __u64 *data); static int uretprobe_dispatcher(struct uprobe_consumer *con, - unsigned long func, struct pt_regs *regs); + unsigned long func, struct pt_regs *regs, + __u64 *data); #ifdef CONFIG_STACK_GROWSUP static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n) @@ -1517,7 +1519,8 @@ trace_uprobe_register(struct trace_event_call *event, enum trace_reg type, } } -static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) +static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs, + __u64 *data) { struct trace_uprobe *tu; struct uprobe_dispatch_data udd; @@ -1548,7 +1551,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) } static int uretprobe_dispatcher(struct uprobe_consumer *con, - unsigned long func, struct pt_regs *regs) + unsigned long func, struct pt_regs *regs, + __u64 *data) { struct trace_uprobe *tu; struct uprobe_dispatch_data udd; diff --git 
a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 8835761d9a12..12005e3dc3e4 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -461,7 +461,7 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { static int uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func, - struct pt_regs *regs) + struct pt_regs *regs, __u64 *data) { regs->ax = 0x12345678deadbeef; -- cgit v1.2.3 From 4d756095d3994cb41393817dc696b458938a6bd0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 18 Oct 2024 22:22:52 +0200 Subject: uprobe: Add support for session consumer This change allows the uprobe consumer to behave as a session, which means that the 'handler' and 'ret_handler' callbacks are connected in a way that allows the consumer to: - control execution of 'ret_handler' from the 'handler' callback - share data between 'handler' and 'ret_handler' callbacks The session concept fits our common use case where we do filtering in the entry uprobe and, based on the result, decide whether to run the return uprobe (or not). It's also convenient to share data between session callbacks. To achieve this we add a new return value the uprobe consumer can return from the 'handler' callback: UPROBE_HANDLER_IGNORE - Ignore the 'ret_handler' callback for this consumer. We also store a cookie and pass it to 'ret_handler' when the consumer has both 'handler' and 'ret_handler' callbacks defined. We store shared data in the return_consumer object array as part of the return_instance object. This way handle_uretprobe_chain can find the related return_consumer and its shared data. We also store the entry handler's return value, for cases when there are multiple consumers on a single uprobe and some of them are ignored while some are not, in which case the return probe gets installed and we need a way to find out which consumer needs to be ignored. The tricky part is when a consumer is registered 'after' the uprobe entry handler is hit. In such a case this consumer's 'ret_handler' gets executed as well, but it won't have the proper data pointer set, so we can filter it out. Suggested-by: Oleg Nesterov Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20241018202252.693462-3-jolsa@kernel.org --- include/linux/uprobes.h | 21 ++++++- kernel/events/uprobes.c | 148 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 139 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index bb265a632b91..dbaf04189548 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -23,8 +23,17 @@ struct inode; struct notifier_block; struct page; +/* + * Allowed return values from uprobe consumer's handler callback + * with following meaning: + * + * UPROBE_HANDLER_REMOVE + * - Remove the uprobe breakpoint from current->mm. + * UPROBE_HANDLER_IGNORE + * - Ignore ret_handler callback for this consumer.
+ */ #define UPROBE_HANDLER_REMOVE 1 -#define UPROBE_HANDLER_MASK 1 +#define UPROBE_HANDLER_IGNORE 2 #define MAX_URETPROBE_DEPTH 64 @@ -44,6 +53,8 @@ struct uprobe_consumer { bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm); struct list_head cons_node; + + __u64 id; /* set when uprobe_consumer is registered */ }; #ifdef CONFIG_UPROBES @@ -83,14 +94,22 @@ struct uprobe_task { unsigned int depth; }; +struct return_consumer { + __u64 cookie; + __u64 id; +}; + struct return_instance { struct uprobe *uprobe; unsigned long func; unsigned long stack; /* stack pointer */ unsigned long orig_ret_vaddr; /* original return address */ bool chained; /* true, if instance is nested */ + int consumers_cnt; struct return_instance *next; /* keep as stack */ + + struct return_consumer consumers[] __counted_by(consumers_cnt); }; enum rp_check { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 6b44c386a5df..4ef4b51776eb 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -64,7 +64,7 @@ struct uprobe { struct rcu_head rcu; loff_t offset; loff_t ref_ctr_offset; - unsigned long flags; + unsigned long flags; /* "unsigned long" so bitops work */ /* * The generic code assumes that it has two members of unknown type @@ -823,8 +823,11 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) { + static atomic64_t id; + down_write(&uprobe->consumer_rwsem); list_add_rcu(&uc->cons_node, &uprobe->consumers); + uc->id = (__u64) atomic64_inc_return(&id); up_write(&uprobe->consumer_rwsem); } @@ -1761,6 +1764,34 @@ static struct uprobe_task *get_utask(void) return current->utask; } +static size_t ri_size(int consumers_cnt) +{ + struct return_instance *ri; + + return sizeof(*ri) + sizeof(ri->consumers[0]) * consumers_cnt; +} + +#define DEF_CNT 4 + +static struct return_instance *alloc_return_instance(void) +{ + struct return_instance *ri; + + ri = kzalloc(ri_size(DEF_CNT), GFP_KERNEL); + if (!ri) + return ZERO_SIZE_PTR; + + ri->consumers_cnt = DEF_CNT; + return ri; +} + +static struct return_instance *dup_return_instance(struct return_instance *old) +{ + size_t size = ri_size(old->consumers_cnt); + + return kmemdup(old, size, GFP_KERNEL); +} + static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) { struct uprobe_task *n_utask; @@ -1773,11 +1804,10 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) p = &n_utask->return_instances; for (o = o_utask->return_instances; o; o = o->next) { - n = kmalloc(sizeof(struct return_instance), GFP_KERNEL); + n = dup_return_instance(o); if (!n) return -ENOMEM; - *n = *o; /* * uprobe's refcnt has to be positive at this point, kept by * utask->return_instances items; return_instances can't be @@ -1870,35 +1900,31 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained, utask->return_instances = ri; } -static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) +static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs, + struct return_instance *ri) { struct uprobe_task *utask = current->utask; unsigned long orig_ret_vaddr, trampoline_vaddr; - struct return_instance *ri; bool chained; if (!get_xol_area()) - return; + goto free; if (utask->depth >= MAX_URETPROBE_DEPTH) { printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to" " nestedness limit pid/tgid=%d/%d\n", current->pid, current->tgid); - return; + goto free; } /* we need to bump refcount to 
store uprobe in utask */ if (!try_get_uprobe(uprobe)) - return; - - ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL); - if (!ri) - goto fail; + goto free; trampoline_vaddr = uprobe_get_trampoline_vaddr(); orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); if (orig_ret_vaddr == -1) - goto fail; + goto put; /* drop the entries invalidated by longjmp() */ chained = (orig_ret_vaddr == trampoline_vaddr); @@ -1916,7 +1942,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) * attack from user-space. */ uprobe_warn(current, "handle tail call"); - goto fail; + goto put; } orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; } @@ -1931,9 +1957,10 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) utask->return_instances = ri; return; -fail: - kfree(ri); +put: put_uprobe(uprobe); +free: + kfree(ri); } /* Prepare to single-step probed instruction out of line. */ @@ -2077,34 +2104,90 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb return uprobe; } +static struct return_instance* +push_consumer(struct return_instance *ri, int idx, __u64 id, __u64 cookie) +{ + if (unlikely(ri == ZERO_SIZE_PTR)) + return ri; + + if (unlikely(idx >= ri->consumers_cnt)) { + struct return_instance *old_ri = ri; + + ri->consumers_cnt += DEF_CNT; + ri = krealloc(old_ri, ri_size(old_ri->consumers_cnt), GFP_KERNEL); + if (!ri) { + kfree(old_ri); + return ZERO_SIZE_PTR; + } + } + + ri->consumers[idx].id = id; + ri->consumers[idx].cookie = cookie; + return ri; +} + +static struct return_consumer * +return_consumer_find(struct return_instance *ri, int *iter, int id) +{ + struct return_consumer *ric; + int idx = *iter; + + for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) { + if (ric->id == id) { + *iter = idx + 1; + return ric; + } + } + return NULL; +} + +static bool ignore_ret_handler(int rc) +{ + return rc == UPROBE_HANDLER_REMOVE || rc == UPROBE_HANDLER_IGNORE; +} + static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) { struct uprobe_consumer *uc; - int remove = UPROBE_HANDLER_REMOVE; - bool need_prep = false; /* prepare return uprobe, when needed */ - bool has_consumers = false; + bool has_consumers = false, remove = true; + struct return_instance *ri = NULL; + int push_idx = 0; current->utask->auprobe = &uprobe->arch; list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) { + bool session = uc->handler && uc->ret_handler; + __u64 cookie = 0; int rc = 0; if (uc->handler) { - rc = uc->handler(uc, regs, NULL); - WARN(rc & ~UPROBE_HANDLER_MASK, + rc = uc->handler(uc, regs, &cookie); + WARN(rc < 0 || rc > 2, "bad rc=0x%x from %ps()\n", rc, uc->handler); } - if (uc->ret_handler) - need_prep = true; - - remove &= rc; + remove &= rc == UPROBE_HANDLER_REMOVE; has_consumers = true; + + if (!uc->ret_handler || ignore_ret_handler(rc)) + continue; + + if (!ri) + ri = alloc_return_instance(); + + if (session) + ri = push_consumer(ri, push_idx++, uc->id, cookie); } current->utask->auprobe = NULL; - if (need_prep && !remove) - prepare_uretprobe(uprobe, regs); /* put bp at return */ + if (!ZERO_OR_NULL_PTR(ri)) { + /* + * The push_idx value has the final number of return consumers, + * and ri->consumers_cnt has number of allocated consumers. 
+ */ + ri->consumers_cnt = push_idx; + prepare_uretprobe(uprobe, regs, ri); + } if (remove && has_consumers) { down_read(&uprobe->register_rwsem); @@ -2123,12 +2206,19 @@ static void handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs) { struct uprobe *uprobe = ri->uprobe; + struct return_consumer *ric; struct uprobe_consumer *uc; + int ric_idx = 0; rcu_read_lock_trace(); list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) { - if (uc->ret_handler) - uc->ret_handler(uc, ri->func, regs, NULL); + bool session = uc->handler && uc->ret_handler; + + if (uc->ret_handler) { + ric = return_consumer_find(ri, &ric_idx, uc->id); + if (!session || ric) + uc->ret_handler(uc, ri->func, regs, ric ? &ric->cookie : NULL); + } } rcu_read_unlock_trace(); } -- cgit v1.2.3 From ee1251fc0c4e799a48025318f262739919deb977 Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Tue, 22 Oct 2024 11:49:45 +0000 Subject: cgroup/freezer: Reduce redundant traversal for cgroup_freeze MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whether a cgroup is frozen is determined solely by whether it is set to to be frozen and whether its parent is frozen. Currently, when is cgroup is frozen or unfrozen, it iterates through the entire subtree to freeze or unfreeze its descentdants. However, this is unesessary for a cgroup that does not change its effective frozen status. This path aims to skip the subtree if its parent does not have a change in effective freeze. For an example, subtree like, a-b-c-d-e-f-g, when a is frozen, the entire tree is frozen. If we freeze b and c again, it is unesessary to iterate d, e, f and g. So does that If we unfreeze b/c. Reviewed-by: Michal Koutný Signed-off-by: Chen Ridong Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- kernel/cgroup/freezer.c | 30 ++++++++++++++---------------- 2 files changed, 15 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 0a80ef9191a6..1b20d2d8ef7c 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -398,7 +398,7 @@ struct cgroup_freezer_state { bool freeze; /* Should the cgroup actually be frozen? */ - int e_freeze; + bool e_freeze; /* Fields below are protected by css_set_lock */ diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c index 617861a54793..188d5f2aeb5a 100644 --- a/kernel/cgroup/freezer.c +++ b/kernel/cgroup/freezer.c @@ -260,8 +260,10 @@ void cgroup_freezer_migrate_task(struct task_struct *task, void cgroup_freeze(struct cgroup *cgrp, bool freeze) { struct cgroup_subsys_state *css; + struct cgroup *parent; struct cgroup *dsct; bool applied = false; + bool old_e; lockdep_assert_held(&cgroup_mutex); @@ -282,22 +284,18 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze) if (cgroup_is_dead(dsct)) continue; - if (freeze) { - dsct->freezer.e_freeze++; - /* - * Already frozen because of ancestor's settings? - */ - if (dsct->freezer.e_freeze > 1) - continue; - } else { - dsct->freezer.e_freeze--; - /* - * Still frozen because of ancestor's settings? - */ - if (dsct->freezer.e_freeze > 0) - continue; - - WARN_ON_ONCE(dsct->freezer.e_freeze < 0); + /* + * e_freeze is affected by parent's e_freeze and dst's freeze. + * If old e_freeze eq new e_freeze, no change, its children + * will not be affected. 
So do nothing and skip the subtree + */ + old_e = dsct->freezer.e_freeze; + parent = cgroup_parent(dsct); + dsct->freezer.e_freeze = (dsct->freezer.freeze || + parent->freezer.e_freeze); + if (dsct->freezer.e_freeze == old_e) { + css = css_rightmost_descendant(css); + continue; } /* -- cgit v1.2.3 From c0813ce2e5b0d1174782aff30d366509377abc7b Mon Sep 17 00:00:00 2001 From: Pengfei Li Date: Wed, 23 Oct 2024 11:46:48 -0700 Subject: dt-bindings: clock: imx93: Drop IMX93_CLK_END macro definition IMX93_CLK_END should be dropped as it is not part of the ABI. Signed-off-by: Pengfei Li Acked-by: Krzysztof Kozlowski Acked-by: Peng Fan Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20241023184651.381265-3-pengfei.li_1@nxp.com Signed-off-by: Abel Vesa --- include/dt-bindings/clock/imx93-clock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/imx93-clock.h b/include/dt-bindings/clock/imx93-clock.h index 787c9e74dc96..a1d0b326bb6b 100644 --- a/include/dt-bindings/clock/imx93-clock.h +++ b/include/dt-bindings/clock/imx93-clock.h @@ -204,6 +204,5 @@ #define IMX93_CLK_A55_SEL 199 #define IMX93_CLK_A55_CORE 200 #define IMX93_CLK_PDM_IPG 201 -#define IMX93_CLK_END 202 #endif -- cgit v1.2.3 From f029d870096fcd8565a2ee8c2d0078b9aaec4fdb Mon Sep 17 00:00:00 2001 From: Pengfei Li Date: Wed, 23 Oct 2024 11:46:49 -0700 Subject: dt-bindings: clock: Add i.MX91 clock support i.MX91 has similar Clock Control Module(CCM) design as i.MX93, only add few new clock compared to i.MX93. Add a new compatible string and some new clocks for i.MX91. Signed-off-by: Pengfei Li Reviewed-by: Frank Li Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241023184651.381265-4-pengfei.li_1@nxp.com Signed-off-by: Abel Vesa --- Documentation/devicetree/bindings/clock/imx93-clock.yaml | 1 + include/dt-bindings/clock/imx93-clock.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/imx93-clock.yaml b/Documentation/devicetree/bindings/clock/imx93-clock.yaml index ccb53c6b96c1..98c0800732ef 100644 --- a/Documentation/devicetree/bindings/clock/imx93-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx93-clock.yaml @@ -16,6 +16,7 @@ description: | properties: compatible: enum: + - fsl,imx91-ccm - fsl,imx93-ccm reg: diff --git a/include/dt-bindings/clock/imx93-clock.h b/include/dt-bindings/clock/imx93-clock.h index a1d0b326bb6b..6c685067288b 100644 --- a/include/dt-bindings/clock/imx93-clock.h +++ b/include/dt-bindings/clock/imx93-clock.h @@ -204,5 +204,10 @@ #define IMX93_CLK_A55_SEL 199 #define IMX93_CLK_A55_CORE 200 #define IMX93_CLK_PDM_IPG 201 +#define IMX91_CLK_ENET1_QOS_TSN 202 +#define IMX91_CLK_ENET_TIMER 203 +#define IMX91_CLK_ENET2_REGULAR 204 +#define IMX91_CLK_ENET2_REGULAR_GATE 205 +#define IMX91_CLK_ENET1_QOS_TSN_GATE 206 #endif -- cgit v1.2.3 From 04af8a399fa40310f831b4f1dc9f757085f41983 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 18 Oct 2024 17:47:48 +0300 Subject: PCI: Protect Link Control 2 Register with RMW locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCIe Bandwidth Controller performs RMW accesses the Link Control 2 Register which can occur concurrently to other sources of Link Control 2 Register writes. Therefore, add Link Control 2 Register among the PCI Express Capability Registers that need RMW locking. 
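For illustration only (not part of the patch; example_limit_to_gen1() is a hypothetical helper), a driver that needs to touch the Target Link Speed field can now rely on the locked RMW helper instead of open-coding a pcie_capability_read_word()/pcie_capability_write_word() sequence:

#include <linux/pci.h>

/* Hypothetical helper: cap the link at 2.5 GT/s via Link Control 2. */
static int example_limit_to_gen1(struct pci_dev *dev)
{
	/*
	 * The accessor serializes this update against other LNKCTL2
	 * writers (e.g. the bandwidth controller) via the RMW lock.
	 */
	return pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL2,
						  PCI_EXP_LNKCTL2_TLS,
						  PCI_EXP_LNKCTL2_TLS_2_5GT);
}
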
Link: https://lore.kernel.org/r/20241018144755.7875-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Reviewed-by: Jonathan Cameron --- Documentation/PCI/pciebus-howto.rst | 1 + include/linux/pci.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/Documentation/PCI/pciebus-howto.rst b/Documentation/PCI/pciebus-howto.rst index e48d01422efc..375d9ce171f6 100644 --- a/Documentation/PCI/pciebus-howto.rst +++ b/Documentation/PCI/pciebus-howto.rst @@ -221,6 +221,7 @@ a selected set of PCI Express Capability Registers: * Link Control Register * Root Control Register +* Link Control 2 Register Any change to those registers should be performed using RMW accessors to avoid problems due to concurrent updates. For the up-to-date list of diff --git a/include/linux/pci.h b/include/linux/pci.h index 573b4c4c2be6..be5ed534c39c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1274,6 +1274,7 @@ static inline int pcie_capability_clear_and_set_word(struct pci_dev *dev, { switch (pos) { case PCI_EXP_LNKCTL: + case PCI_EXP_LNKCTL2: case PCI_EXP_RTCTL: return pcie_capability_clear_and_set_word_locked(dev, pos, clear, set); -- cgit v1.2.3 From 52e0874fc16bd26e9ea1871e30ffb2c6dff187cf Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 Aug 2024 12:39:02 +0200 Subject: locking/rt: Add sparse annotation PREEMPT_RT's sleeping locks. The sleeping locks on PREEMPT_RT (rt_spin_lock() and friends) lack sparse annotation. Therefore a missing spin_unlock() won't be spotted by sparse in a PREEMPT_RT build while it is noticed on a !PREEMPT_RT build. Add the __acquires/__releases macros to the lock/ unlock functions. The trylock functions already use the __cond_lock() wrapper. 
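As a sketch of what the annotations buy (illustration only; the lock and functions below are hypothetical), sparse tracks the acquire/release context across the RT lock functions, so an unbalanced path is now reported on PREEMPT_RT builds as well:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

static void example_balanced(void)
{
	spin_lock(&example_lock);
	/* ... critical section ... */
	spin_unlock(&example_lock);	/* acquire and release match */
}

static void example_leaky(void)
{
	spin_lock(&example_lock);
	/* missing spin_unlock(): sparse now flags the context imbalance */
}
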
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240812104200.2239232-2-bigeasy@linutronix.de --- include/linux/rwlock_rt.h | 10 +++++----- include/linux/spinlock_rt.h | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h index 8544ff05e594..7d81fc6918ee 100644 --- a/include/linux/rwlock_rt.h +++ b/include/linux/rwlock_rt.h @@ -24,13 +24,13 @@ do { \ __rt_rwlock_init(rwl, #rwl, &__key); \ } while (0) -extern void rt_read_lock(rwlock_t *rwlock); +extern void rt_read_lock(rwlock_t *rwlock) __acquires(rwlock); extern int rt_read_trylock(rwlock_t *rwlock); -extern void rt_read_unlock(rwlock_t *rwlock); -extern void rt_write_lock(rwlock_t *rwlock); -extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass); +extern void rt_read_unlock(rwlock_t *rwlock) __releases(rwlock); +extern void rt_write_lock(rwlock_t *rwlock) __acquires(rwlock); +extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(rwlock); extern int rt_write_trylock(rwlock_t *rwlock); -extern void rt_write_unlock(rwlock_t *rwlock); +extern void rt_write_unlock(rwlock_t *rwlock) __releases(rwlock); static __always_inline void read_lock(rwlock_t *rwlock) { diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h index 61c49b16f69a..babc3e028779 100644 --- a/include/linux/spinlock_rt.h +++ b/include/linux/spinlock_rt.h @@ -32,10 +32,10 @@ do { \ __rt_spin_lock_init(slock, #slock, &__key, true); \ } while (0) -extern void rt_spin_lock(spinlock_t *lock); -extern void rt_spin_lock_nested(spinlock_t *lock, int subclass); -extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock); -extern void rt_spin_unlock(spinlock_t *lock); +extern void rt_spin_lock(spinlock_t *lock) __acquires(lock); +extern void rt_spin_lock_nested(spinlock_t *lock, int subclass) __acquires(lock); +extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock) __acquires(lock); +extern void rt_spin_unlock(spinlock_t *lock) __releases(lock); extern void rt_spin_lock_unlock(spinlock_t *lock); extern int rt_spin_trylock_bh(spinlock_t *lock); extern int rt_spin_trylock(spinlock_t *lock); -- cgit v1.2.3 From b1f01f9e54b1aaadb6740f86017e8fabdee77fe2 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 Aug 2024 12:39:03 +0200 Subject: locking/rt: Remove one __cond_lock() in RT's spin_trylock_irqsave() spin_trylock_irqsave() has a __cond_lock() wrapper which points to __spin_trylock_irqsave(). The function then invokes spin_trylock() which has another __cond_lock() finally pointing to rt_spin_trylock(). The compiler has no problem to parse this but sparse does not recognise that users of spin_trylock_irqsave() acquire a conditional lock and complains. Remove one layer of __cond_lock() so that sparse recognises conditional locking. 
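The usual calling pattern, shown here only as a sketch with a hypothetical lock: with the single __cond_lock() layer, sparse understands that the lock is held only on the successful branch and no longer reports a context imbalance for it:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

static void example_try(void)
{
	unsigned long flags;

	if (spin_trylock_irqsave(&example_lock, flags)) {
		/* lock held only in this branch */
		spin_unlock_irqrestore(&example_lock, flags);
	}
}
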
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240812104200.2239232-3-bigeasy@linutronix.de --- include/linux/spinlock_rt.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h index babc3e028779..f9f14e135be7 100644 --- a/include/linux/spinlock_rt.h +++ b/include/linux/spinlock_rt.h @@ -132,7 +132,7 @@ static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, #define spin_trylock_irq(lock) \ __cond_lock(lock, rt_spin_trylock(lock)) -#define __spin_trylock_irqsave(lock, flags) \ +#define spin_trylock_irqsave(lock, flags) \ ({ \ int __locked; \ \ @@ -142,9 +142,6 @@ static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, __locked; \ }) -#define spin_trylock_irqsave(lock, flags) \ - __cond_lock(lock, __spin_trylock_irqsave(lock, flags)) - #define spin_is_contended(lock) (((void)(lock), 0)) static inline int spin_is_locked(spinlock_t *lock) -- cgit v1.2.3 From cdda1f26e74bac732eca537a69f19f6a37b641be Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Thu, 10 Oct 2024 16:52:32 +0100 Subject: pidfd: add ioctl to retrieve pid info A common pattern when using pid fds is having to get information about the process, which currently requires /proc being mounted, resolving the fd to a pid, and then do manual string parsing of /proc/N/status and friends. This needs to be reimplemented over and over in all userspace projects (e.g.: I have reimplemented resolving in systemd, dbus, dbus-daemon, polkit so far), and requires additional care in checking that the fd is still valid after having parsed the data, to avoid races. Having a programmatic API that can be used directly removes all these requirements, including having /proc mounted. As discussed at LPC24, add an ioctl with an extensible struct so that more parameters can be added later if needed. Start with returning pid/tgid/ppid and creds unconditionally, and cgroupid optionally. 
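A minimal userspace sketch of the intended usage (illustration only, error handling trimmed; it assumes headers that expose struct pidfd_info, PIDFD_GET_INFO and SYS_pidfd_open):

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/pidfd.h>

int main(void)
{
	struct pidfd_info info = { .mask = PIDFD_INFO_CGROUPID };
	int pidfd = syscall(SYS_pidfd_open, getpid(), 0);

	if (pidfd < 0 || ioctl(pidfd, PIDFD_GET_INFO, &info) < 0)
		return 1;

	/* Only trust fields whose bits the kernel set in info.mask. */
	printf("pid=%u tgid=%u ppid=%u ruid=%u\n",
	       info.pid, info.tgid, info.ppid, info.ruid);
	if (info.mask & PIDFD_INFO_CGROUPID)
		printf("cgroupid=%llu\n", (unsigned long long)info.cgroupid);

	return 0;
}
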
Signed-off-by: Luca Boccassi Link: https://lore.kernel.org/r/20241010155401.2268522-1-luca.boccassi@gmail.com Signed-off-by: Christian Brauner --- fs/pidfs.c | 86 ++++++++++++++++++++++++- include/uapi/linux/pidfd.h | 50 ++++++++++++++ tools/testing/selftests/pidfd/pidfd_open_test.c | 82 ++++++++++++++++++++++- 3 files changed, 214 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/pidfs.c b/fs/pidfs.c index 80675b6bf884..618abb1fa1b8 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -114,6 +115,81 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) return poll_flags; } +static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg) +{ + struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; + size_t usize = _IOC_SIZE(cmd); + struct pidfd_info kinfo = {}; + struct user_namespace *user_ns; + const struct cred *c; + __u64 mask; +#ifdef CONFIG_CGROUPS + struct cgroup *cgrp; +#endif + + if (!uinfo) + return -EINVAL; + if (usize < PIDFD_INFO_SIZE_VER0) + return -EINVAL; /* First version, no smaller struct possible */ + + if (copy_from_user(&mask, &uinfo->mask, sizeof(mask))) + return -EFAULT; + + c = get_task_cred(task); + if (!c) + return -ESRCH; + + /* Unconditionally return identifiers and credentials, the rest only on request */ + + user_ns = current_user_ns(); + kinfo.ruid = from_kuid_munged(user_ns, c->uid); + kinfo.rgid = from_kgid_munged(user_ns, c->gid); + kinfo.euid = from_kuid_munged(user_ns, c->euid); + kinfo.egid = from_kgid_munged(user_ns, c->egid); + kinfo.suid = from_kuid_munged(user_ns, c->suid); + kinfo.sgid = from_kgid_munged(user_ns, c->sgid); + kinfo.fsuid = from_kuid_munged(user_ns, c->fsuid); + kinfo.fsgid = from_kgid_munged(user_ns, c->fsgid); + kinfo.mask |= PIDFD_INFO_CREDS; + put_cred(c); + +#ifdef CONFIG_CGROUPS + rcu_read_lock(); + cgrp = task_dfl_cgroup(task); + kinfo.cgroupid = cgroup_id(cgrp); + kinfo.mask |= PIDFD_INFO_CGROUPID; + rcu_read_unlock(); +#endif + + /* + * Copy pid/tgid last, to reduce the chances the information might be + * stale. Note that it is not possible to ensure it will be valid as the + * task might return as soon as the copy_to_user finishes, but that's ok + * and userspace expects that might happen and can act accordingly, so + * this is just best-effort. What we can do however is checking that all + * the fields are set correctly, or return ESRCH to avoid providing + * incomplete information. */ + + kinfo.ppid = task_ppid_nr_ns(task, NULL); + kinfo.tgid = task_tgid_vnr(task); + kinfo.pid = task_pid_vnr(task); + kinfo.mask |= PIDFD_INFO_PID; + + if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1)) + return -ESRCH; + + /* + * If userspace and the kernel have the same struct size it can just + * be copied. If userspace provides an older struct, only the bits that + * userspace knows about will be copied. If userspace provides a new + * struct, only the bits that the kernel knows about will be copied. 
+ */ + if (copy_to_user(uinfo, &kinfo, min(usize, sizeof(kinfo)))) + return -EFAULT; + + return 0; +} + static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct task_struct *task __free(put_task) = NULL; @@ -122,13 +198,17 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct ns_common *ns_common = NULL; struct pid_namespace *pid_ns; - if (arg) - return -EINVAL; - task = get_pid_task(pid, PIDTYPE_PID); if (!task) return -ESRCH; + /* Extensible IOCTL that does not open namespace FDs, take a shortcut */ + if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO)) + return pidfd_info(task, cmd, arg); + + if (arg) + return -EINVAL; + scoped_guard(task_lock, task) { nsp = task->nsproxy; if (nsp) diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 565fc0629fff..4540f6301b8c 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -16,6 +16,55 @@ #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) +/* Flags for pidfd_info. */ +#define PIDFD_INFO_PID (1UL << 0) /* Always returned, even if not requested */ +#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */ +#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */ + +#define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */ + +struct pidfd_info { + /* + * This mask is similar to the request_mask in statx(2). + * + * Userspace indicates what extensions or expensive-to-calculate fields + * they want by setting the corresponding bits in mask. The kernel + * will ignore bits that it does not know about. + * + * When filling the structure, the kernel will only set bits + * corresponding to the fields that were actually filled by the kernel. + * This also includes any future extensions that might be automatically + * filled. If the structure size is too small to contain a field + * (requested or not), to avoid confusion the mask will not + * contain a bit for that field. + * + * As such, userspace MUST verify that mask contains the + * corresponding flags after the ioctl(2) returns to ensure that it is + * using valid data. + */ + __u64 mask; + /* + * The information contained in the following fields might be stale at the + * time it is received, as the target process might have exited as soon as + * the IOCTL was processed, and there is no way to avoid that. However, it + * is guaranteed that if the call was successful, then the information was + * correct and referred to the intended process at the time the work was + * performed. 
*/ + __u64 cgroupid; + __u32 pid; + __u32 tgid; + __u32 ppid; + __u32 ruid; + __u32 rgid; + __u32 euid; + __u32 egid; + __u32 suid; + __u32 sgid; + __u32 fsuid; + __u32 fsgid; + __u32 spare0[1]; +}; + #define PIDFS_IOCTL_MAGIC 0xFF #define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) @@ -28,5 +77,6 @@ #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) #define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) #define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) #endif /* _UAPI_LINUX_PIDFD_H */ diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index c62564c264b1..ce413a221bac 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,32 @@ #include "pidfd.h" #include "../kselftest.h" +#ifndef PIDFS_IOCTL_MAGIC +#define PIDFS_IOCTL_MAGIC 0xFF +#endif + +#ifndef PIDFD_GET_INFO +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) +#define PIDFD_INFO_CGROUPID (1UL << 0) + +struct pidfd_info { + __u64 request_mask; + __u64 cgroupid; + __u32 pid; + __u32 tgid; + __u32 ppid; + __u32 ruid; + __u32 rgid; + __u32 euid; + __u32 egid; + __u32 suid; + __u32 sgid; + __u32 fsuid; + __u32 fsgid; + __u32 spare0[1]; +}; +#endif + static int safe_int(const char *numstr, int *converted) { char *err = NULL; @@ -120,10 +147,13 @@ out: int main(int argc, char **argv) { + struct pidfd_info info = { + .request_mask = PIDFD_INFO_CGROUPID, + }; int pidfd = -1, ret = 1; pid_t pid; - ksft_set_plan(3); + ksft_set_plan(4); pidfd = sys_pidfd_open(-1, 0); if (pidfd >= 0) { @@ -153,6 +183,56 @@ int main(int argc, char **argv) pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1); ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid); + if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0) { + ksft_print_msg("%s - failed to get info from pidfd\n", strerror(errno)); + goto on_error; + } + if (info.pid != pid) { + ksft_print_msg("pid from fdinfo file %d does not match pid from ioctl %d\n", + pid, info.pid); + goto on_error; + } + if (info.ppid != getppid()) { + ksft_print_msg("ppid %d does not match ppid from ioctl %d\n", + pid, info.pid); + goto on_error; + } + if (info.ruid != getuid()) { + ksft_print_msg("uid %d does not match uid from ioctl %d\n", + getuid(), info.ruid); + goto on_error; + } + if (info.rgid != getgid()) { + ksft_print_msg("gid %d does not match gid from ioctl %d\n", + getgid(), info.rgid); + goto on_error; + } + if (info.euid != geteuid()) { + ksft_print_msg("euid %d does not match euid from ioctl %d\n", + geteuid(), info.euid); + goto on_error; + } + if (info.egid != getegid()) { + ksft_print_msg("egid %d does not match egid from ioctl %d\n", + getegid(), info.egid); + goto on_error; + } + if (info.suid != geteuid()) { + ksft_print_msg("suid %d does not match suid from ioctl %d\n", + geteuid(), info.suid); + goto on_error; + } + if (info.sgid != getegid()) { + ksft_print_msg("sgid %d does not match sgid from ioctl %d\n", + getegid(), info.sgid); + goto on_error; + } + if ((info.request_mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) { + ksft_print_msg("cgroupid should not be 0 when PIDFD_INFO_CGROUPID is set\n"); + goto on_error; + } + ksft_test_result_pass("get info from pidfd test: passed\n"); + ret = 0; on_error: -- cgit v1.2.3 From 
9a9f7e13952b2638bc57bc9b34e6bdd106509836 Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Fri, 18 Oct 2024 18:53:18 +0800 Subject: mmc: core: Support UHS-II card control and access Embed UHS-II access/control functionality into the MMC request processing flow. Signed-off-by: Jason Lai Signed-off-by: Victor Shih Message-ID: <20241018105333.4569-2-victorshihgli@gmail.com> [Ulf: A couple of cleanups and fixed sd_uhs2_power_off()] Signed-off-by: Ulf Hansson --- drivers/mmc/core/core.c | 8 +- drivers/mmc/core/sd.c | 4 +- drivers/mmc/core/sd.h | 2 + drivers/mmc/core/sd_ops.c | 9 + drivers/mmc/core/sd_ops.h | 2 + drivers/mmc/core/sd_uhs2.c | 1083 ++++++++++++++++++++++++++++++++++++++++++-- include/linux/mmc/core.h | 17 + include/linux/mmc/host.h | 15 + 8 files changed, 1103 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 54ca9dc2114c..a499f3c59de5 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -354,6 +354,9 @@ int mmc_start_request(struct mmc_host *host, struct mmc_request *mrq) if (err) return err; + if (host->uhs2_sd_tran) + mmc_uhs2_prepare_cmd(host, mrq); + led_trigger_event(host->led, LED_FULL); __mmc_start_request(host, mrq); @@ -453,6 +456,9 @@ int mmc_cqe_start_req(struct mmc_host *host, struct mmc_request *mrq) if (err) goto out_err; + if (host->uhs2_sd_tran) + mmc_uhs2_prepare_cmd(host, mrq); + err = host->cqe_ops->cqe_request(host, mrq); if (err) goto out_err; @@ -1135,7 +1141,7 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr) return 0; } - if (host->caps2 & MMC_CAP2_FULL_PWR_CYCLE) { + if (!mmc_card_uhs2(host) && host->caps2 & MMC_CAP2_FULL_PWR_CYCLE) { bit = ffs(ocr) - 1; ocr &= 3 << bit; mmc_power_cycle(host, ocr); diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 918b86bf8bbb..cc757b850e79 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -200,7 +200,7 @@ static int mmc_decode_csd(struct mmc_card *card, bool is_sduc) /* * Given a 64-bit response, decode to our card SCR structure. */ -static int mmc_decode_scr(struct mmc_card *card) +int mmc_decode_scr(struct mmc_card *card) { struct sd_scr *scr = &card->scr; unsigned int scr_struct; @@ -903,7 +903,7 @@ int mmc_sd_get_csd(struct mmc_card *card, bool is_sduc) return 0; } -static int mmc_sd_get_ro(struct mmc_host *host) +int mmc_sd_get_ro(struct mmc_host *host) { int ro; diff --git a/drivers/mmc/core/sd.h b/drivers/mmc/core/sd.h index 7e8beface2ca..301dc34b8b63 100644 --- a/drivers/mmc/core/sd.h +++ b/drivers/mmc/core/sd.h @@ -11,6 +11,8 @@ struct mmc_card; int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid, u32 *rocr); int mmc_sd_get_csd(struct mmc_card *card, bool is_sduc); +int mmc_decode_scr(struct mmc_card *card); +int mmc_sd_get_ro(struct mmc_host *host); void mmc_decode_cid(struct mmc_card *card); int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card, bool reinit); diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c index 50d1380e93b8..cd86463dd306 100644 --- a/drivers/mmc/core/sd_ops.c +++ b/drivers/mmc/core/sd_ops.c @@ -42,6 +42,15 @@ int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card) if (WARN_ON(card && card->host != host)) return -EINVAL; + /* + * UHS2 packet has APP bit so only set APP_CMD flag here. + * Will set the APP bit when assembling UHS2 packet. 
+ */ + if (host->uhs2_sd_tran) { + host->uhs2_app_cmd = true; + return 0; + } + cmd.opcode = MMC_APP_CMD; if (card) { diff --git a/drivers/mmc/core/sd_ops.h b/drivers/mmc/core/sd_ops.h index fd3f10b9cf86..8fffc1b29757 100644 --- a/drivers/mmc/core/sd_ops.h +++ b/drivers/mmc/core/sd_ops.h @@ -12,6 +12,7 @@ struct mmc_card; struct mmc_host; +struct mmc_request; int mmc_app_set_bus_width(struct mmc_card *card, int width); int mmc_send_app_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr); @@ -22,6 +23,7 @@ int mmc_app_send_scr(struct mmc_card *card); int mmc_app_sd_status(struct mmc_card *card, void *ssr); int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card); int mmc_send_ext_addr(struct mmc_host *host, u32 addr); +void mmc_uhs2_prepare_cmd(struct mmc_host *host, struct mmc_request *mrq); #endif diff --git a/drivers/mmc/core/sd_uhs2.c b/drivers/mmc/core/sd_uhs2.c index 19d62d45e1ec..ddd2291ad7c4 100644 --- a/drivers/mmc/core/sd_uhs2.c +++ b/drivers/mmc/core/sd_uhs2.c @@ -1,23 +1,51 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2021 Linaro Ltd - * * Author: Ulf Hansson * + * Copyright (C) 2014 Intel Corp, All Rights Reserved. + * Author: Yi Sun + * + * Copyright (C) 2020 Genesys Logic, Inc. + * Authors: Ben Chuang + * + * Copyright (C) 2020 Linaro Limited + * Author: AKASHI Takahiro + * + * Copyright (C) 2022 Genesys Logic, Inc. + * Authors: Jason Lai + * + * Copyright (C) 2023 Genesys Logic, Inc. + * Authors: Victor Shih + * * Support for SD UHS-II cards */ #include +#include #include #include +#include +#include +#include +#include "card.h" #include "core.h" #include "bus.h" #include "sd.h" +#include "sd_ops.h" #include "mmc_ops.h" +#define UHS2_WAIT_CFG_COMPLETE_PERIOD_US (1 * 1000) +#define UHS2_WAIT_CFG_COMPLETE_TIMEOUT_MS 100 + static const unsigned int sd_uhs2_freqs[] = { 52000000, 26000000 }; +struct sd_uhs2_wait_active_state_data { + struct mmc_host *host; + struct mmc_command *cmd; +}; + static int sd_uhs2_power_up(struct mmc_host *host) { int err; @@ -37,15 +65,23 @@ static int sd_uhs2_power_up(struct mmc_host *host) static int sd_uhs2_power_off(struct mmc_host *host) { + int err; + if (host->ios.power_mode == MMC_POWER_OFF) return 0; host->ios.vdd = 0; host->ios.clock = 0; - host->ios.timing = MMC_TIMING_LEGACY; host->ios.power_mode = MMC_POWER_OFF; + host->uhs2_sd_tran = false; + + err = host->ops->uhs2_control(host, UHS2_SET_IOS); + if (err) + return err; - return host->ops->uhs2_control(host, UHS2_SET_IOS); + /* For consistency, let's restore the initial timing. */ + host->ios.timing = MMC_TIMING_LEGACY; + return 0; } /* @@ -55,7 +91,47 @@ static int sd_uhs2_power_off(struct mmc_host *host) */ static int sd_uhs2_phy_init(struct mmc_host *host) { - return 0; + int err; + + err = host->ops->uhs2_control(host, UHS2_PHY_INIT); + if (err) { + pr_err("%s: failed to initial phy for UHS-II!\n", + mmc_hostname(host)); + } + + return err; +} + +/* + * sd_uhs2_cmd_assemble() - build up UHS-II command packet which is embedded in + * mmc_command structure + * @cmd: MMC command to executed + * @uhs2_cmd: UHS2 command corresponded to MMC command + * @header: Header field of UHS-II command cxpacket + * @arg: Argument field of UHS-II command packet + * @payload: Payload field of UHS-II command packet + * @plen: Payload length + * @resp: Response buffer is allocated by caller and it is used to keep + * the response of CM-TRAN command. For SD-TRAN command, uhs2_resp + * should be null and SD-TRAN command response should be stored in + * resp of mmc_command. 
+ * @resp_len: Response buffer length + * + * The uhs2_command structure contains message packets which are transmited/ + * received on UHS-II bus. This function fills in the contents of uhs2_command + * structure and embededs UHS2 command into mmc_command structure, which is used + * in legacy SD operation functions. + * + */ +static void sd_uhs2_cmd_assemble(struct mmc_command *cmd, + struct uhs2_command *uhs2_cmd, + u8 plen, u8 resp_len) +{ + uhs2_cmd->payload_len = plen * sizeof(u32); + uhs2_cmd->packet_len = uhs2_cmd->payload_len + 4; + + cmd->uhs2_cmd = uhs2_cmd; + cmd->uhs2_cmd->uhs2_resp_len = resp_len; } /* @@ -64,6 +140,83 @@ static int sd_uhs2_phy_init(struct mmc_host *host) */ static int sd_uhs2_dev_init(struct mmc_host *host) { + struct mmc_command cmd = {0}; + struct uhs2_command uhs2_cmd = {}; + u32 cnt; + u32 dap, gap, resp_gap; + u8 gd = 0; + int err; + + dap = host->uhs2_caps.dap; + gap = host->uhs2_caps.gap; + + /* + * Refer to UHS-II Addendum Version 1.02 Figure 6-21 to see DEVICE_INIT CCMD format. + * Head: + * - Control Write(R/W=1) with 4-Byte payload(PLEN=01b). + * - IOADR = CMD_BASE + 002h + * Payload: + * - bit [3:0] : GAP(Group Allocated Power) + * - bit [7:4] : GD(Group Descriptor) + * - bit [11] : Complete Flag + * - bit [15:12]: DAP(Device Allocated Power) + */ + uhs2_cmd.header = UHS2_NATIVE_PACKET | UHS2_PACKET_TYPE_CCMD; + uhs2_cmd.arg = ((UHS2_DEV_CMD_DEVICE_INIT & 0xFF) << 8) | + UHS2_NATIVE_CMD_WRITE | + UHS2_NATIVE_CMD_PLEN_4B | + (UHS2_DEV_CMD_DEVICE_INIT >> 8); + + /* + * Refer to UHS-II Addendum Version 1.02 section 6.3.1. + * Max. time from DEVICE_INIT CCMD EOP reception on Device + * Rx to its SOP transmission on Device Tx(Tfwd_init_cmd) is + * 1 second. + */ + cmd.busy_timeout = 1000; + + /* + * Refer to UHS-II Addendum Version 1.02 section 6.2.6.3. + * Let's retry the DEVICE_INIT command no more than 30 times. + */ + for (cnt = 0; cnt < 30; cnt++) { + uhs2_cmd.payload[0] = ((dap & 0xF) << 12) | + UHS2_DEV_INIT_COMPLETE_FLAG | + ((gd & 0xF) << 4) | + (gap & 0xF); + + sd_uhs2_cmd_assemble(&cmd, &uhs2_cmd, UHS2_DEV_INIT_PAYLOAD_LEN, + UHS2_DEV_INIT_RESP_LEN); + + err = mmc_wait_for_cmd(host, &cmd, 0); + + if (err) { + pr_err("%s: %s: UHS2 CMD send fail, err= 0x%x!\n", + mmc_hostname(host), __func__, err); + continue; + } + + if (uhs2_cmd.uhs2_resp[3] != (UHS2_DEV_CMD_DEVICE_INIT & 0xFF)) { + pr_err("%s: DEVICE_INIT response is wrong!\n", + mmc_hostname(host)); + return -EIO; + } + + if (uhs2_cmd.uhs2_resp[5] & 0x8) { + host->uhs2_caps.group_desc = gd; + return 0; + } + resp_gap = uhs2_cmd.uhs2_resp[4] & 0x0F; + if (gap == resp_gap) + gd++; + } + + if (err) { + pr_err("%s: %s: UHS2 CMD send fail, err= 0x%x!\n", + mmc_hostname(host), __func__, err); + return err; + } + return 0; } @@ -74,6 +227,48 @@ static int sd_uhs2_dev_init(struct mmc_host *host) */ static int sd_uhs2_enum(struct mmc_host *host, u32 *node_id) { + struct mmc_command cmd = {0}; + struct uhs2_command uhs2_cmd = {}; + u8 id_f = 0xF, id_l = 0x0; + int err; + + /* + * Refer to UHS-II Addendum Version 1.02 Figure 6-28 to see ENUMERATE CCMD format. + * Header: + * - Control Write(R/W=1) with 4-Byte payload(PLEN=01b). 
+ * - IOADR = CMD_BASE + 003h + * Payload: + * - bit [3:0]: ID_L(Last Node ID) + * - bit [7:4]: ID_F(First Node ID) + */ + uhs2_cmd.header = UHS2_NATIVE_PACKET | UHS2_PACKET_TYPE_CCMD; + uhs2_cmd.arg = ((UHS2_DEV_CMD_ENUMERATE & 0xFF) << 8) | + UHS2_NATIVE_CMD_WRITE | + UHS2_NATIVE_CMD_PLEN_4B | + (UHS2_DEV_CMD_ENUMERATE >> 8); + + uhs2_cmd.payload[0] = (id_f << 4) | id_l; + uhs2_cmd.payload[0] = cpu_to_be32(uhs2_cmd.payload[0]); + + sd_uhs2_cmd_assemble(&cmd, &uhs2_cmd, UHS2_DEV_ENUM_PAYLOAD_LEN, UHS2_DEV_ENUM_RESP_LEN); + + err = mmc_wait_for_cmd(host, &cmd, 0); + if (err) { + pr_err("%s: %s: UHS2 CMD send fail, err= 0x%x!\n", + mmc_hostname(host), __func__, err); + return err; + } + + if (uhs2_cmd.uhs2_resp[3] != (UHS2_DEV_CMD_ENUMERATE & 0xFF)) { + pr_err("%s: ENUMERATE response is wrong!\n", + mmc_hostname(host)); + return -EIO; + } + + id_f = (uhs2_cmd.uhs2_resp[4] >> 4) & 0xF; + id_l = uhs2_cmd.uhs2_resp[4] & 0xF; + *node_id = id_f; + return 0; } @@ -84,6 +279,180 @@ static int sd_uhs2_enum(struct mmc_host *host, u32 *node_id) */ static int sd_uhs2_config_read(struct mmc_host *host, struct mmc_card *card) { + struct mmc_command cmd = {0}; + struct uhs2_command uhs2_cmd = {}; + u32 cap; + int err; + + /* + * Use Control Read CCMD to read Generic Capability from Configuration Register. + * - Control Write(R/W=1) with 4-Byte payload(PLEN=01b). + * - IOADR = Generic Capability Register(CFG_BASE + 000h) + */ + uhs2_cmd.header = UHS2_NATIVE_PACKET | UHS2_PACKET_TYPE_CCMD | card->uhs2_config.node_id; + uhs2_cmd.arg = ((UHS2_DEV_CONFIG_GEN_CAPS & 0xFF) << 8) | + UHS2_NATIVE_CMD_READ | + UHS2_NATIVE_CMD_PLEN_4B | + (UHS2_DEV_CONFIG_GEN_CAPS >> 8); + + /* + * There is no payload because per spec, there should be + * no payload field for read CCMD. + * Plen is set in arg. Per spec, plen for read CCMD + * represents the len of read data which is assigned in payload + * of following RES (p136). + */ + sd_uhs2_cmd_assemble(&cmd, &uhs2_cmd, 0, 0); + + err = mmc_wait_for_cmd(host, &cmd, 0); + if (err) { + pr_err("%s: %s: UHS2 CMD send fail, err= 0x%x!\n", + mmc_hostname(host), __func__, err); + return err; + } + + /* + * Generic Capability Register: + * bit [7:0] : Reserved + * bit [13:8] : Device-Specific Number of Lanes and Functionality + * bit 8: 2L-HD + * bit 9: 2D-1U FD + * bit 10: 1D-2U FD + * bit 11: 2D-2U FD + * Others: Reserved + * bit [14] : DADR Length + * 0: 4 bytes + * 1: Reserved + * bit [23:16]: Application Type + * bit 16: 0=Non-SD memory, 1=SD memory + * bit 17: 0=Non-SDIO, 1=SDIO + * bit 18: 0=Card, 1=Embedded + * bit [63:24]: Reserved + */ + cap = cmd.resp[0]; + card->uhs2_config.n_lanes = + (cap >> UHS2_DEV_CONFIG_N_LANES_POS) & + UHS2_DEV_CONFIG_N_LANES_MASK; + card->uhs2_config.dadr_len = + (cap >> UHS2_DEV_CONFIG_DADR_POS) & + UHS2_DEV_CONFIG_DADR_MASK; + card->uhs2_config.app_type = + (cap >> UHS2_DEV_CONFIG_APP_POS) & + UHS2_DEV_CONFIG_APP_MASK; + + /* + * Use Control Read CCMD to read PHY Capability from Configuration Register. + * - Control Write(R/W=1) with 8-Byte payload(PLEN=10b). 
+ * - IOADR = PHY Capability Register(CFG_BASE + 002h) + */ + uhs2_cmd.arg = ((UHS2_DEV_CONFIG_PHY_CAPS & 0xFF) << 8) | + UHS2_NATIVE_CMD_READ | + UHS2_NATIVE_CMD_PLEN_8B | + (UHS2_DEV_CONFIG_PHY_CAPS >> 8); + + sd_uhs2_cmd_assemble(&cmd, &uhs2_cmd, 0, 0); + + err = mmc_wait_for_cmd(host, &cmd, 0); + if (err) { + pr_err("%s: %s: UHS2 CMD send fail, err= 0x%x!\n", + mmc_hostname(host), __func__, err); + return err; + } + + /* + * PHY Capability Register: + * bit [3:0] : PHY Minor Revision + * bit [5:4] : PHY Major Revision + * bit [15] : Support Hibernate Mode + * 0: Not support Hibernate Mode + * 1: Support Hibernate Mode + * bit [31:16]: Reserved + * bit [35:32]: Device-Specific N_LSS_SYN + * bit [39:36]: Device-Specific N_LSS_DIR + * bit [63:40]: Reserved + */ + cap = cmd.resp[0]; + card->uhs2_config.phy_minor_rev = + cap & UHS2_DEV_CONFIG_PHY_MINOR_MASK; + card->uhs2_config.phy_major_rev = + (cap >> UHS2_DEV_CONFIG_PHY_MAJOR_POS) & + UHS2_DEV_CONFIG_PHY_MAJOR_MASK; + card->uhs2_config.can_hibernate = + (cap >> UHS2_DEV_CONFIG_CAN_HIBER_POS) & + UHS2_DEV_CONFIG_CAN_HIBER_MASK; + + cap = cmd.resp[1]; + card->uhs2_config.n_lss_sync = + cap & UHS2_DEV_CONFIG_N_LSS_SYN_MASK; + card->uhs2_config.n_lss_dir = + (cap >> UHS2_DEV_CONFIG_N_LSS_DIR_POS) & + UHS2_DEV_CONFIG_N_LSS_DIR_MASK; + if (card->uhs2_config.n_lss_sync == 0) + card->uhs2_config.n_lss_sync = 16 << 2; + else + card->uhs2_config.n_lss_sync <<= 2; + + if (card->uhs2_config.n_lss_dir == 0) + card->uhs2_config.n_lss_dir = 16 << 3; + else + card->uhs2_config.n_lss_dir <<= 3; + + /* + * Use Control Read CCMD to read LINK/TRAN Capability from Configuration Register. + * - Control Write(R/W=1) with 8-Byte payload(PLEN=10b). + * - IOADR = LINK/TRAN Capability Register(CFG_BASE + 004h) + */ + uhs2_cmd.arg = ((UHS2_DEV_CONFIG_LINK_TRAN_CAPS & 0xFF) << 8) | + UHS2_NATIVE_CMD_READ | + UHS2_NATIVE_CMD_PLEN_8B | + (UHS2_DEV_CONFIG_LINK_TRAN_CAPS >> 8); + + sd_uhs2_cmd_assemble(&cmd, &uhs2_cmd, 0, 0); + + err = mmc_wait_for_cmd(host, &cmd, 0); + if (err) { + pr_err("%s: %s: UHS2 CMD send fail, err= 0x%x!\n", + mmc_hostname(host), __func__, err); + return err; + } + + /* + * LINK/TRAN Capability Register: + * bit [3:0] : LINK_TRAN Minor Revision + * bit [5:4] : LINK/TRAN Major Revision + * bit [7:6] : Reserved + * bit [15:8] : Device-Specific N_FCU + * bit [18:16]: Device Type + * 001b=Host + * 010b=Device + * 011b=Reserved for CMD issuable Device + * bit [19] : Reserved + * bit [31:20]: Device-Specific MAX_BLKLEN + * bit [39:32]: Device-Specific N_DATA_GAP + * bit [63:40]: Reserved + */ + cap = cmd.resp[0]; + card->uhs2_config.link_minor_rev = + cap & UHS2_DEV_CONFIG_LT_MINOR_MASK; + card->uhs2_config.link_major_rev = + (cap >> UHS2_DEV_CONFIG_LT_MAJOR_POS) & + UHS2_DEV_CONFIG_LT_MAJOR_MASK; + card->uhs2_config.n_fcu = + (cap >> UHS2_DEV_CONFIG_N_FCU_POS) & + UHS2_DEV_CONFIG_N_FCU_MASK; + card->uhs2_config.dev_type = + (cap >> UHS2_DEV_CONFIG_DEV_TYPE_POS) & + UHS2_DEV_CONFIG_DEV_TYPE_MASK; + card->uhs2_config.maxblk_len = + (cap >> UHS2_DEV_CONFIG_MAX_BLK_LEN_POS) & + UHS2_DEV_CONFIG_MAX_BLK_LEN_MASK; + + cap = cmd.resp[1]; + card->uhs2_config.n_data_gap = + cap & UHS2_DEV_CONFIG_N_DATA_GAP_MASK; + if (card->uhs2_config.n_fcu == 0) + card->uhs2_config.n_fcu = 256; + return 0; } @@ -98,18 +467,336 @@ static int sd_uhs2_config_read(struct mmc_host *host, struct mmc_card *card) */ static int sd_uhs2_config_write(struct mmc_host *host, struct mmc_card *card) { + struct mmc_command cmd = {0}; + struct uhs2_command uhs2_cmd = {}; + u8 nMinDataGap; + 
int err; + + /* + * Use Control Write CCMD to set Generic Setting in Configuration Register. + * - Control Write(R/W=1) with 8-Byte payload(PLEN=10b). + * - IOADR = Generic Setting Register(CFG_BASE + 008h) + * - Payload = New contents to be written to Generic Setting Register + */ + uhs2_cmd.header = UHS2_NATIVE_PACKET | UHS2_PACKET_TYPE_CCMD | card->uhs2_config.node_id; + uhs2_cmd.arg = ((UHS2_DEV_CONFIG_GEN_SET & 0xFF) << 8) | + UHS2_NATIVE_CMD_WRITE | + UHS2_NATIVE_CMD_PLEN_8B | + (UHS2_DEV_CONFIG_GEN_SET >> 8); + + /* + * Most UHS-II cards only support FD and 2L-HD mode. Other lane numbers + * defined in UHS-II addendem Ver1.01 are optional. + */ + host->uhs2_caps.n_lanes_set = UHS2_DEV_CONFIG_GEN_SET_2L_FD_HD; + card->uhs2_config.n_lanes_set = UHS2_DEV_CONFIG_GEN_SET_2L_FD_HD; + + uhs2_cmd.payload[0] = card->uhs2_config.n_lanes_set << UHS2_DEV_CONFIG_N_LANES_POS; + uhs2_cmd.payload[1] = 0; + uhs2_cmd.payload[0] = cpu_to_be32(uhs2_cmd.payload[0]); + uhs2_cmd.payload[1] = cpu_to_be32(uhs2_cmd.payload[1]); + + /* + * There is no payload because per spec, there should be + * no payload field for read CCMD. + * Plen is set in arg. Per spec, plen for read CCMD + * represents the len of read data which is assigned in payload + * of following RES (p136). + */ + sd_uhs2_cmd_assemble(&cmd, &uhs2_cmd, UHS2_CFG_WRITE_PAYLOAD_LEN, 0); + + err = mmc_wait_for_cmd(host, &cmd, 0); + if (err) { + pr_err("%s: %s: UHS2 CMD send fail, err= 0x%x!\n", + mmc_hostname(host), __func__, err); + return err; + } + + /* + * Use Control Write CCMD to set PHY Setting in Configuration Register. + * - Control Write(R/W=1) with 8-Byte payload(PLEN=10b). + * - IOADR = PHY Setting Register(CFG_BASE + 00Ah) + * - Payload = New contents to be written to PHY Setting Register + */ + uhs2_cmd.arg = ((UHS2_DEV_CONFIG_PHY_SET & 0xFF) << 8) | + UHS2_NATIVE_CMD_WRITE | + UHS2_NATIVE_CMD_PLEN_8B | + (UHS2_DEV_CONFIG_PHY_SET >> 8); + + if (host->uhs2_caps.speed_range == UHS2_DEV_CONFIG_PHY_SET_SPEED_B) { + if (card->uhs2_config.n_lanes == UHS2_DEV_CONFIG_2L_HD_FD && + host->uhs2_caps.n_lanes == UHS2_DEV_CONFIG_2L_HD_FD) { + /* Support HD */ + host->ios.timing = MMC_TIMING_UHS2_SPEED_B_HD; + nMinDataGap = 1; + } else { + /* Only support 2L-FD so far */ + host->ios.timing = MMC_TIMING_UHS2_SPEED_B; + nMinDataGap = 3; + } + card->uhs2_config.speed_range_set = UHS2_DEV_CONFIG_PHY_SET_SPEED_B; + } else { + if (card->uhs2_config.n_lanes == UHS2_DEV_CONFIG_2L_HD_FD && + host->uhs2_caps.n_lanes == UHS2_DEV_CONFIG_2L_HD_FD) { + /* Support HD */ + host->ios.timing = MMC_TIMING_UHS2_SPEED_A_HD; + nMinDataGap = 1; + } else { + /* Only support 2L-FD so far */ + host->ios.timing = MMC_TIMING_UHS2_SPEED_A; + nMinDataGap = 3; + } + card->uhs2_config.speed_range_set = UHS2_DEV_CONFIG_PHY_SET_SPEED_A; + } + + uhs2_cmd.payload[0] = + card->uhs2_config.speed_range_set << UHS2_DEV_CONFIG_PHY_SET_SPEED_POS; + + card->uhs2_config.n_lss_sync_set = (max(card->uhs2_config.n_lss_sync, + host->uhs2_caps.n_lss_sync) >> 2) & + UHS2_DEV_CONFIG_N_LSS_SYN_MASK; + host->uhs2_caps.n_lss_sync_set = card->uhs2_config.n_lss_sync_set; + + card->uhs2_config.n_lss_dir_set = (max(card->uhs2_config.n_lss_dir, + host->uhs2_caps.n_lss_dir) >> 3) & + UHS2_DEV_CONFIG_N_LSS_DIR_MASK; + host->uhs2_caps.n_lss_dir_set = card->uhs2_config.n_lss_dir_set; + + uhs2_cmd.payload[1] = (card->uhs2_config.n_lss_dir_set << UHS2_DEV_CONFIG_N_LSS_DIR_POS) | + card->uhs2_config.n_lss_sync_set; + uhs2_cmd.payload[0] = cpu_to_be32(uhs2_cmd.payload[0]); + uhs2_cmd.payload[1] = 
cpu_to_be32(uhs2_cmd.payload[1]); + + memset(uhs2_cmd.uhs2_resp, 0, sizeof(uhs2_cmd.uhs2_resp)); + + sd_uhs2_cmd_assemble(&cmd, &uhs2_cmd, UHS2_CFG_WRITE_PAYLOAD_LEN, + UHS2_CFG_WRITE_PHY_SET_RESP_LEN); + + err = mmc_wait_for_cmd(host, &cmd, 0); + if (err) { + pr_err("%s: %s: UHS2 CMD send fail, err= 0x%x!\n", + mmc_hostname(host), __func__, err); + return err; + } + + if ((uhs2_cmd.uhs2_resp[2] & 0x80)) { + pr_err("%s: %s: UHS2 CMD not accepted, resp= 0x%x!\n", + mmc_hostname(host), __func__, uhs2_cmd.uhs2_resp[2]); + return -EIO; + } + + /* + * Use Control Write CCMD to set LINK/TRAN Setting in Configuration Register. + * - Control Write(R/W=1) with 8-Byte payload(PLEN=10b). + * - IOADR = LINK/TRAN Setting Register(CFG_BASE + 00Ch) + * - Payload = New contents to be written to LINK/TRAN Setting Register + */ + uhs2_cmd.arg = ((UHS2_DEV_CONFIG_LINK_TRAN_SET & 0xFF) << 8) | + UHS2_NATIVE_CMD_WRITE | + UHS2_NATIVE_CMD_PLEN_8B | + (UHS2_DEV_CONFIG_LINK_TRAN_SET >> 8); + + if (card->uhs2_config.app_type == UHS2_DEV_CONFIG_APP_SD_MEM) + card->uhs2_config.maxblk_len_set = UHS2_DEV_CONFIG_LT_SET_MAX_BLK_LEN; + else + card->uhs2_config.maxblk_len_set = min(card->uhs2_config.maxblk_len, + host->uhs2_caps.maxblk_len); + host->uhs2_caps.maxblk_len_set = card->uhs2_config.maxblk_len_set; + + card->uhs2_config.n_fcu_set = min(card->uhs2_config.n_fcu, host->uhs2_caps.n_fcu); + host->uhs2_caps.n_fcu_set = card->uhs2_config.n_fcu_set; + + card->uhs2_config.n_data_gap_set = max(nMinDataGap, card->uhs2_config.n_data_gap); + host->uhs2_caps.n_data_gap_set = card->uhs2_config.n_data_gap_set; + + host->uhs2_caps.max_retry_set = 3; + card->uhs2_config.max_retry_set = host->uhs2_caps.max_retry_set; + + uhs2_cmd.payload[0] = + (card->uhs2_config.maxblk_len_set << UHS2_DEV_CONFIG_MAX_BLK_LEN_POS) | + (card->uhs2_config.max_retry_set << UHS2_DEV_CONFIG_LT_SET_MAX_RETRY_POS) | + (card->uhs2_config.n_fcu_set << UHS2_DEV_CONFIG_N_FCU_POS); + uhs2_cmd.payload[1] = card->uhs2_config.n_data_gap_set; + uhs2_cmd.payload[0] = cpu_to_be32(uhs2_cmd.payload[0]); + uhs2_cmd.payload[1] = cpu_to_be32(uhs2_cmd.payload[1]); + + sd_uhs2_cmd_assemble(&cmd, &uhs2_cmd, UHS2_CFG_WRITE_PAYLOAD_LEN, 0); + + err = mmc_wait_for_cmd(host, &cmd, 0); + if (err) { + pr_err("%s: %s: UHS2 CMD send fail, err= 0x%x!\n", + mmc_hostname(host), __func__, err); + return err; + } + + /* + * Use Control Write CCMD to set Config Completion(payload bit 63) in Generic Setting + * Register. + * Header: + * - Control Write(R/W=1) with 8-Byte payload(PLEN=10b). + * - IOADR = PGeneric Setting Register(CFG_BASE + 008h) + * Payload: + * - bit [63]: Config Completion + * + * DLSM transits to Active state immediately when Config Completion is set to 1. 
+ */ + uhs2_cmd.arg = ((UHS2_DEV_CONFIG_GEN_SET & 0xFF) << 8) | + UHS2_NATIVE_CMD_WRITE | + UHS2_NATIVE_CMD_PLEN_8B | + (UHS2_DEV_CONFIG_GEN_SET >> 8); + + uhs2_cmd.payload[0] = 0; + uhs2_cmd.payload[1] = UHS2_DEV_CONFIG_GEN_SET_CFG_COMPLETE; + uhs2_cmd.payload[0] = cpu_to_be32(uhs2_cmd.payload[0]); + uhs2_cmd.payload[1] = cpu_to_be32(uhs2_cmd.payload[1]); + + memset(uhs2_cmd.uhs2_resp, 0, sizeof(uhs2_cmd.uhs2_resp)); + sd_uhs2_cmd_assemble(&cmd, &uhs2_cmd, UHS2_CFG_WRITE_PAYLOAD_LEN, + UHS2_CFG_WRITE_GENERIC_SET_RESP_LEN); + + err = mmc_wait_for_cmd(host, &cmd, 0); + if (err) { + pr_err("%s: %s: UHS2 CMD send fail, err= 0x%x!\n", + mmc_hostname(host), __func__, err); + return err; + } + + /* Set host Config Setting registers */ + err = host->ops->uhs2_control(host, UHS2_SET_CONFIG); + if (err) { + pr_err("%s: %s: UHS2 SET_CONFIG fail!\n", mmc_hostname(host), __func__); + return err; + } + return 0; } -/* - * Initialize the UHS-II card through the SD-TRAN transport layer. This enables - * commands/requests to be backwards compatible through the legacy SD protocol. - * UHS-II cards has a specific power limit specified for VDD1/VDD2, that should - * be set through a legacy CMD6. Note that, the power limit that becomes set, - * survives a soft reset through the GO_DORMANT_STATE command. - */ -static int sd_uhs2_legacy_init(struct mmc_host *host, struct mmc_card *card) +static int sd_uhs2_go_dormant(struct mmc_host *host, u32 node_id) +{ + struct mmc_command cmd = {0}; + struct uhs2_command uhs2_cmd = {}; + int err; + + /* Disable Normal INT */ + err = host->ops->uhs2_control(host, UHS2_DISABLE_INT); + if (err) { + pr_err("%s: %s: UHS2 DISABLE_INT fail!\n", + mmc_hostname(host), __func__); + return err; + } + + /* + * Refer to UHS-II Addendum Version 1.02 Figure 6-17 to see GO_DORMANT_STATE CCMD format. + * Header: + * - Control Write(R/W=1) with 4-Byte payload(PLEN=01b). + * - IOADR = CMD_BASE + 001h + * Payload: + * - bit [7]: HBR(Entry to Hibernate Mode) + * 1: Host intends to enter Hibernate mode during Dormant state. + * The default setting is 0 because hibernate is currently not supported. 
+ */ + uhs2_cmd.header = UHS2_NATIVE_PACKET | UHS2_PACKET_TYPE_CCMD | node_id; + uhs2_cmd.arg = ((UHS2_DEV_CMD_GO_DORMANT_STATE & 0xFF) << 8) | + UHS2_NATIVE_CMD_WRITE | + UHS2_NATIVE_CMD_PLEN_4B | + (UHS2_DEV_CMD_GO_DORMANT_STATE >> 8); + + sd_uhs2_cmd_assemble(&cmd, &uhs2_cmd, UHS2_GO_DORMANT_PAYLOAD_LEN, 0); + + err = mmc_wait_for_cmd(host, &cmd, 0); + if (err) { + pr_err("%s: %s: UHS2 CMD send fail, err= 0x%x!\n", + mmc_hostname(host), __func__, err); + return err; + } + + /* Check Dormant State in Present */ + err = host->ops->uhs2_control(host, UHS2_CHECK_DORMANT); + if (err) + return err; + + /* Disable UHS2 card clock */ + err = host->ops->uhs2_control(host, UHS2_DISABLE_CLK); + if (err) + return err; + + /* Restore sd clock */ + mmc_delay(5); + err = host->ops->uhs2_control(host, UHS2_ENABLE_CLK); + if (err) + return err; + + /* Enable Normal INT */ + err = host->ops->uhs2_control(host, UHS2_ENABLE_INT); + if (err) + return err; + + /* Detect UHS2 */ + err = host->ops->uhs2_control(host, UHS2_PHY_INIT); + if (err) + return err; + + return 0; +} + +static int sd_uhs2_wait_active_state_cb(void *cb_data, bool *busy) { + struct sd_uhs2_wait_active_state_data *data = cb_data; + struct mmc_host *host = data->host; + struct mmc_command *cmd = data->cmd; + int err; + + err = mmc_wait_for_cmd(host, cmd, 0); + if (err) + return err; + + if (cmd->resp[1] & UHS2_DEV_CONFIG_GEN_SET_CFG_COMPLETE) + *busy = false; + else + *busy = true; + + return 0; +} + +static int sd_uhs2_go_dormant_state(struct mmc_host *host, u32 node_id) +{ + struct mmc_command cmd = {0}; + struct uhs2_command uhs2_cmd = {}; + int err; + struct sd_uhs2_wait_active_state_data cb_data = { + .host = host, + .cmd = &cmd + }; + + err = sd_uhs2_go_dormant(host, node_id); + if (err) { + pr_err("%s: %s: UHS2 GO_DORMANT_STATE fail, err= 0x%x!\n", + mmc_hostname(host), __func__, err); + return err; + } + + /* + * Use Control Read CCMD to check Config Completion(bit 63) in Generic Setting Register. + * - Control Read(R/W=0) with 8-Byte payload(PLEN=10b). + * - IOADR = Generic Setting Register(CFG_BASE + 008h) + * + * When UHS-II card been switched to new speed mode, it will set Config Completion to 1. + */ + uhs2_cmd.header = UHS2_NATIVE_PACKET | UHS2_PACKET_TYPE_CCMD | node_id; + uhs2_cmd.arg = ((UHS2_DEV_CONFIG_GEN_SET & 0xFF) << 8) | + UHS2_NATIVE_CMD_READ | + UHS2_NATIVE_CMD_PLEN_8B | + (UHS2_DEV_CONFIG_GEN_SET >> 8); + + sd_uhs2_cmd_assemble(&cmd, &uhs2_cmd, 0, 0); + err = __mmc_poll_for_busy(host, UHS2_WAIT_CFG_COMPLETE_PERIOD_US, + UHS2_WAIT_CFG_COMPLETE_TIMEOUT_MS, + &sd_uhs2_wait_active_state_cb, &cb_data); + if (err) { + pr_err("%s: %s: Not switch to Active in 100 ms\n", mmc_hostname(host), __func__); + return err; + } + return 0; } @@ -117,7 +804,7 @@ static int sd_uhs2_legacy_init(struct mmc_host *host, struct mmc_card *card) * Allocate the data structure for the mmc_card and run the UHS-II specific * initialization sequence. 
*/ -static int sd_uhs2_init_card(struct mmc_host *host) +static int sd_uhs2_init_card(struct mmc_host *host, struct mmc_card *oldcard) { struct mmc_card *card; u32 node_id = 0; @@ -131,29 +818,204 @@ static int sd_uhs2_init_card(struct mmc_host *host) if (err) return err; - card = mmc_alloc_card(host, &sd_type); - if (IS_ERR(card)) - return PTR_ERR(card); + if (oldcard) { + card = oldcard; + } else { + card = mmc_alloc_card(host, &sd_type); + if (IS_ERR(card)) + return PTR_ERR(card); + } card->uhs2_config.node_id = node_id; card->type = MMC_TYPE_SD; err = sd_uhs2_config_read(host, card); if (err) - goto err; + return err; err = sd_uhs2_config_write(host, card); if (err) - goto err; + return err; host->card = card; + /* If change speed to Range B, need to GO_DORMANT_STATE */ + if (host->ios.timing == MMC_TIMING_UHS2_SPEED_B || + host->ios.timing == MMC_TIMING_UHS2_SPEED_B_HD) { + err = sd_uhs2_go_dormant_state(host, node_id); + if (err) + return err; + } + + host->uhs2_sd_tran = true; + + return 0; +} + +/* + * Initialize the UHS-II card through the SD-TRAN transport layer. This enables + * commands/requests to be backwards compatible through the legacy SD protocol. + * UHS-II cards has a specific power limit specified for VDD1/VDD2, that should + * be set through a legacy CMD6. Note that, the power limit that becomes set, + * survives a soft reset through the GO_DORMANT_STATE command. + */ +static int sd_uhs2_legacy_init(struct mmc_host *host, struct mmc_card *card, + struct mmc_card *oldcard) +{ + int err; + u32 cid[4]; + u32 ocr; + u32 rocr; + u8 *status; + int ro; + + /* Send CMD0 to reset SD card */ + err = __mmc_go_idle(host); + if (err) + return err; + + mmc_delay(1); + + /* Send CMD8 to communicate SD interface operation condition */ + err = mmc_send_if_cond(host, host->ocr_avail); + if (err) { + dev_warn(mmc_dev(host), "CMD8 error\n"); + goto err; + } + + /* + * Probe SD card working voltage. + */ + err = mmc_send_app_op_cond(host, 0, &ocr); + if (err) + goto err; + + card->ocr = ocr; + + /* + * Some SD cards claims an out of spec VDD voltage range. Let's treat + * these bits as being in-valid and especially also bit7. + */ + ocr &= ~0x7FFF; + rocr = mmc_select_voltage(host, ocr); + /* + * Some cards have zero value of rocr in UHS-II mode. Assign host's + * ocr value to rocr. + */ + if (!rocr) + rocr = host->ocr_avail; + + rocr |= (SD_OCR_CCS | SD_OCR_XPC); + + /* Wait SD power on ready */ + ocr = rocr; + + err = mmc_send_app_op_cond(host, ocr, &rocr); + if (err) + goto err; + + err = mmc_send_cid(host, cid); + if (err) + goto err; + + if (oldcard) { + if (memcmp(cid, oldcard->raw_cid, sizeof(cid)) != 0) { + pr_debug("%s: Perhaps the card was replaced\n", + mmc_hostname(host)); + return -ENOENT; + } + + card = oldcard; + } else { + memcpy(card->raw_cid, cid, sizeof(card->raw_cid)); + mmc_decode_cid(card); + } + + /* + * For native busses: get card RCA and quit open drain mode. + */ + err = mmc_send_relative_addr(host, &card->rca); + if (err) + goto err; + + err = mmc_sd_get_csd(card, false); + if (err) + goto err; + + /* + * Select card, as all following commands rely on that. + */ + err = mmc_select_card(card); + if (err) + goto err; + + /* + * Fetch SCR from card. + */ + err = mmc_app_send_scr(card); + if (err) + goto err; + + err = mmc_decode_scr(card); + if (err) + goto err; + + /* + * Switch to high power consumption mode. + * Even switch failed, sd card can still work at lower power consumption mode, but + * performance will be lower than high power consumption mode. 
+ */ + status = kmalloc(64, GFP_KERNEL); + if (!status) + return -ENOMEM; + + if (!(card->csd.cmdclass & CCC_SWITCH)) { + pr_warn("%s: card lacks mandatory switch function, performance might suffer\n", + mmc_hostname(card->host)); + } else { + /* + * Send CMD6 to set Maximum Power Consumption to get better + * performance. Ignore errors and continue. + */ + err = mmc_sd_switch(card, 0, 3, SD4_SET_POWER_LIMIT_1_80W, status); + if (!err) + mmc_sd_switch(card, 1, 3, SD4_SET_POWER_LIMIT_1_80W, status); + } + + /* + * Check if read-only switch is active. + */ + ro = mmc_sd_get_ro(host); + if (ro < 0) + pr_warn("%s: host does not support read-only switch, assuming write-enable\n", + mmc_hostname(host)); + else if (ro > 0) + mmc_card_set_readonly(card); + + kfree(status); return 0; err: - mmc_remove_card(card); return err; } +static int sd_uhs2_reinit(struct mmc_host *host) +{ + struct mmc_card *card = host->card; + int err; + + sd_uhs2_power_up(host); + + err = sd_uhs2_phy_init(host); + if (err) + return err; + + err = sd_uhs2_init_card(host, card); + if (err) + return err; + + return sd_uhs2_legacy_init(host, card, card); +} + static void sd_uhs2_remove(struct mmc_host *host) { mmc_remove_card(host->card); @@ -183,34 +1045,105 @@ static void sd_uhs2_detect(struct mmc_host *host) } } -static int sd_uhs2_suspend(struct mmc_host *host) +static int _sd_uhs2_suspend(struct mmc_host *host) { + struct mmc_card *card = host->card; + + mmc_claim_host(host); + + if (mmc_card_suspended(card)) + goto out; + + sd_uhs2_power_off(host); + mmc_card_set_suspended(card); + +out: + mmc_release_host(host); return 0; } +/* + * Callback for suspend + */ +static int sd_uhs2_suspend(struct mmc_host *host) +{ + int err; + + err = _sd_uhs2_suspend(host); + if (!err) { + pm_runtime_disable(&host->card->dev); + pm_runtime_set_suspended(&host->card->dev); + } + + return err; +} + +/* + * This function tries to determine if the same card is still present + * and, if so, restore all state to it. + */ +static int _mmc_sd_uhs2_resume(struct mmc_host *host) +{ + int err = 0; + + mmc_claim_host(host); + + if (!mmc_card_suspended(host->card)) + goto out; + + /* Power up UHS2 SD card and re-initialize it. */ + err = sd_uhs2_reinit(host); + mmc_card_clr_suspended(host->card); + +out: + mmc_release_host(host); + return err; +} + +/* + * Callback for resume + */ static int sd_uhs2_resume(struct mmc_host *host) { + pm_runtime_enable(&host->card->dev); return 0; } +/* + * Callback for runtime_suspend. 
+ */ static int sd_uhs2_runtime_suspend(struct mmc_host *host) { - return 0; + int err; + + if (!(host->caps & MMC_CAP_AGGRESSIVE_PM)) + return 0; + + err = _sd_uhs2_suspend(host); + if (err) + pr_err("%s: error %d doing aggressive suspend\n", mmc_hostname(host), err); + + return err; } static int sd_uhs2_runtime_resume(struct mmc_host *host) { - return 0; -} + int err; -static int sd_uhs2_shutdown(struct mmc_host *host) -{ - return 0; + err = _mmc_sd_uhs2_resume(host); + if (err && err != -ENOMEDIUM) + pr_err("%s: error %d doing runtime resume\n", mmc_hostname(host), err); + + return err; } static int sd_uhs2_hw_reset(struct mmc_host *host) { - return 0; + sd_uhs2_power_off(host); + /* Wait at least 1 ms according to SD spec */ + mmc_delay(1); + + return sd_uhs2_reinit(host); } static const struct mmc_bus_ops sd_uhs2_ops = { @@ -221,7 +1154,7 @@ static const struct mmc_bus_ops sd_uhs2_ops = { .resume = sd_uhs2_resume, .runtime_suspend = sd_uhs2_runtime_suspend, .runtime_resume = sd_uhs2_runtime_resume, - .shutdown = sd_uhs2_shutdown, + .shutdown = sd_uhs2_suspend, .hw_reset = sd_uhs2_hw_reset, }; @@ -237,11 +1170,11 @@ static int sd_uhs2_attach(struct mmc_host *host) if (err) goto err; - err = sd_uhs2_init_card(host); + err = sd_uhs2_init_card(host, NULL); if (err) goto err; - err = sd_uhs2_legacy_init(host, host->card); + err = sd_uhs2_legacy_init(host, host->card, NULL); if (err) goto err; @@ -254,21 +1187,31 @@ static int sd_uhs2_attach(struct mmc_host *host) goto remove_card; mmc_claim_host(host); + return 0; remove_card: - mmc_remove_card(host->card); - host->card = NULL; + sd_uhs2_remove(host); mmc_claim_host(host); - mmc_detach_bus(host); + err: + mmc_detach_bus(host); sd_uhs2_power_off(host); return err; } +/** + * mmc_attach_sd_uhs2 - select UHS2 interface + * @host: MMC host + * + * Try to select UHS2 interface and initialize the bus for a given + * frequency, @freq. + * + * Return: 0 on success, non-zero error on failure + */ int mmc_attach_sd_uhs2(struct mmc_host *host) { - int i, err = 0; + int i, err; if (!(host->caps2 & MMC_CAP2_SD_UHS2)) return -EOPNOTSUPP; @@ -292,3 +1235,75 @@ int mmc_attach_sd_uhs2(struct mmc_host *host) return err; } + +/* + * mmc_uhs2_prepare_cmd - prepare for SD command packet + * @host: MMC host + * @mrq: MMC request + * + * Initialize and fill in a header and a payload of SD command packet. + * The caller should allocate uhs2_command in host->cmd->uhs2_cmd in + * advance. + * + * Return: 0 on success, non-zero error on failure + */ +void mmc_uhs2_prepare_cmd(struct mmc_host *host, struct mmc_request *mrq) +{ + struct mmc_command *cmd; + struct uhs2_command *uhs2_cmd; + u8 plen; + + cmd = mrq->cmd; + cmd->uhs2_cmd = &mrq->uhs2_cmd; + uhs2_cmd = cmd->uhs2_cmd; + uhs2_cmd->header = host->card->uhs2_config.node_id; + if ((cmd->flags & MMC_CMD_MASK) == MMC_CMD_ADTC) + uhs2_cmd->header |= UHS2_PACKET_TYPE_DCMD; + else + uhs2_cmd->header |= UHS2_PACKET_TYPE_CCMD; + + uhs2_cmd->arg = cmd->opcode << UHS2_SD_CMD_INDEX_POS; + if (host->uhs2_app_cmd) { + uhs2_cmd->arg |= UHS2_SD_CMD_APP; + host->uhs2_app_cmd = false; + } + + /* + * UHS-II Addendum 7.2.1.2 + * Host may set DM to 1 for DCMD which supports multi-block read/write regardless of + * data transfer length (e.g., CMD18, CMD25). Otherwise, it shall not set DM to 1. + * (e.g., CMD6, CMD17, CMD24). These rules are also applied to other multi-block read/write + * commands defined in other Part of SD specifications (for example, Host may set DM to 1 + * for ACMD18 or ACMD25). 
+ */ + if (mmc_op_multi(cmd->opcode)) + cmd->uhs2_cmd->tmode_half_duplex = mmc_card_uhs2_hd_mode(host); + else + cmd->uhs2_cmd->tmode_half_duplex = 0; + + uhs2_cmd = cmd->uhs2_cmd; + plen = 2; /* at the maximum */ + + if ((cmd->flags & MMC_CMD_MASK) == MMC_CMD_ADTC && + cmd->uhs2_cmd->tmode_half_duplex) { + if (mmc_card_uhs2_hd_mode(host)) + uhs2_cmd->arg |= UHS2_DCMD_2L_HD_MODE; + + uhs2_cmd->arg |= UHS2_DCMD_LM_TLEN_EXIST; + + if (cmd->data->blocks == 1 && + cmd->data->blksz != 512 && + cmd->opcode != MMC_READ_SINGLE_BLOCK && + cmd->opcode != MMC_WRITE_BLOCK) { + uhs2_cmd->arg |= UHS2_DCMD_TLUM_BYTE_MODE; + uhs2_cmd->payload[1] = cpu_to_be32(cmd->data->blksz); + } else { + uhs2_cmd->payload[1] = cpu_to_be32(cmd->data->blocks); + } + } else { + plen = 1; + } + + uhs2_cmd->payload[0] = cpu_to_be32(cmd->arg); + sd_uhs2_cmd_assemble(cmd, uhs2_cmd, plen, 0); +} diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index a890a71288ef..56972bd78462 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -11,6 +11,20 @@ struct mmc_data; struct mmc_request; +#define UHS2_MAX_PAYLOAD_LEN 2 +#define UHS2_MAX_RESP_LEN 20 + +struct uhs2_command { + u16 header; + u16 arg; + __be32 payload[UHS2_MAX_PAYLOAD_LEN]; + u8 payload_len; + u8 packet_len; + u8 tmode_half_duplex; + u8 uhs2_resp[UHS2_MAX_RESP_LEN]; /* UHS2 native cmd resp */ + u8 uhs2_resp_len; /* UHS2 native cmd resp len */ +}; + struct mmc_command { u32 opcode; u32 arg; @@ -97,6 +111,8 @@ struct mmc_command { struct mmc_data *data; /* data segment associated with cmd */ struct mmc_request *mrq; /* associated request */ + struct uhs2_command *uhs2_cmd; /* UHS2 command */ + /* for SDUC */ bool has_ext_addr; u8 ext_addr; @@ -158,6 +174,7 @@ struct mmc_request { const struct bio_crypt_ctx *crypto_ctx; int crypto_key_slot; #endif + struct uhs2_command uhs2_cmd; }; struct mmc_card; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0980d06ed419..f166d6611ddb 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -127,6 +127,13 @@ struct sd_uhs2_caps { }; enum sd_uhs2_operation { + UHS2_PHY_INIT = 0, + UHS2_SET_CONFIG, + UHS2_ENABLE_INT, + UHS2_DISABLE_INT, + UHS2_ENABLE_CLK, + UHS2_DISABLE_CLK, + UHS2_CHECK_DORMANT, UHS2_SET_IOS, }; @@ -453,6 +460,8 @@ struct mmc_host { #endif #define MMC_CAP2_ALT_GPT_TEGRA (1 << 28) /* Host with eMMC that has GPT entry at a non-standard location */ + bool uhs2_sd_tran; /* UHS-II flag for SD_TRAN state */ + bool uhs2_app_cmd; /* UHS-II flag for APP command */ struct sd_uhs2_caps uhs2_caps; /* Host UHS-II capabilities */ int fixed_drv_type; /* fixed driver type for non-removable media */ @@ -714,6 +723,12 @@ static inline void mmc_debugfs_err_stats_inc(struct mmc_host *host, host->err_stats[stat] += 1; } +static inline int mmc_card_uhs2_hd_mode(struct mmc_host *host) +{ + return host->ios.timing == MMC_TIMING_UHS2_SPEED_A_HD || + host->ios.timing == MMC_TIMING_UHS2_SPEED_B_HD; +} + int mmc_sd_switch(struct mmc_card *card, bool mode, int group, u8 value, u8 *resp); int mmc_send_status(struct mmc_card *card, u32 *status); -- cgit v1.2.3 From a5a98a786e5e31e85a69d30935254e8730734706 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Oct 2024 16:59:46 +0200 Subject: thermal: core: Add and use cooling device guard Add and use a special guard for cooling devices. This allows quite a few error code paths to be simplified among other things and brings in code size reduction for a good measure. No intentional functional impact. 
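The conversion in the hunks below follows one recurring before/after shape. A minimal sketch of that shape (illustrative only, not a buildable kernel unit; my_cdev_op() is a hypothetical caller, while the cooling_dev guard class, the cdev ops and the stats helper are the ones added or already used by this patch):

/* Before: explicit lock/unlock, so every early exit needs a cleanup label. */
static int my_cdev_op_old(struct thermal_cooling_device *cdev, unsigned long state)
{
	int ret;

	mutex_lock(&cdev->lock);
	ret = cdev->ops->set_cur_state(cdev, state);
	if (ret)
		goto unlock;
	thermal_cooling_device_stats_update(cdev, state);
unlock:
	mutex_unlock(&cdev->lock);
	return ret;
}

/* After: guard(cooling_dev)(cdev) releases cdev->lock automatically when the
 * scope ends, so early returns need no unlock label.
 */
static int my_cdev_op_new(struct thermal_cooling_device *cdev, unsigned long state)
{
	int ret;

	guard(cooling_dev)(cdev);

	ret = cdev->ops->set_cur_state(cdev, state);
	if (ret)
		return ret;

	thermal_cooling_device_stats_update(cdev, state);
	return 0;
}

scoped_guard(cooling_dev, cdev) { ... } is the block-scoped variant used where only part of a function needs the lock, as in the allow_maximum_power() hunk below.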
Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/5837621.DvuYhMxLoT@rjwysocki.net Reviewed-by: Lukasz Luba --- drivers/thermal/gov_power_allocator.c | 23 ++++++++-------- drivers/thermal/gov_step_wise.c | 6 ++--- drivers/thermal/thermal_core.c | 17 ++++-------- drivers/thermal/thermal_debugfs.c | 25 ++++++++++------- drivers/thermal/thermal_helpers.c | 19 ++++--------- drivers/thermal/thermal_sysfs.c | 51 +++++++++++++---------------------- include/linux/thermal.h | 3 +++ 7 files changed, 60 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index bdae60001b2c..5cb03923fa8f 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -549,18 +549,17 @@ static void allow_maximum_power(struct thermal_zone_device *tz) cdev = instance->cdev; instance->target = 0; - mutex_lock(&cdev->lock); - /* - * Call for updating the cooling devices local stats and avoid - * periods of dozen of seconds when those have not been - * maintained. - */ - cdev->ops->get_requested_power(cdev, &req_power); - - if (params->update_cdevs) - __thermal_cdev_update(cdev); - - mutex_unlock(&cdev->lock); + scoped_guard(cooling_dev, cdev) { + /* + * Call for updating the cooling devices local stats and + * avoid periods of dozen of seconds when those have not + * been maintained. + */ + cdev->ops->get_requested_power(cdev, &req_power); + + if (params->update_cdevs) + __thermal_cdev_update(cdev); + } } } diff --git a/drivers/thermal/gov_step_wise.c b/drivers/thermal/gov_step_wise.c index ea4bf88d37f3..d1bb59f1dfbd 100644 --- a/drivers/thermal/gov_step_wise.c +++ b/drivers/thermal/gov_step_wise.c @@ -97,9 +97,9 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, instance->initialized = true; - mutex_lock(&instance->cdev->lock); - instance->cdev->updated = false; /* cdev needs update */ - mutex_unlock(&instance->cdev->lock); + scoped_guard(cooling_dev, instance->cdev) { + instance->cdev->updated = false; /* cdev needs update */ + } } } diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index ab0c7c6396c0..f98aa396787a 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -758,12 +758,10 @@ static int thermal_instance_add(struct thermal_instance *new_instance, list_add_tail(&new_instance->trip_node, &td->thermal_instances); - mutex_lock(&cdev->lock); + guard(cooling_dev)(cdev); list_add_tail(&new_instance->cdev_node, &cdev->thermal_instances); - mutex_unlock(&cdev->lock); - return 0; } @@ -872,11 +870,9 @@ static void thermal_instance_delete(struct thermal_instance *instance) { list_del(&instance->trip_node); - mutex_lock(&instance->cdev->lock); + guard(cooling_dev)(instance->cdev); list_del(&instance->cdev_node); - - mutex_unlock(&instance->cdev->lock); } /** @@ -1239,10 +1235,10 @@ void thermal_cooling_device_update(struct thermal_cooling_device *cdev) * Update under the cdev lock to prevent the state from being set beyond * the new limit concurrently. 
*/ - mutex_lock(&cdev->lock); + guard(cooling_dev)(cdev); if (cdev->ops->get_max_state(cdev, &cdev->max_state)) - goto unlock; + return; thermal_cooling_device_stats_reinit(cdev); @@ -1269,12 +1265,9 @@ void thermal_cooling_device_update(struct thermal_cooling_device *cdev) } if (cdev->ops->get_cur_state(cdev, &state) || state > cdev->max_state) - goto unlock; + return; thermal_cooling_device_stats_update(cdev, state); - -unlock: - mutex_unlock(&cdev->lock); } EXPORT_SYMBOL_GPL(thermal_cooling_device_update); diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c index d67021aeb136..c800504c3cfe 100644 --- a/drivers/thermal/thermal_debugfs.c +++ b/drivers/thermal/thermal_debugfs.c @@ -516,6 +516,19 @@ void thermal_debug_cdev_add(struct thermal_cooling_device *cdev, int state) cdev->debugfs = thermal_dbg; } +static struct thermal_debugfs *thermal_debug_cdev_clear(struct thermal_cooling_device *cdev) +{ + struct thermal_debugfs *thermal_dbg; + + guard(cooling_dev)(cdev); + + thermal_dbg = cdev->debugfs; + if (thermal_dbg) + cdev->debugfs = NULL; + + return thermal_dbg; +} + /** * thermal_debug_cdev_remove - Remove a cooling device debugfs entry * @@ -527,17 +540,9 @@ void thermal_debug_cdev_remove(struct thermal_cooling_device *cdev) { struct thermal_debugfs *thermal_dbg; - mutex_lock(&cdev->lock); - - thermal_dbg = cdev->debugfs; - if (!thermal_dbg) { - mutex_unlock(&cdev->lock); + thermal_dbg = thermal_debug_cdev_clear(cdev); + if (!thermal_dbg) return; - } - - cdev->debugfs = NULL; - - mutex_unlock(&cdev->lock); mutex_lock(&thermal_dbg->lock); diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c index 030a4e4bc824..b1152ad7acc9 100644 --- a/drivers/thermal/thermal_helpers.c +++ b/drivers/thermal/thermal_helpers.c @@ -58,17 +58,10 @@ bool thermal_trip_is_bound_to_cdev(struct thermal_zone_device *tz, const struct thermal_trip *trip, struct thermal_cooling_device *cdev) { - bool ret; - guard(thermal_zone)(tz); + guard(cooling_dev)(cdev); - mutex_lock(&cdev->lock); - - ret = thermal_instance_present(tz, cdev, trip); - - mutex_unlock(&cdev->lock); - - return ret; + return thermal_instance_present(tz, cdev, trip); } EXPORT_SYMBOL_GPL(thermal_trip_is_bound_to_cdev); @@ -197,12 +190,12 @@ void __thermal_cdev_update(struct thermal_cooling_device *cdev) */ void thermal_cdev_update(struct thermal_cooling_device *cdev) { - mutex_lock(&cdev->lock); + guard(cooling_dev)(cdev); + if (!cdev->updated) { __thermal_cdev_update(cdev); cdev->updated = true; } - mutex_unlock(&cdev->lock); } /** @@ -211,11 +204,9 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev) */ void thermal_cdev_update_nocheck(struct thermal_cooling_device *cdev) { - mutex_lock(&cdev->lock); + guard(cooling_dev)(cdev); __thermal_cdev_update(cdev); - - mutex_unlock(&cdev->lock); } /** diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index 701607a953ff..24b9055a0b6c 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -544,14 +544,15 @@ cur_state_store(struct device *dev, struct device_attribute *attr, if (state > cdev->max_state) return -EINVAL; - mutex_lock(&cdev->lock); + guard(cooling_dev)(cdev); result = cdev->ops->set_cur_state(cdev, state); - if (!result) - thermal_cooling_device_stats_update(cdev, state); + if (result) + return result; + + thermal_cooling_device_stats_update(cdev, state); - mutex_unlock(&cdev->lock); - return result ? 
result : count; + return count; } static struct device_attribute @@ -625,21 +626,18 @@ static ssize_t total_trans_show(struct device *dev, { struct thermal_cooling_device *cdev = to_cooling_device(dev); struct cooling_dev_stats *stats; - int ret = 0; + int ret; - mutex_lock(&cdev->lock); + guard(cooling_dev)(cdev); stats = cdev->stats; if (!stats) - goto unlock; + return 0; spin_lock(&stats->lock); ret = sprintf(buf, "%u\n", stats->total_trans); spin_unlock(&stats->lock); -unlock: - mutex_unlock(&cdev->lock); - return ret; } @@ -652,11 +650,11 @@ time_in_state_ms_show(struct device *dev, struct device_attribute *attr, ssize_t len = 0; int i; - mutex_lock(&cdev->lock); + guard(cooling_dev)(cdev); stats = cdev->stats; if (!stats) - goto unlock; + return 0; spin_lock(&stats->lock); @@ -668,9 +666,6 @@ time_in_state_ms_show(struct device *dev, struct device_attribute *attr, } spin_unlock(&stats->lock); -unlock: - mutex_unlock(&cdev->lock); - return len; } @@ -682,11 +677,11 @@ reset_store(struct device *dev, struct device_attribute *attr, const char *buf, struct cooling_dev_stats *stats; int i, states; - mutex_lock(&cdev->lock); + guard(cooling_dev)(cdev); stats = cdev->stats; if (!stats) - goto unlock; + return count; states = cdev->max_state + 1; @@ -702,9 +697,6 @@ reset_store(struct device *dev, struct device_attribute *attr, const char *buf, spin_unlock(&stats->lock); -unlock: - mutex_unlock(&cdev->lock); - return count; } @@ -716,13 +708,11 @@ static ssize_t trans_table_show(struct device *dev, ssize_t len = 0; int i, j; - mutex_lock(&cdev->lock); + guard(cooling_dev)(cdev); stats = cdev->stats; - if (!stats) { - len = -ENODATA; - goto unlock; - } + if (!stats) + return -ENODATA; len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n"); len += snprintf(buf + len, PAGE_SIZE - len, " : "); @@ -731,10 +721,8 @@ static ssize_t trans_table_show(struct device *dev, break; len += snprintf(buf + len, PAGE_SIZE - len, "state%2u ", i); } - if (len >= PAGE_SIZE) { - len = PAGE_SIZE; - goto unlock; - } + if (len >= PAGE_SIZE) + return PAGE_SIZE; len += snprintf(buf + len, PAGE_SIZE - len, "\n"); @@ -760,9 +748,6 @@ static ssize_t trans_table_show(struct device *dev, len = -EFBIG; } -unlock: - mutex_unlock(&cdev->lock); - return len; } diff --git a/include/linux/thermal.h b/include/linux/thermal.h index bcaa92732e14..754802478b96 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -140,6 +140,9 @@ struct thermal_cooling_device { #endif }; +DEFINE_GUARD(cooling_dev, struct thermal_cooling_device *, mutex_lock(&_T->lock), + mutex_unlock(&_T->lock)) + /* Structure to define Thermal Zone parameters */ struct thermal_zone_params { const char *governor_name; -- cgit v1.2.3 From 1773572863c43a14a3e45f0591f28b7dec1ee52a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 22 Oct 2024 17:51:42 +0200 Subject: thermal: netlink: Add the commands and the events for the thresholds The thresholds exist but there is no notification neither action code related to them yet. These changes implement the netlink for the notifications when the thresholds are crossed, added, deleted or flushed as well as the commands which allows to get the list of the thresholds, flush them, add and delete. Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/20241022155147.463475-3-daniel.lezcano@linaro.org [ rjw: Use the thermal_zone guard for locking, subject edit ] Signed-off-by: Rafael J. 
Wysocki --- drivers/thermal/thermal_netlink.c | 232 ++++++++++++++++++++++++++++++++++- drivers/thermal/thermal_netlink.h | 34 +++++ drivers/thermal/thermal_thresholds.c | 34 ++--- drivers/thermal/thermal_thresholds.h | 2 +- include/uapi/linux/thermal.h | 27 ++-- 5 files changed, 301 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c index 91f3fe8c8f07..315a76b01f6a 100644 --- a/drivers/thermal/thermal_netlink.c +++ b/drivers/thermal/thermal_netlink.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,11 @@ static const struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = [THERMAL_GENL_ATTR_CPU_CAPABILITY_ID] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY] = { .type = NLA_U32 }, + + /* Thresholds */ + [THERMAL_GENL_ATTR_THRESHOLD] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_THRESHOLD_TEMP] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_THRESHOLD_DIRECTION] = { .type = NLA_U32 }, }; struct param { @@ -62,6 +68,8 @@ struct param { int trip_type; int trip_hyst; int temp; + int prev_temp; + int direction; int cdev_state; int cdev_max_state; struct thermal_genl_cpu_caps *cpu_capabilities; @@ -234,6 +242,34 @@ out_cancel_nest: return -EMSGSIZE; } +static int thermal_genl_event_threshold_add(struct param *p) +{ + if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id) || + nla_put_u32(p->msg, THERMAL_GENL_ATTR_THRESHOLD_TEMP, p->temp) || + nla_put_u32(p->msg, THERMAL_GENL_ATTR_THRESHOLD_DIRECTION, p->direction)) + return -EMSGSIZE; + + return 0; +} + +static int thermal_genl_event_threshold_flush(struct param *p) +{ + if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id)) + return -EMSGSIZE; + + return 0; +} + +static int thermal_genl_event_threshold_up(struct param *p) +{ + if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id) || + nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_PREV_TEMP, p->prev_temp) || + nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_TEMP, p->temp)) + return -EMSGSIZE; + + return 0; +} + int thermal_genl_event_tz_delete(struct param *p) __attribute__((alias("thermal_genl_event_tz"))); @@ -246,6 +282,12 @@ int thermal_genl_event_tz_disable(struct param *p) int thermal_genl_event_tz_trip_down(struct param *p) __attribute__((alias("thermal_genl_event_tz_trip_up"))); +int thermal_genl_event_threshold_delete(struct param *p) + __attribute__((alias("thermal_genl_event_threshold_add"))); + +int thermal_genl_event_threshold_down(struct param *p) + __attribute__((alias("thermal_genl_event_threshold_up"))); + static cb_t event_cb[] = { [THERMAL_GENL_EVENT_TZ_CREATE] = thermal_genl_event_tz_create, [THERMAL_GENL_EVENT_TZ_DELETE] = thermal_genl_event_tz_delete, @@ -259,6 +301,11 @@ static cb_t event_cb[] = { [THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = thermal_genl_event_cdev_state_update, [THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = thermal_genl_event_gov_change, [THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE] = thermal_genl_event_cpu_capability_change, + [THERMAL_GENL_EVENT_THRESHOLD_ADD] = thermal_genl_event_threshold_add, + [THERMAL_GENL_EVENT_THRESHOLD_DELETE] = thermal_genl_event_threshold_delete, + [THERMAL_GENL_EVENT_THRESHOLD_FLUSH] = thermal_genl_event_threshold_flush, + [THERMAL_GENL_EVENT_THRESHOLD_DOWN] = thermal_genl_event_threshold_down, + [THERMAL_GENL_EVENT_THRESHOLD_UP] = thermal_genl_event_threshold_up, }; /* @@ -401,6 +448,43 @@ int 
thermal_genl_cpu_capability_event(int count, } EXPORT_SYMBOL_GPL(thermal_genl_cpu_capability_event); +int thermal_notify_threshold_add(const struct thermal_zone_device *tz, + int temperature, int direction) +{ + struct param p = { .tz_id = tz->id, .temp = temperature, .direction = direction }; + + return thermal_genl_send_event(THERMAL_GENL_EVENT_THRESHOLD_ADD, &p); +} + +int thermal_notify_threshold_delete(const struct thermal_zone_device *tz, + int temperature, int direction) +{ + struct param p = { .tz_id = tz->id, .temp = temperature, .direction = direction }; + + return thermal_genl_send_event(THERMAL_GENL_EVENT_THRESHOLD_DELETE, &p); +} + +int thermal_notify_threshold_flush(const struct thermal_zone_device *tz) +{ + struct param p = { .tz_id = tz->id }; + + return thermal_genl_send_event(THERMAL_GENL_EVENT_THRESHOLD_FLUSH, &p); +} + +int thermal_notify_threshold_down(const struct thermal_zone_device *tz) +{ + struct param p = { .tz_id = tz->id, .temp = tz->temperature, .prev_temp = tz->last_temperature }; + + return thermal_genl_send_event(THERMAL_GENL_EVENT_THRESHOLD_DOWN, &p); +} + +int thermal_notify_threshold_up(const struct thermal_zone_device *tz) +{ + struct param p = { .tz_id = tz->id, .temp = tz->temperature, .prev_temp = tz->last_temperature }; + + return thermal_genl_send_event(THERMAL_GENL_EVENT_THRESHOLD_UP, &p); +} + /*************************** Command encoding ********************************/ static int __thermal_genl_cmd_tz_get_id(struct thermal_zone_device *tz, @@ -563,12 +647,128 @@ out_cancel_nest: return ret; } +static int __thermal_genl_cmd_threshold_get(struct user_threshold *threshold, void *arg) +{ + struct sk_buff *msg = arg; + + if (nla_put_u32(msg, THERMAL_GENL_ATTR_THRESHOLD_TEMP, threshold->temperature) || + nla_put_u32(msg, THERMAL_GENL_ATTR_THRESHOLD_DIRECTION, threshold->direction)) + return -1; + + return 0; +} + +static int thermal_genl_cmd_threshold_get(struct param *p) +{ + struct sk_buff *msg = p->msg; + struct nlattr *start_trip; + int id, ret; + + if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID]) + return -EINVAL; + + id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); + + CLASS(thermal_zone_get_by_id, tz)(id); + if (!tz) + return -EINVAL; + + start_trip = nla_nest_start(msg, THERMAL_GENL_ATTR_THRESHOLD); + if (!start_trip) + return -EMSGSIZE; + + ret = thermal_thresholds_for_each(tz, __thermal_genl_cmd_threshold_get, msg); + if (ret) + return -EMSGSIZE; + + nla_nest_end(msg, start_trip); + + return 0; +} + +static int thermal_genl_cmd_threshold_add(struct param *p) +{ + int id, temp, direction; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID] || + !p->attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP] || + !p->attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]) + return -EINVAL; + + id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); + temp = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP]); + direction = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]); + + CLASS(thermal_zone_get_by_id, tz)(id); + if (!tz) + return -EINVAL; + + guard(thermal_zone)(tz); + + return thermal_thresholds_add(tz, temp, direction); +} + +static int thermal_genl_cmd_threshold_delete(struct param *p) +{ + int id, temp, direction; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID] || + !p->attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP] || + !p->attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]) + return -EINVAL; + + id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); + temp = 
nla_get_u32(p->attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP]); + direction = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]); + + CLASS(thermal_zone_get_by_id, tz)(id); + if (!tz) + return -EINVAL; + + guard(thermal_zone)(tz); + + return thermal_thresholds_delete(tz, temp, direction); +} + +static int thermal_genl_cmd_threshold_flush(struct param *p) +{ + int id; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID]) + return -EINVAL; + + id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); + + CLASS(thermal_zone_get_by_id, tz)(id); + if (!tz) + return -EINVAL; + + guard(thermal_zone)(tz); + + thermal_thresholds_flush(tz); + + return 0; +} + static cb_t cmd_cb[] = { - [THERMAL_GENL_CMD_TZ_GET_ID] = thermal_genl_cmd_tz_get_id, - [THERMAL_GENL_CMD_TZ_GET_TRIP] = thermal_genl_cmd_tz_get_trip, - [THERMAL_GENL_CMD_TZ_GET_TEMP] = thermal_genl_cmd_tz_get_temp, - [THERMAL_GENL_CMD_TZ_GET_GOV] = thermal_genl_cmd_tz_get_gov, - [THERMAL_GENL_CMD_CDEV_GET] = thermal_genl_cmd_cdev_get, + [THERMAL_GENL_CMD_TZ_GET_ID] = thermal_genl_cmd_tz_get_id, + [THERMAL_GENL_CMD_TZ_GET_TRIP] = thermal_genl_cmd_tz_get_trip, + [THERMAL_GENL_CMD_TZ_GET_TEMP] = thermal_genl_cmd_tz_get_temp, + [THERMAL_GENL_CMD_TZ_GET_GOV] = thermal_genl_cmd_tz_get_gov, + [THERMAL_GENL_CMD_CDEV_GET] = thermal_genl_cmd_cdev_get, + [THERMAL_GENL_CMD_THRESHOLD_GET] = thermal_genl_cmd_threshold_get, + [THERMAL_GENL_CMD_THRESHOLD_ADD] = thermal_genl_cmd_threshold_add, + [THERMAL_GENL_CMD_THRESHOLD_DELETE] = thermal_genl_cmd_threshold_delete, + [THERMAL_GENL_CMD_THRESHOLD_FLUSH] = thermal_genl_cmd_threshold_flush, }; static int thermal_genl_cmd_dumpit(struct sk_buff *skb, @@ -679,6 +879,26 @@ static const struct genl_small_ops thermal_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = thermal_genl_cmd_dumpit, }, + { + .cmd = THERMAL_GENL_CMD_THRESHOLD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = thermal_genl_cmd_doit, + }, + { + .cmd = THERMAL_GENL_CMD_THRESHOLD_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = thermal_genl_cmd_doit, + }, + { + .cmd = THERMAL_GENL_CMD_THRESHOLD_DELETE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = thermal_genl_cmd_doit, + }, + { + .cmd = THERMAL_GENL_CMD_THRESHOLD_FLUSH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = thermal_genl_cmd_doit, + }, }; static struct genl_family thermal_genl_family __ro_after_init = { @@ -691,7 +911,7 @@ static struct genl_family thermal_genl_family __ro_after_init = { .unbind = thermal_genl_unbind, .small_ops = thermal_genl_ops, .n_small_ops = ARRAY_SIZE(thermal_genl_ops), - .resv_start_op = THERMAL_GENL_CMD_CDEV_GET + 1, + .resv_start_op = __THERMAL_GENL_CMD_MAX, .mcgrps = thermal_genl_mcgrps, .n_mcgrps = ARRAY_SIZE(thermal_genl_mcgrps), }; diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h index e01221e8816b..075e9ae85f3d 100644 --- a/drivers/thermal/thermal_netlink.h +++ b/drivers/thermal/thermal_netlink.h @@ -53,6 +53,13 @@ int thermal_notify_tz_gov_change(const struct thermal_zone_device *tz, int thermal_genl_sampling_temp(int id, int temp); int thermal_genl_cpu_capability_event(int count, struct thermal_genl_cpu_caps *caps); +int thermal_notify_threshold_add(const struct thermal_zone_device *tz, + int temperature, int direction); +int thermal_notify_threshold_delete(const struct thermal_zone_device *tz, + int temperature, int 
direction); +int thermal_notify_threshold_flush(const struct thermal_zone_device *tz); +int thermal_notify_threshold_down(const struct thermal_zone_device *tz); +int thermal_notify_threshold_up(const struct thermal_zone_device *tz); #else static inline int thermal_netlink_init(void) { @@ -139,6 +146,33 @@ static inline int thermal_genl_cpu_capability_event(int count, struct thermal_ge return 0; } +static inline int thermal_notify_threshold_add(const struct thermal_zone_device *tz, + int temperature, int direction) +{ + return 0; +} + +static inline int thermal_notify_threshold_delete(const struct thermal_zone_device *tz, + int temperature, int direction) +{ + return 0; +} + +static inline int thermal_notify_threshold_flush(const struct thermal_zone_device *tz) +{ + return 0; +} + +static inline int thermal_notify_threshold_down(const struct thermal_zone_device *tz) +{ + return 0; +} + +static inline int thermal_notify_threshold_up(const struct thermal_zone_device *tz) +{ + return 0; +} + static inline void __init thermal_netlink_exit(void) {} #endif /* CONFIG_THERMAL_NETLINK */ diff --git a/drivers/thermal/thermal_thresholds.c b/drivers/thermal/thermal_thresholds.c index f33b6d5474d8..9b063199a789 100644 --- a/drivers/thermal/thermal_thresholds.c +++ b/drivers/thermal/thermal_thresholds.c @@ -32,6 +32,8 @@ void thermal_thresholds_flush(struct thermal_zone_device *tz) kfree(entry); } + thermal_notify_threshold_flush(tz); + __thermal_zone_device_update(tz, THERMAL_TZ_FLUSH_THRESHOLDS); } @@ -122,7 +124,6 @@ void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *hi int temperature = tz->temperature; int last_temperature = tz->last_temperature; - bool notify; lockdep_assert_held(&tz->lock); @@ -144,19 +145,19 @@ void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *hi * - increased : thresholds are crossed the way up * - decreased : thresholds are crossed the way down */ - if (temperature > last_temperature) - notify = thermal_thresholds_handle_raising(thresholds, temperature, - last_temperature, low, high); - else - notify = thermal_thresholds_handle_dropping(thresholds, temperature, - last_temperature, low, high); - - if (notify) - pr_debug("A threshold has been crossed the way %s, with a temperature=%d, last_temperature=%d\n", - temperature > last_temperature ? 
"up" : "down", temperature, last_temperature); + if (temperature > last_temperature) { + if (thermal_thresholds_handle_raising(thresholds, temperature, + last_temperature, low, high)) + thermal_notify_threshold_up(tz); + } else { + if (thermal_thresholds_handle_dropping(thresholds, temperature, + last_temperature, low, high)) + thermal_notify_threshold_down(tz); + } } -int thermal_thresholds_add(struct thermal_zone_device *tz, int temperature, int direction) +int thermal_thresholds_add(struct thermal_zone_device *tz, + int temperature, int direction) { struct list_head *thresholds = &tz->user_thresholds; struct user_threshold *t; @@ -182,12 +183,15 @@ int thermal_thresholds_add(struct thermal_zone_device *tz, int temperature, int list_sort(NULL, thresholds, __thermal_thresholds_cmp); } + thermal_notify_threshold_add(tz, temperature, direction); + __thermal_zone_device_update(tz, THERMAL_TZ_ADD_THRESHOLD); return 0; } -int thermal_thresholds_delete(struct thermal_zone_device *tz, int temperature, int direction) +int thermal_thresholds_delete(struct thermal_zone_device *tz, + int temperature, int direction) { struct list_head *thresholds = &tz->user_thresholds; struct user_threshold *t; @@ -205,6 +209,8 @@ int thermal_thresholds_delete(struct thermal_zone_device *tz, int temperature, i t->direction &= ~direction; } + thermal_notify_threshold_delete(tz, temperature, direction); + __thermal_zone_device_update(tz, THERMAL_TZ_DEL_THRESHOLD); return 0; @@ -217,7 +223,7 @@ int thermal_thresholds_for_each(struct thermal_zone_device *tz, struct user_threshold *entry; int ret; - lockdep_assert_held(&tz->lock); + guard(thermal_zone)(tz); list_for_each_entry(entry, thresholds, list_node) { ret = cb(entry, arg); diff --git a/drivers/thermal/thermal_thresholds.h b/drivers/thermal/thermal_thresholds.h index 232f4e8089af..cb372659a20d 100644 --- a/drivers/thermal/thermal_thresholds.h +++ b/drivers/thermal/thermal_thresholds.h @@ -10,8 +10,8 @@ struct user_threshold { int thermal_thresholds_init(struct thermal_zone_device *tz); void thermal_thresholds_exit(struct thermal_zone_device *tz); -void thermal_thresholds_flush(struct thermal_zone_device *tz); void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *high); +void thermal_thresholds_flush(struct thermal_zone_device *tz); int thermal_thresholds_add(struct thermal_zone_device *tz, int temperature, int direction); int thermal_thresholds_delete(struct thermal_zone_device *tz, int temperature, int direction); int thermal_thresholds_for_each(struct thermal_zone_device *tz, diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index 2e6f60a36173..ba8604bdf206 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -20,7 +20,7 @@ enum thermal_trip_type { /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal" -#define THERMAL_GENL_VERSION 0x01 +#define THERMAL_GENL_VERSION 0x02 #define THERMAL_GENL_SAMPLING_GROUP_NAME "sampling" #define THERMAL_GENL_EVENT_GROUP_NAME "event" @@ -30,6 +30,7 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_TZ, THERMAL_GENL_ATTR_TZ_ID, THERMAL_GENL_ATTR_TZ_TEMP, + THERMAL_GENL_ATTR_TZ_PREV_TEMP, THERMAL_GENL_ATTR_TZ_TRIP, THERMAL_GENL_ATTR_TZ_TRIP_ID, THERMAL_GENL_ATTR_TZ_TRIP_TYPE, @@ -50,6 +51,9 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_CPU_CAPABILITY_ID, THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE, THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY, + THERMAL_GENL_ATTR_THRESHOLD, + THERMAL_GENL_ATTR_THRESHOLD_TEMP, + 
THERMAL_GENL_ATTR_THRESHOLD_DIRECTION, __THERMAL_GENL_ATTR_MAX, }; #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) @@ -77,6 +81,11 @@ enum thermal_genl_event { THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, /* Cdev state updated */ THERMAL_GENL_EVENT_TZ_GOV_CHANGE, /* Governor policy changed */ THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, /* CPU capability changed */ + THERMAL_GENL_EVENT_THRESHOLD_ADD, /* A thresold has been added */ + THERMAL_GENL_EVENT_THRESHOLD_DELETE, /* A thresold has been deleted */ + THERMAL_GENL_EVENT_THRESHOLD_FLUSH, /* All thresolds have been deleted */ + THERMAL_GENL_EVENT_THRESHOLD_UP, /* A thresold has been crossed the way up */ + THERMAL_GENL_EVENT_THRESHOLD_DOWN, /* A thresold has been crossed the way down */ __THERMAL_GENL_EVENT_MAX, }; #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1) @@ -84,12 +93,16 @@ enum thermal_genl_event { /* Commands supported by the thermal_genl_family */ enum thermal_genl_cmd { THERMAL_GENL_CMD_UNSPEC, - THERMAL_GENL_CMD_TZ_GET_ID, /* List of thermal zones id */ - THERMAL_GENL_CMD_TZ_GET_TRIP, /* List of thermal trips */ - THERMAL_GENL_CMD_TZ_GET_TEMP, /* Get the thermal zone temperature */ - THERMAL_GENL_CMD_TZ_GET_GOV, /* Get the thermal zone governor */ - THERMAL_GENL_CMD_TZ_GET_MODE, /* Get the thermal zone mode */ - THERMAL_GENL_CMD_CDEV_GET, /* List of cdev id */ + THERMAL_GENL_CMD_TZ_GET_ID, /* List of thermal zones id */ + THERMAL_GENL_CMD_TZ_GET_TRIP, /* List of thermal trips */ + THERMAL_GENL_CMD_TZ_GET_TEMP, /* Get the thermal zone temperature */ + THERMAL_GENL_CMD_TZ_GET_GOV, /* Get the thermal zone governor */ + THERMAL_GENL_CMD_TZ_GET_MODE, /* Get the thermal zone mode */ + THERMAL_GENL_CMD_CDEV_GET, /* List of cdev id */ + THERMAL_GENL_CMD_THRESHOLD_GET, /* List of thresholds */ + THERMAL_GENL_CMD_THRESHOLD_ADD, /* Add a threshold */ + THERMAL_GENL_CMD_THRESHOLD_DELETE, /* Delete a threshold */ + THERMAL_GENL_CMD_THRESHOLD_FLUSH, /* Flush all the thresholds */ __THERMAL_GENL_CMD_MAX, }; #define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1) -- cgit v1.2.3 From 68ed5c38b512b734caf3da1f87db4a99fcfe3002 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 11:31:33 -0700 Subject: phonet: Pass net and ifindex to phonet_address_notify(). Currently, phonet_address_notify() fetches netns and ifindex from dev. Once addr_doit() is converted to RCU, phonet_address_notify() will be called outside of RCU due to GFP_KERNEL, and dev will be unavailable there. Let's pass net and ifindex to phonet_address_notify(). 
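The resulting caller pattern is to snapshot the netns and ifindex while dev is still safe to dereference and then notify with those values, never touching dev again. A minimal sketch (illustrative only; example_notify_addr_del() is a hypothetical caller, the notify call matches the new signature introduced here):

/* Snapshot what the notification needs while dev is still valid... */
static void example_notify_addr_del(struct net_device *dev, u8 addr)
{
	struct net *net = dev_net(dev);
	u32 ifindex = dev->ifindex;

	/* ...so the GFP_KERNEL notification no longer needs dev at all. */
	phonet_address_notify(net, RTM_DELADDR, ifindex, addr);
}

This is the same shape phonet_device_destroy() takes in the hunk below.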
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/phonet/pn_dev.h | 2 +- net/phonet/pn_dev.c | 10 +++++++--- net/phonet/pn_netlink.c | 12 ++++++------ 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index e9dc8dca5817..6b2102b4ece3 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -38,7 +38,7 @@ int phonet_address_add(struct net_device *dev, u8 addr); int phonet_address_del(struct net_device *dev, u8 addr); u8 phonet_address_get(struct net_device *dev, u8 addr); int phonet_address_lookup(struct net *net, u8 addr); -void phonet_address_notify(int event, struct net_device *dev, u8 addr); +void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr); int phonet_route_add(struct net_device *dev, u8 daddr); int phonet_route_del(struct net_device *dev, u8 daddr); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index cde671d29d5d..2e7d850dc726 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -98,10 +98,13 @@ static void phonet_device_destroy(struct net_device *dev) mutex_unlock(&pndevs->lock); if (pnd) { + struct net *net = dev_net(dev); + u32 ifindex = dev->ifindex; u8 addr; for_each_set_bit(addr, pnd->addrs, 64) - phonet_address_notify(RTM_DELADDR, dev, addr); + phonet_address_notify(net, RTM_DELADDR, ifindex, addr); + kfree(pnd); } } @@ -244,8 +247,9 @@ static int phonet_device_autoconf(struct net_device *dev) ret = phonet_address_add(dev, req.ifr_phonet_autoconf.device); if (ret) return ret; - phonet_address_notify(RTM_NEWADDR, dev, - req.ifr_phonet_autoconf.device); + + phonet_address_notify(dev_net(dev), RTM_NEWADDR, dev->ifindex, + req.ifr_phonet_autoconf.device); return 0; } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 3205d2457477..23097085ad38 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -22,7 +22,7 @@ static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr, u32 portid, u32 seq, int event); -void phonet_address_notify(int event, struct net_device *dev, u8 addr) +void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr) { struct sk_buff *skb; int err = -ENOBUFS; @@ -32,17 +32,17 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr) if (skb == NULL) goto errout; - err = fill_addr(skb, dev->ifindex, addr, 0, 0, event); + err = fill_addr(skb, ifindex, addr, 0, 0, event); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, dev_net(dev), 0, - RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); + + rtnl_notify(skb, net, 0, RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); return; errout: - rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_PHONET_IFADDR, err); } static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = { @@ -89,7 +89,7 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, else err = phonet_address_del(dev, pnaddr); if (!err) - phonet_address_notify(nlh->nlmsg_type, dev, pnaddr); + phonet_address_notify(net, nlh->nlmsg_type, ifm->ifa_index, pnaddr); return err; } -- cgit v1.2.3 From 42f5fe1dc4babad1c49bcc4121983fffccee3cd9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 11:31:34 -0700 Subject: phonet: Convert phonet_device_list.lock to spinlock_t. addr_doit() calls phonet_address_add() or phonet_address_del() for RTM_NEWADDR or RTM_DELADDR, respectively. 
Both functions only touch phonet_device_list(dev_net(dev)), which is currently protected by RTNL and its dedicated mutex, phonet_device_list.lock. We will convert addr_doit() to RCU and cannot use mutex inside RCU. Let's convert the mutex to spinlock_t. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/phonet/pn_dev.h | 3 ++- net/phonet/pn_dev.c | 26 +++++++++++++++++--------- 2 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 6b2102b4ece3..ac0331d83a81 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -12,12 +12,13 @@ #include #include +#include struct net; struct phonet_device_list { struct list_head list; - struct mutex lock; + spinlock_t lock; }; struct phonet_device_list *phonet_device_list(struct net *net); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 2e7d850dc726..545279ef5910 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -54,7 +54,7 @@ static struct phonet_device *__phonet_device_alloc(struct net_device *dev) pnd->netdev = dev; bitmap_zero(pnd->addrs, 64); - BUG_ON(!mutex_is_locked(&pndevs->lock)); + lockdep_assert_held(&pndevs->lock); list_add_rcu(&pnd->list, &pndevs->list); return pnd; } @@ -64,7 +64,8 @@ static struct phonet_device *__phonet_get(struct net_device *dev) struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; - BUG_ON(!mutex_is_locked(&pndevs->lock)); + lockdep_assert_held(&pndevs->lock); + list_for_each_entry(pnd, &pndevs->list, list) { if (pnd->netdev == dev) return pnd; @@ -91,11 +92,13 @@ static void phonet_device_destroy(struct net_device *dev) ASSERT_RTNL(); - mutex_lock(&pndevs->lock); + spin_lock(&pndevs->lock); + pnd = __phonet_get(dev); if (pnd) list_del_rcu(&pnd->list); - mutex_unlock(&pndevs->lock); + + spin_unlock(&pndevs->lock); if (pnd) { struct net *net = dev_net(dev); @@ -136,7 +139,8 @@ int phonet_address_add(struct net_device *dev, u8 addr) struct phonet_device *pnd; int err = 0; - mutex_lock(&pndevs->lock); + spin_lock(&pndevs->lock); + /* Find or create Phonet-specific device data */ pnd = __phonet_get(dev); if (pnd == NULL) @@ -145,7 +149,9 @@ int phonet_address_add(struct net_device *dev, u8 addr) err = -ENOMEM; else if (test_and_set_bit(addr >> 2, pnd->addrs)) err = -EEXIST; - mutex_unlock(&pndevs->lock); + + spin_unlock(&pndevs->lock); + return err; } @@ -155,7 +161,8 @@ int phonet_address_del(struct net_device *dev, u8 addr) struct phonet_device *pnd; int err = 0; - mutex_lock(&pndevs->lock); + spin_lock(&pndevs->lock); + pnd = __phonet_get(dev); if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) { err = -EADDRNOTAVAIL; @@ -164,7 +171,8 @@ int phonet_address_del(struct net_device *dev, u8 addr) list_del_rcu(&pnd->list); else pnd = NULL; - mutex_unlock(&pndevs->lock); + + spin_unlock(&pndevs->lock); if (pnd) kfree_rcu(pnd, rcu); @@ -313,7 +321,7 @@ static int __net_init phonet_init_net(struct net *net) return -ENOMEM; INIT_LIST_HEAD(&pnn->pndevs.list); - mutex_init(&pnn->pndevs.lock); + spin_lock_init(&pnn->pndevs.lock); mutex_init(&pnn->routes.lock); return 0; } -- cgit v1.2.3 From de51ad08b1177bbbb8b60cb7dd4c3c5dd50d262f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 11:31:38 -0700 Subject: phonet: Pass net and ifindex to rtm_phonet_notify(). Currently, rtm_phonet_notify() fetches netns and ifindex from dev. 
Once route_doit() is converted to RCU, rtm_phonet_notify() will be called outside of RCU due to GFP_KERNEL, and dev will be unavailable there. Let's pass net and ifindex to rtm_phonet_notify(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/phonet/pn_dev.h | 2 +- net/phonet/pn_dev.c | 10 +++++++--- net/phonet/pn_netlink.c | 16 +++++++++------- 3 files changed, 17 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index ac0331d83a81..021e524fd20a 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -43,7 +43,7 @@ void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr); int phonet_route_add(struct net_device *dev, u8 daddr); int phonet_route_del(struct net_device *dev, u8 daddr); -void rtm_phonet_notify(int event, struct net_device *dev, u8 dst); +void rtm_phonet_notify(struct net *net, int event, u32 ifindex, u8 dst); struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr); struct net_device *phonet_route_output(struct net *net, u8 daddr); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 545279ef5910..6ded0d347b9f 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -263,9 +263,13 @@ static int phonet_device_autoconf(struct net_device *dev) static void phonet_route_autodel(struct net_device *dev) { - struct phonet_net *pnn = phonet_pernet(dev_net(dev)); - unsigned int i; + struct net *net = dev_net(dev); DECLARE_BITMAP(deleted, 64); + u32 ifindex = dev->ifindex; + struct phonet_net *pnn; + unsigned int i; + + pnn = phonet_pernet(net); /* Remove left-over Phonet routes */ bitmap_zero(deleted, 64); @@ -281,7 +285,7 @@ static void phonet_route_autodel(struct net_device *dev) return; /* short-circuit RCU */ synchronize_rcu(); for_each_set_bit(i, deleted, 64) { - rtm_phonet_notify(RTM_DELROUTE, dev, i); + rtm_phonet_notify(net, RTM_DELROUTE, ifindex, i); dev_put(dev); } } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index c9a4215ec560..bfec5bd639b6 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -200,7 +200,7 @@ nla_put_failure: return -EMSGSIZE; } -void rtm_phonet_notify(int event, struct net_device *dev, u8 dst) +void rtm_phonet_notify(struct net *net, int event, u32 ifindex, u8 dst) { struct sk_buff *skb; int err = -ENOBUFS; @@ -210,17 +210,17 @@ void rtm_phonet_notify(int event, struct net_device *dev, u8 dst) if (skb == NULL) goto errout; - err = fill_route(skb, dev->ifindex, dst, 0, 0, event); + err = fill_route(skb, ifindex, dst, 0, 0, event); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, dev_net(dev), 0, - RTNLGRP_PHONET_ROUTE, NULL, GFP_KERNEL); + + rtnl_notify(skb, net, 0, RTNLGRP_PHONET_ROUTE, NULL, GFP_KERNEL); return; errout: - rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_ROUTE, err); + rtnl_set_sk_err(net, RTNLGRP_PHONET_ROUTE, err); } static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = { @@ -235,6 +235,7 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *tb[RTA_MAX+1]; struct net_device *dev; struct rtmsg *rtm; + u32 ifindex; int err; u8 dst; @@ -260,7 +261,8 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (dst & 3) /* Phonet addresses only have 6 high-order bits */ return -EINVAL; - dev = __dev_get_by_index(net, nla_get_u32(tb[RTA_OIF])); + ifindex = nla_get_u32(tb[RTA_OIF]); + dev = __dev_get_by_index(net, ifindex); if (dev == 
NULL) return -ENODEV; @@ -269,7 +271,7 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, else err = phonet_route_del(dev, dst); if (!err) - rtm_phonet_notify(nlh->nlmsg_type, dev, dst); + rtm_phonet_notify(net, nlh->nlmsg_type, ifindex, dst); return err; } -- cgit v1.2.3 From 3deec3b4afb4c767007eae1eeedbcf3da599395b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 11:31:39 -0700 Subject: phonet: Convert phonet_routes.lock to spinlock_t. route_doit() calls phonet_route_add() or phonet_route_del() for RTM_NEWROUTE or RTM_DELROUTE, respectively. Both functions only touch phonet_pernet(dev_net(dev))->routes, which is currently protected by RTNL and its dedicated mutex, phonet_routes.lock. We will convert route_doit() to RCU and cannot use mutex inside RCU. Let's convert the mutex to spinlock_t. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/phonet/pn_dev.h | 1 - net/phonet/pn_dev.c | 23 ++++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 021e524fd20a..37a3e83531c6 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -11,7 +11,6 @@ #define PN_DEV_H #include -#include #include struct net; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 6ded0d347b9f..19234d664c4f 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -22,7 +22,7 @@ #include struct phonet_routes { - struct mutex lock; + spinlock_t lock; struct net_device __rcu *table[64]; }; @@ -273,13 +273,15 @@ static void phonet_route_autodel(struct net_device *dev) /* Remove left-over Phonet routes */ bitmap_zero(deleted, 64); - mutex_lock(&pnn->routes.lock); - for (i = 0; i < 64; i++) + + spin_lock(&pnn->routes.lock); + for (i = 0; i < 64; i++) { if (rcu_access_pointer(pnn->routes.table[i]) == dev) { RCU_INIT_POINTER(pnn->routes.table[i], NULL); set_bit(i, deleted); } - mutex_unlock(&pnn->routes.lock); + } + spin_unlock(&pnn->routes.lock); if (bitmap_empty(deleted, 64)) return; /* short-circuit RCU */ @@ -326,7 +328,7 @@ static int __net_init phonet_init_net(struct net *net) INIT_LIST_HEAD(&pnn->pndevs.list); spin_lock_init(&pnn->pndevs.lock); - mutex_init(&pnn->routes.lock); + spin_lock_init(&pnn->routes.lock); return 0; } @@ -376,13 +378,15 @@ int phonet_route_add(struct net_device *dev, u8 daddr) int err = -EEXIST; daddr = daddr >> 2; - mutex_lock(&routes->lock); + + spin_lock(&routes->lock); if (routes->table[daddr] == NULL) { rcu_assign_pointer(routes->table[daddr], dev); dev_hold(dev); err = 0; } - mutex_unlock(&routes->lock); + spin_unlock(&routes->lock); + return err; } @@ -392,12 +396,13 @@ int phonet_route_del(struct net_device *dev, u8 daddr) struct phonet_routes *routes = &pnn->routes; daddr = daddr >> 2; - mutex_lock(&routes->lock); + + spin_lock(&routes->lock); if (rcu_access_pointer(routes->table[daddr]) == dev) RCU_INIT_POINTER(routes->table[daddr], NULL); else dev = NULL; - mutex_unlock(&routes->lock); + spin_unlock(&routes->lock); if (!dev) return -ENOENT; -- cgit v1.2.3 From a6021aa24f6417416d93318bbfa022ab229c33c8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Oct 2024 06:18:17 +0000 Subject: ACPI: EC: make EC support compile-time conditional The embedded controller code is mainly used on x86 laptops and cannot work without PC style I/O port access. 
Make this a user-visible configuration option that is default enabled on x86 but otherwise disabled, and that can never be enabled unless CONFIG_HAS_IOPORT is also available. The empty stubs in internal.h help ignore the EC code in configurations that don't support it. In order to see those stubs, the sbshc code also has to include this header and drop duplicate declarations. All the direct callers of ec_read/ec_write already had an x86 dependency and now also need to depend on APCI_EC. Signed-off-by: Arnd Bergmann Acked-by: Guenter Roeck Acked-by: Hans de Goede Link: https://patch.msgid.link/20241011061948.3211423-1-arnd@kernel.org [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/Kconfig | 11 ++++++++++- drivers/acpi/Makefile | 2 +- drivers/acpi/internal.h | 25 +++++++++++++++++++++++++ drivers/acpi/sbshc.c | 9 +-------- drivers/char/Kconfig | 1 + drivers/hwmon/Kconfig | 3 ++- drivers/platform/x86/Kconfig | 22 ++++++++++++---------- drivers/platform/x86/dell/Kconfig | 1 + drivers/platform/x86/hp/Kconfig | 1 + drivers/platform/x86/intel/Kconfig | 2 +- include/linux/acpi.h | 8 ++++++-- 11 files changed, 61 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index d67f63d93b2a..d65cd08ba8e1 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -132,8 +132,17 @@ config ACPI_REV_OVERRIDE_POSSIBLE makes it possible to force the kernel to return "5" as the supported ACPI revision via the "acpi_rev_override" command line switch. +config ACPI_EC + bool "Embedded Controller" + depends on HAS_IOPORT + default X86 + help + This driver handles communication with the microcontroller + on many x86 laptops and other machines. + config ACPI_EC_DEBUGFS tristate "EC read/write access through /sys/kernel/debug/ec" + depends on ACPI_EC help Say N to disable Embedded Controller /sys/kernel/debug interface @@ -433,7 +442,7 @@ config ACPI_HOTPLUG_IOAPIC config ACPI_SBS tristate "Smart Battery System" - depends on X86 + depends on X86 && ACPI_EC select POWER_SUPPLY help This driver supports the Smart Battery System, another diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 61ca4afe83dc..40208a0f5dfb 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -41,7 +41,7 @@ acpi-y += resource.o acpi-y += acpi_processor.o acpi-y += processor_core.o acpi-$(CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC) += processor_pdc.o -acpi-y += ec.o +acpi-$(CONFIG_ACPI_EC) += ec.o acpi-$(CONFIG_ACPI_DOCK) += dock.o acpi-$(CONFIG_PCI) += pci_root.o pci_link.o pci_irq.o obj-$(CONFIG_ACPI_MCFG) += pci_mcfg.o diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index ced7dff9a5db..00910ccd7eda 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -215,6 +215,8 @@ extern struct acpi_ec *first_ec; /* External interfaces use first EC only, so remember */ typedef int (*acpi_ec_query_func) (void *data); +#ifdef CONFIG_ACPI_EC + void acpi_ec_init(void); void acpi_ec_ecdt_probe(void); void acpi_ec_dsdt_probe(void); @@ -231,6 +233,29 @@ void acpi_ec_flush_work(void); bool acpi_ec_dispatch_gpe(void); #endif +#else + +static inline void acpi_ec_init(void) {} +static inline void acpi_ec_ecdt_probe(void) {} +static inline void acpi_ec_dsdt_probe(void) {} +static inline void acpi_ec_block_transactions(void) {} +static inline void acpi_ec_unblock_transactions(void) {} +static inline int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, + acpi_handle handle, acpi_ec_query_func func, + void *data) +{ + return 
-ENXIO; +} +static inline void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit) {} +static inline void acpi_ec_register_opregions(struct acpi_device *adev) {} + +static inline void acpi_ec_flush_work(void) {} +static inline bool acpi_ec_dispatch_gpe(void) +{ + return false; +} + +#endif /*-------------------------------------------------------------------------- Suspend/Resume diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 16f2daaa2c45..2b63cd18cca2 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -14,6 +14,7 @@ #include #include #include "sbshc.h" +#include "internal.h" #define ACPI_SMB_HC_CLASS "smbus_host_ctl" #define ACPI_SMB_HC_DEVICE_NAME "ACPI SMBus HC" @@ -236,12 +237,6 @@ static int smbus_alarm(void *context) return 0; } -typedef int (*acpi_ec_query_func) (void *data); - -extern int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, - acpi_handle handle, acpi_ec_query_func func, - void *data); - static int acpi_smbus_hc_add(struct acpi_device *device) { int status; @@ -278,8 +273,6 @@ static int acpi_smbus_hc_add(struct acpi_device *device) return 0; } -extern void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit); - static void acpi_smbus_hc_remove(struct acpi_device *device) { struct acpi_smb_hc *hc; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 7c8dd0abcfdf..8fb33c90482f 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -238,6 +238,7 @@ config APPLICOM config SONYPI tristate "Sony Vaio Programmable I/O Control Device support" depends on X86_32 && PCI && INPUT + depends on ACPI_EC || !ACPI help This driver enables access to the Sony Programmable I/O Control Device which can be found in many (all ?) Sony Vaio laptops. diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 08a3c863f80a..1e3a1f4d5c5e 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1752,7 +1752,7 @@ source "drivers/hwmon/occ/Kconfig" config SENSORS_OXP tristate "OneXPlayer EC fan control" - depends on ACPI + depends on ACPI_EC depends on X86 help If you say yes here you get support for fan readings and control over @@ -2592,6 +2592,7 @@ config SENSORS_ASUS_WMI config SENSORS_ASUS_EC tristate "ASUS EC Sensors" depends on X86 + depends on ACPI_EC help If you say yes here you get support for the ACPI embedded controller hardware monitoring interface found in ASUS motherboards. The driver diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 3875abba5a79..0258dd879d64 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -52,6 +52,7 @@ config WMI_BMOF config HUAWEI_WMI tristate "Huawei WMI laptop extras driver" depends on ACPI_BATTERY + depends on ACPI_EC depends on ACPI_WMI depends on INPUT select INPUT_SPARSEKMAP @@ -147,7 +148,7 @@ config YT2_1380 config ACERHDF tristate "Acer Aspire One temperature and fan driver" - depends on ACPI && THERMAL + depends on ACPI_EC && THERMAL select THERMAL_GOV_BANG_BANG help This is a driver for Acer Aspire One netbooks. 
It allows to access @@ -186,6 +187,7 @@ config ACER_WMI depends on SERIO_I8042 depends on INPUT depends on RFKILL || RFKILL = n + depends on ACPI_EC depends on ACPI_WMI depends on ACPI_VIDEO || ACPI_VIDEO = n depends on HWMON @@ -334,7 +336,7 @@ config MERAKI_MX100 config EEEPC_LAPTOP tristate "Eee PC Hotkey Driver" - depends on ACPI + depends on ACPI_EC depends on INPUT depends on RFKILL || RFKILL = n depends on ACPI_VIDEO || ACPI_VIDEO = n @@ -503,7 +505,7 @@ config SENSORS_HDAPS config THINKPAD_ACPI tristate "ThinkPad ACPI Laptop Extras" - depends on ACPI + depends on ACPI_EC depends on ACPI_BATTERY depends on INPUT depends on RFKILL || RFKILL = n @@ -682,7 +684,7 @@ config MEEGOPAD_ANX7428 config MSI_EC tristate "MSI EC Extras" - depends on ACPI + depends on ACPI_EC depends on ACPI_BATTERY help This driver allows various MSI laptops' functionalities to be @@ -690,7 +692,7 @@ config MSI_EC config MSI_LAPTOP tristate "MSI Laptop Extras" - depends on ACPI + depends on ACPI_EC depends on BACKLIGHT_CLASS_DEVICE depends on ACPI_VIDEO || ACPI_VIDEO = n depends on RFKILL @@ -796,7 +798,7 @@ config SAMSUNG_LAPTOP config SAMSUNG_Q10 tristate "Samsung Q10 Extras" - depends on ACPI + depends on ACPI_EC select BACKLIGHT_CLASS_DEVICE help This driver provides support for backlight control on Samsung Q10 @@ -804,7 +806,7 @@ config SAMSUNG_Q10 config ACPI_TOSHIBA tristate "Toshiba Laptop Extras" - depends on ACPI + depends on ACPI_EC depends on ACPI_BATTERY depends on ACPI_WMI select LEDS_CLASS @@ -904,7 +906,7 @@ config ACPI_CMPC config COMPAL_LAPTOP tristate "Compal (and others) Laptop Extras" - depends on ACPI + depends on ACPI_EC depends on BACKLIGHT_CLASS_DEVICE depends on ACPI_VIDEO || ACPI_VIDEO = n depends on RFKILL @@ -949,7 +951,7 @@ config PANASONIC_LAPTOP config SONY_LAPTOP tristate "Sony Laptop Extras" - depends on ACPI + depends on ACPI_EC depends on ACPI_VIDEO || ACPI_VIDEO = n depends on BACKLIGHT_CLASS_DEVICE depends on INPUT @@ -972,7 +974,7 @@ config SONYPI_COMPAT config SYSTEM76_ACPI tristate "System76 ACPI Driver" - depends on ACPI + depends on ACPI_EC depends on ACPI_BATTERY depends on HWMON depends on INPUT diff --git a/drivers/platform/x86/dell/Kconfig b/drivers/platform/x86/dell/Kconfig index 68a49788a396..dc21227dd66e 100644 --- a/drivers/platform/x86/dell/Kconfig +++ b/drivers/platform/x86/dell/Kconfig @@ -194,6 +194,7 @@ config DELL_WMI config DELL_WMI_PRIVACY bool "Dell WMI Hardware Privacy Support" depends on DELL_WMI + depends on ACPI_EC help This option adds integration with the "Dell Hardware Privacy" feature of Dell laptops to the dell-wmi driver. 
diff --git a/drivers/platform/x86/hp/Kconfig b/drivers/platform/x86/hp/Kconfig index d776761cd5fd..dd51491b9bcd 100644 --- a/drivers/platform/x86/hp/Kconfig +++ b/drivers/platform/x86/hp/Kconfig @@ -37,6 +37,7 @@ config HP_ACCEL config HP_WMI tristate "HP WMI extras" default m + depends on ACPI_EC depends on ACPI_WMI depends on INPUT depends on RFKILL || RFKILL = n diff --git a/drivers/platform/x86/intel/Kconfig b/drivers/platform/x86/intel/Kconfig index ad50bbabec61..eb698dcb9af9 100644 --- a/drivers/platform/x86/intel/Kconfig +++ b/drivers/platform/x86/intel/Kconfig @@ -62,7 +62,7 @@ config INTEL_INT0002_VGPIO config INTEL_OAKTRAIL tristate "Intel Oaktrail Platform Extras" - depends on ACPI + depends on ACPI_EC depends on ACPI_VIDEO || ACPI_VIDEO=n depends on RFKILL && BACKLIGHT_CLASS_DEVICE && ACPI help diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4d5ee84c468b..7dd24acd9ffe 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1164,8 +1164,6 @@ int acpi_subsys_suspend_noirq(struct device *dev); int acpi_subsys_suspend(struct device *dev); int acpi_subsys_freeze(struct device *dev); int acpi_subsys_poweroff(struct device *dev); -void acpi_ec_mark_gpe_for_wake(void); -void acpi_ec_set_gpe_wake_mask(u8 action); int acpi_subsys_restore_early(struct device *dev); #else static inline int acpi_subsys_prepare(struct device *dev) { return 0; } @@ -1176,6 +1174,12 @@ static inline int acpi_subsys_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_freeze(struct device *dev) { return 0; } static inline int acpi_subsys_poweroff(struct device *dev) { return 0; } static inline int acpi_subsys_restore_early(struct device *dev) { return 0; } +#endif + +#if defined(CONFIG_ACPI_EC) && defined(CONFIG_PM_SLEEP) +void acpi_ec_mark_gpe_for_wake(void); +void acpi_ec_set_gpe_wake_mask(u8 action); +#else static inline void acpi_ec_mark_gpe_for_wake(void) {} static inline void acpi_ec_set_gpe_wake_mask(u8 action) {} #endif -- cgit v1.2.3 From 1cb80d9e93f861018fabe81a69ea0ded20f5a2d0 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 23 Oct 2024 16:47:48 -0700 Subject: bpf: Support __uptr type tag in BTF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces the "__uptr" type tag to BTF. It is to define a pointer pointing to the user space memory. This patch adds BTF logic to pass the "__uptr" type tag. btf_find_kptr() is reused for the "__uptr" tag. The "__uptr" will only be supported in the map_value of the task storage map. However, btf_parse_struct_meta() also uses btf_find_kptr() but it is not interested in "__uptr". This patch adds a "field_mask" argument to btf_find_kptr() which will return BTF_FIELD_IGNORE if the caller is not interested in a “__uptr” field. btf_parse_kptr() is also reused to parse the uptr. The btf_check_and_fixup_fields() is changed to do extra checks on the uptr to ensure that its struct size is not larger than PAGE_SIZE. It is not clear how a uptr pointing to a CO-RE supported kernel struct will be used, so it is also not allowed now. 
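For orientation, a minimal sketch of how a BPF program could spell this tag in C, following the existing __kptr convention (the macro name and the surrounding layout are illustrative assumptions, not part of this patch; only the "uptr" tag string matched by btf_find_kptr() comes from the patch itself):

	/* Illustrative only: btf_find_kptr() matches the BTF type tag by
	 * the name "uptr"; spelling it via btf_type_tag mirrors __kptr.
	 */
	#define __uptr __attribute__((btf_type_tag("uptr")))

	struct user_data {
		int value;
	};

	/* Only supported in the map_value of the task storage map. */
	struct value_type {
		struct user_data __uptr *udata;
	};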
Signed-off-by: Kui-Feng Lee Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20241023234759.860539-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 5 +++++ kernel/bpf/btf.c | 34 +++++++++++++++++++++++++++++----- kernel/bpf/syscall.c | 2 ++ 3 files changed, 36 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0c216e71cec7..bb31bc6d0c4d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -203,6 +203,7 @@ enum btf_field_type { BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, BPF_REFCOUNT = (1 << 9), BPF_WORKQUEUE = (1 << 10), + BPF_UPTR = (1 << 11), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -322,6 +323,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "kptr"; case BPF_KPTR_PERCPU: return "percpu_kptr"; + case BPF_UPTR: + return "uptr"; case BPF_LIST_HEAD: return "bpf_list_head"; case BPF_LIST_NODE: @@ -350,6 +353,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); @@ -379,6 +383,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 13dd1fa1d1b9..76cafff2d99c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3334,7 +3334,7 @@ static int btf_find_struct(const struct btf *btf, const struct btf_type *t, } static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, - u32 off, int sz, struct btf_field_info *info) + u32 off, int sz, struct btf_field_info *info, u32 field_mask) { enum btf_field_type type; u32 res_id; @@ -3358,9 +3358,14 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, type = BPF_KPTR_REF; else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off))) type = BPF_KPTR_PERCPU; + else if (!strcmp("uptr", __btf_name_by_offset(btf, t->name_off))) + type = BPF_UPTR; else return -EINVAL; + if (!(type & field_mask)) + return BTF_FIELD_IGNORE; + /* Get the base type */ t = btf_type_skip_modifiers(btf, t->type, &res_id); /* Only pointer to struct is allowed */ @@ -3502,7 +3507,7 @@ static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_ field_mask_test_name(BPF_REFCOUNT, "bpf_refcount"); /* Only return BPF_KPTR when all other types with matchable names fail */ - if (field_mask & BPF_KPTR && !__btf_type_is_struct(var_type)) { + if (field_mask & (BPF_KPTR | BPF_UPTR) && !__btf_type_is_struct(var_type)) { type = BPF_KPTR_REF; goto end; } @@ -3535,6 +3540,7 @@ static int btf_repeat_fields(struct btf_field_info *info, case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: case BPF_LIST_HEAD: case BPF_RB_ROOT: break; @@ -3661,8 +3667,9 @@ static int btf_find_field_one(const struct btf *btf, case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: ret = btf_find_kptr(btf, var_type, off, sz, - info_cnt ? &info[0] : &tmp); + info_cnt ? 
&info[0] : &tmp, field_mask); if (ret < 0) return ret; break; @@ -3985,6 +3992,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]); if (ret < 0) goto end; @@ -4044,12 +4052,28 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * Hence we only need to ensure that bpf_{list_head,rb_root} ownership * does not form cycles. */ - if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_GRAPH_ROOT)) + if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & (BPF_GRAPH_ROOT | BPF_UPTR))) return 0; for (i = 0; i < rec->cnt; i++) { struct btf_struct_meta *meta; + const struct btf_type *t; u32 btf_id; + if (rec->fields[i].type == BPF_UPTR) { + /* The uptr only supports pinning one page and cannot + * point to a kernel struct + */ + if (btf_is_kernel(rec->fields[i].kptr.btf)) + return -EINVAL; + t = btf_type_by_id(rec->fields[i].kptr.btf, + rec->fields[i].kptr.btf_id); + if (!t->size) + return -EINVAL; + if (t->size > PAGE_SIZE) + return -E2BIG; + continue; + } + if (!(rec->fields[i].type & BPF_GRAPH_ROOT)) continue; btf_id = rec->fields[i].graph_root.value_btf_id; @@ -5560,7 +5584,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) goto free_aof; } - ret = btf_find_kptr(btf, t, 0, 0, &tmp); + ret = btf_find_kptr(btf, t, 0, 0, &tmp, BPF_KPTR); if (ret != BTF_FIELD_FOUND) continue; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4d04d4d9c1f3..2d2935d9c096 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -548,6 +548,7 @@ void btf_record_free(struct btf_record *rec) case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: if (rec->fields[i].kptr.module) module_put(rec->fields[i].kptr.module); if (btf_is_kernel(rec->fields[i].kptr.btf)) @@ -597,6 +598,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec) case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: if (btf_is_kernel(fields[i].kptr.btf)) btf_get(fields[i].kptr.btf); if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { -- cgit v1.2.3 From b9a5a07aeaa2a903fb1306eb422880b2fa5f937f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 23 Oct 2024 16:47:50 -0700 Subject: bpf: Add "bool swap_uptrs" arg to bpf_local_storage_update() and bpf_selem_alloc() In a later patch, the task local storage will only accept uptr from the syscall update_elem and will not accept uptr from the bpf prog. The reason is the bpf prog does not have a way to provide a valid user space address. bpf_local_storage_update() and bpf_selem_alloc() are used by both bpf prog bpf_task_storage_get(BPF_LOCAL_STORAGE_GET_F_CREATE) and bpf syscall update_elem. "bool swap_uptrs" arg is added to bpf_local_storage_update() and bpf_selem_alloc() to tell if it is called by the bpf prog or by the bpf syscall. When swap_uptrs==true, it is called by the syscall. The arg is named (swap_)uptrs because the later patch will swap the uptrs between the newly allocated selem and the user space provided map_value. It will make error handling easier in case map->ops->map_update_elem() fails and the caller can decide if it needs to unpin the uptr in the user space provided map_value or the bpf_local_storage_update() has already taken the uptr ownership and will take care of unpinning it also. Only swap_uptrs==false is passed now. 
The logic to handle the true case will be added in a later patch. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20241023234759.860539-4-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 4 ++-- kernel/bpf/bpf_cgrp_storage.c | 4 ++-- kernel/bpf/bpf_inode_storage.c | 4 ++-- kernel/bpf/bpf_local_storage.c | 8 ++++---- kernel/bpf/bpf_task_storage.c | 4 ++-- net/core/bpf_sk_storage.c | 6 +++--- 6 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index dcddb0aef7d8..0c7216c065d5 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -181,7 +181,7 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, - bool charge_mem, gfp_t gfp_flags); + bool charge_mem, bool swap_uptrs, gfp_t gfp_flags); void bpf_selem_free(struct bpf_local_storage_elem *selem, struct bpf_local_storage_map *smap, @@ -195,7 +195,7 @@ bpf_local_storage_alloc(void *owner, struct bpf_local_storage_data * bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, - void *value, u64 map_flags, gfp_t gfp_flags); + void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags); u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map); diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index 28efd0a3f220..20f05de92e9c 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -107,7 +107,7 @@ static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key, bpf_cgrp_storage_lock(); sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map, - value, map_flags, GFP_ATOMIC); + value, map_flags, false, GFP_ATOMIC); bpf_cgrp_storage_unlock(); cgroup_put(cgroup); return PTR_ERR_OR_ZERO(sdata); @@ -181,7 +181,7 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup, if (!percpu_ref_is_dying(&cgroup->self.refcnt) && (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map, - value, BPF_NOEXIST, gfp_flags); + value, BPF_NOEXIST, false, gfp_flags); unlock: bpf_cgrp_storage_unlock(); diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 29da6d3838f6..44ccebc745e5 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -100,7 +100,7 @@ static long bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, sdata = bpf_local_storage_update(file_inode(fd_file(f)), (struct bpf_local_storage_map *)map, - value, map_flags, GFP_ATOMIC); + value, map_flags, false, GFP_ATOMIC); return PTR_ERR_OR_ZERO(sdata); } @@ -154,7 +154,7 @@ BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { sdata = bpf_local_storage_update( inode, (struct bpf_local_storage_map *)map, value, - BPF_NOEXIST, gfp_flags); + BPF_NOEXIST, false, gfp_flags); return IS_ERR(sdata) ? 
(unsigned long)NULL : (unsigned long)sdata->data; } diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index c938dea5ddbf..1cf772cb26eb 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -73,7 +73,7 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem) struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, - void *value, bool charge_mem, gfp_t gfp_flags) + void *value, bool charge_mem, bool swap_uptrs, gfp_t gfp_flags) { struct bpf_local_storage_elem *selem; @@ -524,7 +524,7 @@ uncharge: */ struct bpf_local_storage_data * bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, - void *value, u64 map_flags, gfp_t gfp_flags) + void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags) { struct bpf_local_storage_data *old_sdata = NULL; struct bpf_local_storage_elem *alloc_selem, *selem = NULL; @@ -550,7 +550,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, if (err) return ERR_PTR(err); - selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags); + selem = bpf_selem_alloc(smap, owner, value, true, swap_uptrs, gfp_flags); if (!selem) return ERR_PTR(-ENOMEM); @@ -584,7 +584,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, /* A lookup has just been done before and concluded a new selem is * needed. The chance of an unnecessary alloc is unlikely. */ - alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags); + alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, swap_uptrs, gfp_flags); if (!alloc_selem) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index adf6dfe0ba68..45dc3ca334d3 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -147,7 +147,7 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, bpf_task_storage_lock(); sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, map_flags, - GFP_ATOMIC); + false, GFP_ATOMIC); bpf_task_storage_unlock(); err = PTR_ERR_OR_ZERO(sdata); @@ -219,7 +219,7 @@ static void *__bpf_task_storage_get(struct bpf_map *map, (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) { sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, - BPF_NOEXIST, gfp_flags); + BPF_NOEXIST, false, gfp_flags); return IS_ERR(sdata) ? 
NULL : sdata->data; } diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index bc01b3aa6b0f..2f4ed83a75ae 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -106,7 +106,7 @@ static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, if (sock) { sdata = bpf_local_storage_update( sock->sk, (struct bpf_local_storage_map *)map, value, - map_flags, GFP_ATOMIC); + map_flags, false, GFP_ATOMIC); sockfd_put(sock); return PTR_ERR_OR_ZERO(sdata); } @@ -137,7 +137,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk, { struct bpf_local_storage_elem *copy_selem; - copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC); + copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, false, GFP_ATOMIC); if (!copy_selem) return NULL; @@ -243,7 +243,7 @@ BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, refcount_inc_not_zero(&sk->sk_refcnt)) { sdata = bpf_local_storage_update( sk, (struct bpf_local_storage_map *)map, value, - BPF_NOEXIST, gfp_flags); + BPF_NOEXIST, false, gfp_flags); /* sk must be a fullsock (guaranteed by verifier), * so sock_gen_put() is unnecessary. */ -- cgit v1.2.3 From 5bd5bab76669b1e1551f03f5fcbc165f3fa8d269 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 23 Oct 2024 16:47:51 -0700 Subject: bpf: Postpone bpf_selem_free() in bpf_selem_unlink_storage_nolock() In a later patch, bpf_selem_free() will call unpin_user_page() through bpf_obj_free_fields(). unpin_user_page() may take spin_lock. However, some bpf_selem_free() call paths have held a raw_spin_lock. Like this: raw_spin_lock_irqsave() bpf_selem_unlink_storage_nolock() bpf_selem_free() unpin_user_page() spin_lock() To avoid spinlock nested in raw_spinlock, bpf_selem_free() should be done after releasing the raw_spinlock. The "bool reuse_now" arg is replaced with "struct hlist_head *free_selem_list" in bpf_selem_unlink_storage_nolock(). The bpf_selem_unlink_storage_nolock() will append the to-be-free selem at the free_selem_list. The caller of bpf_selem_unlink_storage_nolock() will need to call the new bpf_selem_free_list(free_selem_list, reuse_now) to free the selem after releasing the raw_spinlock. Note that the selem->snode cannot be reused for linking to the free_selem_list because the selem->snode is protected by the raw_spinlock that we want to avoid holding. A new "struct hlist_node free_node;" is union-ized with the rcu_head. Only the first one successfully hlist_del_init_rcu(&selem->snode) will be able to use the free_node. After succeeding hlist_del_init_rcu(&selem->snode), the free_node and rcu_head usage is serialized such that they can share the 16 bytes in a union. 
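Condensed, the resulting unlink pattern looks like the sketch below; this only restates what the diff underneath does, it is not additional code:

	HLIST_HEAD(selem_free_list);

	raw_spin_lock_irqsave(&local_storage->lock, flags);
	/* only unlink and queue the selem while holding the raw spinlock */
	free_local_storage = bpf_selem_unlink_storage_nolock(
			local_storage, selem, true, &selem_free_list);
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);

	/* now it is safe to free; unpin_user_page() may take a plain spinlock */
	bpf_selem_free_list(&selem_free_list, reuse_now);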
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20241023234759.860539-5-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 8 +++++++- kernel/bpf/bpf_local_storage.c | 35 ++++++++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 0c7216c065d5..ab7244d8108f 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -77,7 +77,13 @@ struct bpf_local_storage_elem { struct hlist_node map_node; /* Linked to bpf_local_storage_map */ struct hlist_node snode; /* Linked to bpf_local_storage */ struct bpf_local_storage __rcu *local_storage; - struct rcu_head rcu; + union { + struct rcu_head rcu; + struct hlist_node free_node; /* used to postpone + * bpf_selem_free + * after raw_spin_unlock + */ + }; /* 8 bytes hole */ /* The data is stored in another cacheline to minimize * the number of cachelines access during a cache hit. diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 1cf772cb26eb..09a67dff2336 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -246,13 +246,30 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem, } } +static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now) +{ + struct bpf_local_storage_elem *selem; + struct bpf_local_storage_map *smap; + struct hlist_node *n; + + /* The "_safe" iteration is needed. + * The loop is not removing the selem from the list + * but bpf_selem_free will use the selem->rcu_head + * which is union-ized with the selem->free_node. + */ + hlist_for_each_entry_safe(selem, n, list, free_node) { + smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); + bpf_selem_free(selem, smap, reuse_now); + } +} + /* local_storage->lock must be held and selem->local_storage == local_storage. * The caller must ensure selem->smap is still valid to be * dereferenced for its smap->elem_size and smap->cache_idx. 
*/ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem, - bool uncharge_mem, bool reuse_now) + bool uncharge_mem, struct hlist_head *free_selem_list) { struct bpf_local_storage_map *smap; bool free_local_storage; @@ -296,7 +313,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - bpf_selem_free(selem, smap, reuse_now); + hlist_add_head(&selem->free_node, free_selem_list); if (rcu_access_pointer(local_storage->smap) == smap) RCU_INIT_POINTER(local_storage->smap, NULL); @@ -345,6 +362,7 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, struct bpf_local_storage_map *storage_smap; struct bpf_local_storage *local_storage; bool bpf_ma, free_local_storage = false; + HLIST_HEAD(selem_free_list); unsigned long flags; if (unlikely(!selem_linked_to_storage_lockless(selem))) @@ -360,9 +378,11 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, raw_spin_lock_irqsave(&local_storage->lock, flags); if (likely(selem_linked_to_storage(selem))) free_local_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, true, reuse_now); + local_storage, selem, true, &selem_free_list); raw_spin_unlock_irqrestore(&local_storage->lock, flags); + bpf_selem_free_list(&selem_free_list, reuse_now); + if (free_local_storage) bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now); } @@ -529,6 +549,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, struct bpf_local_storage_data *old_sdata = NULL; struct bpf_local_storage_elem *alloc_selem, *selem = NULL; struct bpf_local_storage *local_storage; + HLIST_HEAD(old_selem_free_list); unsigned long flags; int err; @@ -624,11 +645,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, if (old_sdata) { bpf_selem_unlink_map(SELEM(old_sdata)); bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata), - true, false); + true, &old_selem_free_list); } unlock: raw_spin_unlock_irqrestore(&local_storage->lock, flags); + bpf_selem_free_list(&old_selem_free_list, false); if (alloc_selem) { mem_uncharge(smap, owner, smap->elem_size); bpf_selem_free(alloc_selem, smap, true); @@ -706,6 +728,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) struct bpf_local_storage_map *storage_smap; struct bpf_local_storage_elem *selem; bool bpf_ma, free_storage = false; + HLIST_HEAD(free_selem_list); struct hlist_node *n; unsigned long flags; @@ -734,10 +757,12 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) * of the loop will set the free_cgroup_storage to true. */ free_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, true, true); + local_storage, selem, true, &free_selem_list); } raw_spin_unlock_irqrestore(&local_storage->lock, flags); + bpf_selem_free_list(&free_selem_list, true); + if (free_storage) bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true); } -- cgit v1.2.3 From ba512b00e5efbf7e19cfb7fa9f66ce82669b7077 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 23 Oct 2024 16:47:53 -0700 Subject: bpf: Add uptr support in the map_value of the task local storage. This patch adds uptr support in the map_value of the task local storage. 
struct value_type {
	struct user_data __uptr *uptr;
};

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct value_type);
} datamap SEC(".maps");

A new bpf_obj_pin_uptrs() is added to pin the user page and also store the kernel address back into the uptr for the bpf prog to use later. It currently does not support a uptr pointing to a user struct that spans two pages. It also excludes PageHighMem support to keep things simple. As of now, the 32bit bpf jit is missing other, more crucial bpf features. For example, many important bpf features now depend on bpf kfuncs, but so far only one 32bit arch (x86-32) supports them, which I added as an example when kfunc was first introduced to bpf.

The uptr can only be stored to the task local storage by the syscall update_elem, meaning the uptr will not be considered if it is provided by the bpf prog through bpf_task_storage_get(BPF_LOCAL_STORAGE_GET_F_CREATE). This is enforced by only calling bpf_local_storage_update(swap_uptrs==true) in bpf_pid_task_storage_update_elem; everywhere else passes swap_uptrs==false. This plumbs down to bpf_selem_alloc(swap_uptrs==true), which is the only case where bpf_selem_alloc() takes the uptr value when updating the newly allocated selem.

bpf_obj_swap_uptrs() is added to swap the uptr between SDATA(selem)->data and the user provided map_value in "void *value". bpf_obj_swap_uptrs() makes SDATA(selem)->data take ownership of the uptr, and the user space provided map_value is left with NULL in the uptr.

bpf_obj_unpin_uptrs() is called after map->ops->map_update_elem() returns an error. If map->ops->map_update_elem() has reached a state where the local storage has taken the uptr ownership, bpf_obj_unpin_uptrs() is a no-op because the uptr is NULL. A "__"bpf_obj_unpin_uptrs() is added to make unpinning on this error path easier, such that it does not have to check whether map->record is NULL.

BPF_F_LOCK is not supported when the map_value has a uptr. This can be revisited later if there is a use case; a similar swap_uptrs idea can be considered.

The final bit is to do unpin_user_page() in bpf_obj_free_fields(). The earlier patch has ensured that bpf_obj_free_fields() has gone through the rcu gp when needed.
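From the syscall side, usage could look roughly like the sketch below. This is an illustrative assumption, not part of the patch: struct user_data and struct value_type mirror the example above, and pin_udata() is a hypothetical helper; only the libbpf bpf_map_update_elem() call and the pidfd key are as described in the patch.

	/* Userspace side: hand a pointer into our own memory to the task
	 * storage map.  The kernel pins the backing page and rewrites the
	 * uptr to its kernel mapping (see bpf_obj_pin_uptrs() below).
	 */
	#include <bpf/bpf.h>

	struct user_data { int a; };

	/* userspace mirror of the BPF-side map value */
	struct value_type { struct user_data *uptr; };

	static struct user_data udata;	/* must not span a page boundary */

	static int pin_udata(int map_fd, int task_pidfd)
	{
		struct value_type value = { .uptr = &udata };

		/* task storage maps are keyed by a pidfd */
		return bpf_map_update_elem(map_fd, &task_pidfd, &value, BPF_ANY);
	}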
Cc: linux-mm@kvack.org Cc: Shakeel Butt Signed-off-by: Martin KaFai Lau Acked-by: Shakeel Butt Link: https://lore.kernel.org/r/20241023234759.860539-7-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 20 ++++++++ kernel/bpf/bpf_local_storage.c | 7 ++- kernel/bpf/bpf_task_storage.c | 5 +- kernel/bpf/syscall.c | 106 +++++++++++++++++++++++++++++++++++++++-- 4 files changed, 131 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bb31bc6d0c4d..8888689aa917 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -424,6 +424,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: break; default: WARN_ON_ONCE(1); @@ -512,6 +513,25 @@ static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src bpf_obj_memcpy(map->record, dst, src, map->value_size, true); } +static inline void bpf_obj_swap_uptrs(const struct btf_record *rec, void *dst, void *src) +{ + unsigned long *src_uptr, *dst_uptr; + const struct btf_field *field; + int i; + + if (!btf_record_has_field(rec, BPF_UPTR)) + return; + + for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) { + if (field->type != BPF_UPTR) + continue; + + src_uptr = src + field->offset; + dst_uptr = dst + field->offset; + swap(*src_uptr, *dst_uptr); + } +} + static inline void bpf_obj_memzero(struct btf_record *rec, void *dst, u32 size) { u32 curr_off = 0; diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index ca871be1c42d..7e6a0af0afc1 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -99,9 +99,12 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, } if (selem) { - if (value) + if (value) { + /* No need to call check_and_init_map_value as memory is zero init */ copy_map_value(&smap->map, SDATA(selem)->data, value); - /* No need to call check_and_init_map_value as memory is zero init */ + if (swap_uptrs) + bpf_obj_swap_uptrs(smap->map.record, SDATA(selem)->data, value); + } return selem; } diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 45dc3ca334d3..09705f9988f3 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -129,6 +129,9 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, struct pid *pid; int fd, err; + if ((map_flags & BPF_F_LOCK) && btf_record_has_field(map->record, BPF_UPTR)) + return -EOPNOTSUPP; + fd = *(int *)key; pid = pidfd_get_pid(fd, &f_flags); if (IS_ERR(pid)) @@ -147,7 +150,7 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, bpf_task_storage_lock(); sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, map_flags, - false, GFP_ATOMIC); + true, GFP_ATOMIC); bpf_task_storage_unlock(); err = PTR_ERR_OR_ZERO(sdata); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2d2935d9c096..426a52e5c7da 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -155,6 +155,89 @@ static void maybe_wait_bpf_programs(struct bpf_map *map) synchronize_rcu(); } +static void unpin_uptr_kaddr(void *kaddr) +{ + if (kaddr) + unpin_user_page(virt_to_page(kaddr)); +} + +static void __bpf_obj_unpin_uptrs(struct btf_record *rec, u32 cnt, void *obj) +{ + const struct btf_field *field; + void **uptr_addr; + int i; + + for (i = 0, field = rec->fields; i < cnt; i++, field++) { + if (field->type != BPF_UPTR) 
+ continue; + + uptr_addr = obj + field->offset; + unpin_uptr_kaddr(*uptr_addr); + } +} + +static void bpf_obj_unpin_uptrs(struct btf_record *rec, void *obj) +{ + if (!btf_record_has_field(rec, BPF_UPTR)) + return; + + __bpf_obj_unpin_uptrs(rec, rec->cnt, obj); +} + +static int bpf_obj_pin_uptrs(struct btf_record *rec, void *obj) +{ + const struct btf_field *field; + const struct btf_type *t; + unsigned long start, end; + struct page *page; + void **uptr_addr; + int i, err; + + if (!btf_record_has_field(rec, BPF_UPTR)) + return 0; + + for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) { + if (field->type != BPF_UPTR) + continue; + + uptr_addr = obj + field->offset; + start = *(unsigned long *)uptr_addr; + if (!start) + continue; + + t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id); + /* t->size was checked for zero before */ + if (check_add_overflow(start, t->size - 1, &end)) { + err = -EFAULT; + goto unpin_all; + } + + /* The uptr's struct cannot span across two pages */ + if ((start & PAGE_MASK) != (end & PAGE_MASK)) { + err = -EOPNOTSUPP; + goto unpin_all; + } + + err = pin_user_pages_fast(start, 1, FOLL_LONGTERM | FOLL_WRITE, &page); + if (err != 1) + goto unpin_all; + + if (PageHighMem(page)) { + err = -EOPNOTSUPP; + unpin_user_page(page); + goto unpin_all; + } + + *uptr_addr = page_address(page) + offset_in_page(start); + } + + return 0; + +unpin_all: + __bpf_obj_unpin_uptrs(rec, i, obj); + return err; +} + static int bpf_map_update_value(struct bpf_map *map, struct file *map_file, void *key, void *value, __u64 flags) { @@ -199,9 +282,14 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file, map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { err = map->ops->map_push_elem(map, value, flags); } else { - rcu_read_lock(); - err = map->ops->map_update_elem(map, key, value, flags); - rcu_read_unlock(); + err = bpf_obj_pin_uptrs(map->record, value); + if (!err) { + rcu_read_lock(); + err = map->ops->map_update_elem(map, key, value, flags); + rcu_read_unlock(); + if (err) + bpf_obj_unpin_uptrs(map->record, value); + } } bpf_enable_instrumentation(); @@ -716,6 +804,10 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) field->kptr.dtor(xchgd_field); } break; + case BPF_UPTR: + /* The caller ensured that no one is using the uptr */ + unpin_uptr_kaddr(*(void **)field_ptr); + break; case BPF_LIST_HEAD: if (WARN_ON_ONCE(rec->spin_lock_off < 0)) continue; @@ -1107,7 +1199,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, map->record = btf_parse_fields(btf, value_type, BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | - BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE, + BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR, map->value_size); if (!IS_ERR_OR_NULL(map->record)) { int i; @@ -1163,6 +1255,12 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, goto free_map_tab; } break; + case BPF_UPTR: + if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) { + ret = -EOPNOTSUPP; + goto free_map_tab; + } + break; case BPF_LIST_HEAD: case BPF_RB_ROOT: if (map->map_type != BPF_MAP_TYPE_HASH && -- cgit v1.2.3 From 09d6775f503b393d0457c7126aa43208e1724004 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 16 Oct 2024 13:27:45 -0700 Subject: riscv: Add support for userspace pointer masking RISC-V supports pointer masking with a variable number of tag bits (which is called "PMLEN" in the specification) and which is configured at the next higher privilege level. 
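As a rough userspace sketch of the interface wired up below (hypothetical usage; the PR_PMLEN_* names come from the uapi additions in this patch):

	#include <stdio.h>
	#include <sys/prctl.h>

	#ifndef PR_PMLEN_SHIFT
	#define PR_PMLEN_SHIFT	24
	#define PR_PMLEN_MASK	(0x7fUL << PR_PMLEN_SHIFT)
	#endif

	int main(void)
	{
		/* ask for at least 7 tag bits; the kernel may grant more */
		unsigned long req = 7UL << PR_PMLEN_SHIFT;
		long ctrl;

		if (prctl(PR_SET_TAGGED_ADDR_CTRL, req, 0, 0, 0))
			return 1;	/* request could not be satisfied */

		ctrl = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
		printf("granted PMLEN: %ld\n",
		       (ctrl & PR_PMLEN_MASK) >> PR_PMLEN_SHIFT);
		return 0;
	}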
Wire up the PR_SET_TAGGED_ADDR_CTRL and PR_GET_TAGGED_ADDR_CTRL prctls so userspace can request a lower bound on the number of tag bits and determine the actual number of tag bits. As with arm64's PR_TAGGED_ADDR_ENABLE, the pointer masking configuration is thread-scoped, inherited on clone() and fork() and cleared on execve(). Reviewed-by: Charlie Jenkins Tested-by: Charlie Jenkins Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20241016202814.4061541-5-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- Documentation/arch/riscv/uabi.rst | 12 +++++ arch/riscv/Kconfig | 11 +++++ arch/riscv/include/asm/processor.h | 8 ++++ arch/riscv/include/asm/switch_to.h | 11 +++++ arch/riscv/kernel/process.c | 91 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/prctl.h | 5 ++- 6 files changed, 137 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/arch/riscv/uabi.rst b/Documentation/arch/riscv/uabi.rst index 2b420bab0527..ddb8359a46ed 100644 --- a/Documentation/arch/riscv/uabi.rst +++ b/Documentation/arch/riscv/uabi.rst @@ -68,3 +68,15 @@ Misaligned accesses Misaligned scalar accesses are supported in userspace, but they may perform poorly. Misaligned vector accesses are only supported if the Zicclsm extension is supported. + +Pointer masking +--------------- + +Support for pointer masking in userspace (the Supm extension) is provided via +the ``PR_SET_TAGGED_ADDR_CTRL`` and ``PR_GET_TAGGED_ADDR_CTRL`` ``prctl()`` +operations. Pointer masking is disabled by default. To enable it, userspace +must call ``PR_SET_TAGGED_ADDR_CTRL`` with the ``PR_PMLEN`` field set to the +number of mask/tag bits needed by the application. ``PR_PMLEN`` is interpreted +as a lower bound; if the kernel is unable to satisfy the request, the +``PR_SET_TAGGED_ADDR_CTRL`` operation will fail. The actual number of tag bits +is returned in ``PR_PMLEN`` by the ``PR_GET_TAGGED_ADDR_CTRL`` operation. diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 22dc5ea4196c..0ef449465378 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -531,6 +531,17 @@ config RISCV_ISA_C If you don't know what to do here, say Y. +config RISCV_ISA_SUPM + bool "Supm extension for userspace pointer masking" + depends on 64BIT + default y + help + Add support for pointer masking in userspace (Supm) when the + underlying hardware extension (Smnpm or Ssnpm) is detected at boot. + + If this option is disabled, userspace will be unable to use + the prctl(PR_{SET,GET}_TAGGED_ADDR_CTRL) API. 
+ config RISCV_ISA_SVNAPOT bool "Svnapot extension support for supervisor mode NAPOT pages" depends on 64BIT && MMU diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index c1a492508835..5f56eb9d114a 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -178,6 +178,14 @@ extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); #define RISCV_SET_ICACHE_FLUSH_CTX(arg1, arg2) riscv_set_icache_flush_ctx(arg1, arg2) extern int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long per_thread); +#ifdef CONFIG_RISCV_ISA_SUPM +/* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */ +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg); +long get_tagged_addr_ctrl(struct task_struct *task); +#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(current, arg) +#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current) +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_PROCESSOR_H */ diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 9685cd85e57c..94e33216b2d9 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -70,6 +70,17 @@ static __always_inline bool has_fpu(void) { return false; } #define __switch_to_fpu(__prev, __next) do { } while (0) #endif +static inline void envcfg_update_bits(struct task_struct *task, + unsigned long mask, unsigned long val) +{ + unsigned long envcfg; + + envcfg = (task->thread.envcfg & ~mask) | val; + task->thread.envcfg = envcfg; + if (task == current) + csr_write(CSR_ENVCFG, envcfg); +} + static inline void __switch_to_envcfg(struct task_struct *next) { asm volatile (ALTERNATIVE("nop", "csrw " __stringify(CSR_ENVCFG) ", %0", diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index e3142d8a6e28..200d2ed64dfe 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 SiFive */ +#include #include #include #include @@ -180,6 +181,10 @@ void flush_thread(void) memset(¤t->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE); #endif +#ifdef CONFIG_RISCV_ISA_SUPM + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + envcfg_update_bits(current, ENVCFG_PMM, ENVCFG_PMM_PMLEN_0); +#endif } void arch_release_task_struct(struct task_struct *tsk) @@ -242,3 +247,89 @@ void __init arch_task_cache_init(void) { riscv_v_setup_ctx_cache(); } + +#ifdef CONFIG_RISCV_ISA_SUPM +enum { + PMLEN_0 = 0, + PMLEN_7 = 7, + PMLEN_16 = 16, +}; + +static bool have_user_pmlen_7; +static bool have_user_pmlen_16; + +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) +{ + unsigned long valid_mask = PR_PMLEN_MASK; + struct thread_info *ti = task_thread_info(task); + unsigned long pmm; + u8 pmlen; + + if (is_compat_thread(ti)) + return -EINVAL; + + if (arg & ~valid_mask) + return -EINVAL; + + /* + * Prefer the smallest PMLEN that satisfies the user's request, + * in case choosing a larger PMLEN has a performance impact. 
+ */ + pmlen = FIELD_GET(PR_PMLEN_MASK, arg); + if (pmlen == PMLEN_0) + pmm = ENVCFG_PMM_PMLEN_0; + else if (pmlen <= PMLEN_7 && have_user_pmlen_7) + pmm = ENVCFG_PMM_PMLEN_7; + else if (pmlen <= PMLEN_16 && have_user_pmlen_16) + pmm = ENVCFG_PMM_PMLEN_16; + else + return -EINVAL; + + envcfg_update_bits(task, ENVCFG_PMM, pmm); + + return 0; +} + +long get_tagged_addr_ctrl(struct task_struct *task) +{ + struct thread_info *ti = task_thread_info(task); + long ret = 0; + + if (is_compat_thread(ti)) + return -EINVAL; + + switch (task->thread.envcfg & ENVCFG_PMM) { + case ENVCFG_PMM_PMLEN_7: + ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_7); + break; + case ENVCFG_PMM_PMLEN_16: + ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_16); + break; + } + + return ret; +} + +static bool try_to_set_pmm(unsigned long value) +{ + csr_set(CSR_ENVCFG, value); + return (csr_read_clear(CSR_ENVCFG, ENVCFG_PMM) & ENVCFG_PMM) == value; +} + +static int __init tagged_addr_init(void) +{ + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return 0; + + /* + * envcfg.PMM is a WARL field. Detect which values are supported. + * Assume the supported PMLEN values are the same on all harts. + */ + csr_clear(CSR_ENVCFG, ENVCFG_PMM); + have_user_pmlen_7 = try_to_set_pmm(ENVCFG_PMM_PMLEN_7); + have_user_pmlen_16 = try_to_set_pmm(ENVCFG_PMM_PMLEN_16); + + return 0; +} +core_initcall(tagged_addr_init); +#endif /* CONFIG_RISCV_ISA_SUPM */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 35791791a879..cefd656ebf43 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -230,7 +230,7 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) -/* Tagged user address controls for arm64 */ +/* Tagged user address controls for arm64 and RISC-V */ #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) @@ -244,6 +244,9 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* RISC-V pointer masking tag length */ +# define PR_PMLEN_SHIFT 24 +# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 -- cgit v1.2.3 From 78844482a1c939a972681842f8ee2a8ddb202441 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 16 Oct 2024 13:27:47 -0700 Subject: riscv: Allow ptrace control of the tagged address ABI This allows a tracer to control the ABI of the tracee, as on arm64. 
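A tracer could exercise the new regset roughly as sketched below (illustrative only; the helper names are hypothetical, while NT_RISCV_TAGGED_ADDR_CTRL and the regset size come from this patch):

	#include <elf.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/uio.h>

	#ifndef NT_RISCV_TAGGED_ADDR_CTRL
	#define NT_RISCV_TAGGED_ADDR_CTRL 0x902
	#endif

	/* read the tracee's tagged address control word */
	static long get_tracee_tagged_addr_ctrl(pid_t pid)
	{
		long ctrl = 0;
		struct iovec iov = { .iov_base = &ctrl, .iov_len = sizeof(ctrl) };

		if (ptrace(PTRACE_GETREGSET, pid,
			   (void *)NT_RISCV_TAGGED_ADDR_CTRL, &iov))
			return -1;
		return ctrl;
	}

	/* impose a new control word (e.g. a different PR_PMLEN) on the tracee */
	static long set_tracee_tagged_addr_ctrl(pid_t pid, long ctrl)
	{
		struct iovec iov = { .iov_base = &ctrl, .iov_len = sizeof(ctrl) };

		return ptrace(PTRACE_SETREGSET, pid,
			      (void *)NT_RISCV_TAGGED_ADDR_CTRL, &iov);
	}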
Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20241016202814.4061541-7-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/ptrace.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/elf.h | 1 + 2 files changed, 43 insertions(+) (limited to 'include') diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 92731ff8c79a..ea67e9fb7a58 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -28,6 +28,9 @@ enum riscv_regset { #ifdef CONFIG_RISCV_ISA_V REGSET_V, #endif +#ifdef CONFIG_RISCV_ISA_SUPM + REGSET_TAGGED_ADDR_CTRL, +#endif }; static int riscv_gpr_get(struct task_struct *target, @@ -152,6 +155,35 @@ static int riscv_vr_set(struct task_struct *target, } #endif +#ifdef CONFIG_RISCV_ISA_SUPM +static int tagged_addr_ctrl_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + long ctrl = get_tagged_addr_ctrl(target); + + if (IS_ERR_VALUE(ctrl)) + return ctrl; + + return membuf_write(&to, &ctrl, sizeof(ctrl)); +} + +static int tagged_addr_ctrl_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + long ctrl; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); + if (ret) + return ret; + + return set_tagged_addr_ctrl(target, ctrl); +} +#endif + static const struct user_regset riscv_user_regset[] = { [REGSET_X] = { .core_note_type = NT_PRSTATUS, @@ -182,6 +214,16 @@ static const struct user_regset riscv_user_regset[] = { .set = riscv_vr_set, }, #endif +#ifdef CONFIG_RISCV_ISA_SUPM + [REGSET_TAGGED_ADDR_CTRL] = { + .core_note_type = NT_RISCV_TAGGED_ADDR_CTRL, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = tagged_addr_ctrl_get, + .set = tagged_addr_ctrl_set, + }, +#endif }; static const struct user_regset_view riscv_user_native_view = { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b9935988da5c..a920cf8934dc 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -450,6 +450,7 @@ typedef struct elf64_shdr { #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ #define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */ #define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */ +#define NT_RISCV_TAGGED_ADDR_CTRL 0x902 /* RISC-V tagged address control (prctl()) */ #define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */ #define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */ #define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */ -- cgit v1.2.3 From 02ac5f9d6caec96071103f7c62b5117526e47b64 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 24 Oct 2024 02:20:02 +0000 Subject: of: property: add of_graph_get_next_port() We have endpoint base functions - of_graph_get_next_endpoint() - of_graph_get_endpoint_count() - for_each_endpoint_of_node() Here, for_each_endpoint_of_node() loop finds each endpoints ports { port@0 { (1) endpoint {...}; }; port@1 { (2) endpoint {...}; }; ... }; In above case, it finds endpoint as (1) -> (2) -> ... Basically, user/driver knows which port is used for what, but not in all cases. For example on flexible/generic driver case, how many ports are used is not fixed. 
For example Sound Generic Card driver which is very flexible/generic and used from many venders can't know how many ports are used, and used for what, because it depends on each vender SoC and/or its used board. And more, the port can have multi endpoints. For example Generic Sound Card case, it supports many type of connection between CPU / Codec, and some of them uses multi endpoint in one port. see below. ports { (A) port@0 { (1) endpoint@0 {...}; (2) endpoint@1 {...}; }; (B) port@1 { (3) endpoint {...}; }; ... }; Generic Sound Card want to handle each connection via "port" base instead of "endpoint" base. But, it is very difficult to handle each "port" via existing for_each_endpoint_of_node(). Because getting each "port" via of_get_parent() from each "endpoint" doesn't work. For example in above case, both (1) (2) endpoint has same "port" (= A). Add "port" base functions. Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87ldyeb5t9.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Rob Herring (Arm) --- drivers/of/property.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/of_graph.h | 28 +++++++++++++++++++++++++ 2 files changed, 82 insertions(+) (limited to 'include') diff --git a/drivers/of/property.c b/drivers/of/property.c index 33dd72cad4ab..6dd5461ea359 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -630,6 +630,43 @@ struct device_node *of_graph_get_port_by_id(struct device_node *parent, u32 id) } EXPORT_SYMBOL(of_graph_get_port_by_id); +/** + * of_graph_get_next_port() - get next port node. + * @parent: pointer to the parent device node, or parent ports node + * @prev: previous port node, or NULL to get first + * + * Parent device node can be used as @parent whether device node has ports node + * or not. It will work same as ports@0 node. + * + * Return: A 'port' node pointer with refcount incremented. Refcount + * of the passed @prev node is decremented. 
+ */ +struct device_node *of_graph_get_next_port(const struct device_node *parent, + struct device_node *prev) +{ + if (!parent) + return NULL; + + if (!prev) { + struct device_node *node __free(device_node) = + of_get_child_by_name(parent, "ports"); + + if (node) + parent = node; + + return of_get_child_by_name(parent, "port"); + } + + do { + prev = of_get_next_child(parent, prev); + if (!prev) + break; + } while (!of_node_name_eq(prev, "port")); + + return prev; +} +EXPORT_SYMBOL(of_graph_get_next_port); + /** * of_graph_get_next_endpoint() - get next endpoint node * @parent: pointer to the parent device node @@ -823,6 +860,23 @@ unsigned int of_graph_get_endpoint_count(const struct device_node *np) } EXPORT_SYMBOL(of_graph_get_endpoint_count); +/** + * of_graph_get_port_count() - get the number of port in a device or ports node + * @np: pointer to the device or ports node + * + * Return: count of port of this device or ports node + */ +unsigned int of_graph_get_port_count(struct device_node *np) +{ + unsigned int num = 0; + + for_each_of_graph_port(np, port) + num++; + + return num; +} +EXPORT_SYMBOL(of_graph_get_port_count); + /** * of_graph_get_remote_node() - get remote parent device_node for given port/endpoint * @node: pointer to parent device_node containing graph port/endpoint diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index a4bea62bfa29..44518f3583a4 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -11,6 +11,7 @@ #ifndef __LINUX_OF_GRAPH_H #define __LINUX_OF_GRAPH_H +#include #include #include @@ -37,14 +38,29 @@ struct of_endpoint { for (child = of_graph_get_next_endpoint(parent, NULL); child != NULL; \ child = of_graph_get_next_endpoint(parent, child)) +/** + * for_each_of_graph_port - iterate over every port in a device or ports node + * @parent: parent device or ports node containing port + * @child: loop variable pointing to the current port node + * + * When breaking out of the loop, and continue to use the @child, you need to + * use return_ptr(@child) or no_free_ptr(@child) not to call __free() for it. 
+ */ +#define for_each_of_graph_port(parent, child) \ + for (struct device_node *child __free(device_node) = of_graph_get_next_port(parent, NULL);\ + child != NULL; child = of_graph_get_next_port(parent, child)) + #ifdef CONFIG_OF bool of_graph_is_present(const struct device_node *node); int of_graph_parse_endpoint(const struct device_node *node, struct of_endpoint *endpoint); unsigned int of_graph_get_endpoint_count(const struct device_node *np); +unsigned int of_graph_get_port_count(struct device_node *np); struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id); struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *previous); +struct device_node *of_graph_get_next_port(const struct device_node *parent, + struct device_node *port); struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg); struct device_node *of_graph_get_remote_endpoint( @@ -73,6 +89,11 @@ static inline unsigned int of_graph_get_endpoint_count(const struct device_node return 0; } +static inline unsigned int of_graph_get_port_count(struct device_node *np) +{ + return 0; +} + static inline struct device_node *of_graph_get_port_by_id( struct device_node *node, u32 id) { @@ -86,6 +107,13 @@ static inline struct device_node *of_graph_get_next_endpoint( return NULL; } +static inline struct device_node *of_graph_get_next_port( + const struct device_node *parent, + struct device_node *previous) +{ + return NULL; +} + static inline struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg) { -- cgit v1.2.3 From 58fe47d6ac7413172e1fa88324fb1b4c0eb2c0a2 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 24 Oct 2024 02:20:06 +0000 Subject: of: property: add of_graph_get_next_port_endpoint() We already have of_graph_get_next_endpoint(), but it is not intuitive to use in some case. (X) node { (Y) ports { (P0) port@0 { endpoint { remote-endpoint = ...; };}; (P10) port@1 { endpoint { remote-endpoint = ...; }; (P11) endpoint { remote-endpoint = ...; };}; (P2) port@2 { endpoint { remote-endpoint = ...; };}; }; }; For example, if I want to handle port@1's 2 endpoints (= P10, P11), I want to use like below P10 = of_graph_get_next_endpoint(port1, NULL); P11 = of_graph_get_next_endpoint(port1, P10); But 1st one will be error, because of_graph_get_next_endpoint() requested 1st parameter is "node" (X) or "ports" (Y), not but "port". Below works well, but it will get P0 P0 = of_graph_get_next_endpoint(node, NULL); P0 = of_graph_get_next_endpoint(ports, NULL); In other words, we can't handle P10/P11 directly via of_graph_get_next_endpoint(). There is another non intuitive behavior on of_graph_get_next_endpoint(). In case of if I could get P10 pointer for some way, and if I want to handle port@1 things by loop, I would like use it like below /* * "ep" is now P10, and handle port1 things here, * but we don't know how many endpoints port1 have. * * Because "ep" is non NULL now, we can use port1 * as of_graph_get_next_endpoint(port1, xxx) */ do { /* do something for port1 specific things here */ } while (ep = of_graph_get_next_endpoint(port1, ep)) But it also not worked as I expected. I expect it will be P10 -> P11 -> NULL, but it will be P10 -> P11 -> P2, because of_graph_get_next_endpoint() will fetch "endpoint" beyond the "port". It is not useful for generic driver. 
To handle endpoint more intuitive, create of_graph_get_next_port_endpoint() of_graph_get_next_port_endpoint(port1, NULL); // P10 of_graph_get_next_port_endpoint(port1, P10); // P11 of_graph_get_next_port_endpoint(port1, P11); // NULL Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87jzdyb5t5.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Rob Herring (Arm) --- drivers/of/property.c | 27 +++++++++++++++++++++++++++ include/linux/of_graph.h | 21 +++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'include') diff --git a/drivers/of/property.c b/drivers/of/property.c index 6dd5461ea359..ce0e08176fca 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -667,6 +667,33 @@ struct device_node *of_graph_get_next_port(const struct device_node *parent, } EXPORT_SYMBOL(of_graph_get_next_port); +/** + * of_graph_get_next_port_endpoint() - get next endpoint node in port. + * If it reached to end of the port, it will return NULL. + * @port: pointer to the target port node + * @prev: previous endpoint node, or NULL to get first + * + * Return: An 'endpoint' node pointer with refcount incremented. Refcount + * of the passed @prev node is decremented. + */ +struct device_node *of_graph_get_next_port_endpoint(const struct device_node *port, + struct device_node *prev) +{ + while (1) { + prev = of_get_next_child(port, prev); + if (!prev) + break; + if (WARN(!of_node_name_eq(prev, "endpoint"), + "non endpoint node is used (%pOF)", prev)) + continue; + + break; + } + + return prev; +} +EXPORT_SYMBOL(of_graph_get_next_port_endpoint); + /** * of_graph_get_next_endpoint() - get next endpoint node * @parent: pointer to the parent device node diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index 44518f3583a4..a692d9d979a6 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -50,6 +50,18 @@ struct of_endpoint { for (struct device_node *child __free(device_node) = of_graph_get_next_port(parent, NULL);\ child != NULL; child = of_graph_get_next_port(parent, child)) +/** + * for_each_of_graph_port_endpoint - iterate over every endpoint in a port node + * @parent: parent port node + * @child: loop variable pointing to the current endpoint node + * + * When breaking out of the loop, and continue to use the @child, you need to + * use return_ptr(@child) or no_free_ptr(@child) not to call __free() for it. 
+ */ +#define for_each_of_graph_port_endpoint(parent, child) \ + for (struct device_node *child __free(device_node) = of_graph_get_next_port_endpoint(parent, NULL);\ + child != NULL; child = of_graph_get_next_port_endpoint(parent, child)) + #ifdef CONFIG_OF bool of_graph_is_present(const struct device_node *node); int of_graph_parse_endpoint(const struct device_node *node, @@ -61,6 +73,8 @@ struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *previous); struct device_node *of_graph_get_next_port(const struct device_node *parent, struct device_node *port); +struct device_node *of_graph_get_next_port_endpoint(const struct device_node *port, + struct device_node *prev); struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg); struct device_node *of_graph_get_remote_endpoint( @@ -114,6 +128,13 @@ static inline struct device_node *of_graph_get_next_port( return NULL; } +static inline struct device_node *of_graph_get_next_port_endpoint( + const struct device_node *parent, + struct device_node *previous) +{ + return NULL; +} + static inline struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg) { -- cgit v1.2.3 From 78cd57bbb4529690d152a5dce409dd91fa1a8b9b Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 24 Oct 2024 02:20:35 +0000 Subject: fbdev: omapfb: use new of_graph functions Now we can use new port related functions for port parsing. Use it. Signed-off-by: Kuninori Morimoto Acked-by: Helge Deller Link: https://lore.kernel.org/r/87bjzab5sd.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Rob Herring (Arm) --- drivers/video/fbdev/omap2/omapfb/dss/dpi.c | 3 +- drivers/video/fbdev/omap2/omapfb/dss/dss-of.c | 66 --------------------------- drivers/video/fbdev/omap2/omapfb/dss/dss.c | 20 ++++---- drivers/video/fbdev/omap2/omapfb/dss/sdi.c | 3 +- include/video/omapfb_dss.h | 8 ---- 5 files changed, 13 insertions(+), 87 deletions(-) (limited to 'include') diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dpi.c b/drivers/video/fbdev/omap2/omapfb/dss/dpi.c index 7c1b7d89389a..395b1139a5ae 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/dpi.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/dpi.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -845,7 +846,7 @@ int dpi_init_port(struct platform_device *pdev, struct device_node *port) if (!dpi) return -ENOMEM; - ep = omapdss_of_get_next_endpoint(port, NULL); + ep = of_graph_get_next_port_endpoint(port, NULL); if (!ep) return 0; diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dss-of.c b/drivers/video/fbdev/omap2/omapfb/dss/dss-of.c index d5a43b3bf45e..c04cbe0ef173 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/dss-of.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/dss-of.c @@ -15,72 +15,6 @@ #include "dss.h" -struct device_node * -omapdss_of_get_next_port(const struct device_node *parent, - struct device_node *prev) -{ - struct device_node *port = NULL; - - if (!parent) - return NULL; - - if (!prev) { - struct device_node *ports; - /* - * It's the first call, we have to find a port subnode - * within this node or within an optional 'ports' node. 
- */ - ports = of_get_child_by_name(parent, "ports"); - if (ports) - parent = ports; - - port = of_get_child_by_name(parent, "port"); - - /* release the 'ports' node */ - of_node_put(ports); - } else { - struct device_node *ports; - - ports = of_get_parent(prev); - if (!ports) - return NULL; - - do { - port = of_get_next_child(ports, prev); - if (!port) { - of_node_put(ports); - return NULL; - } - prev = port; - } while (!of_node_name_eq(port, "port")); - - of_node_put(ports); - } - - return port; -} -EXPORT_SYMBOL_GPL(omapdss_of_get_next_port); - -struct device_node * -omapdss_of_get_next_endpoint(const struct device_node *parent, - struct device_node *prev) -{ - struct device_node *ep = NULL; - - if (!parent) - return NULL; - - do { - ep = of_get_next_child(parent, prev); - if (!ep) - return NULL; - prev = ep; - } while (!of_node_name_eq(ep, "endpoint")); - - return ep; -} -EXPORT_SYMBOL_GPL(omapdss_of_get_next_endpoint); - struct device_node *dss_of_port_get_parent_device(struct device_node *port) { struct device_node *np; diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dss.c b/drivers/video/fbdev/omap2/omapfb/dss/dss.c index d814e4baa4b3..70bb35ef48b5 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/dss.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/dss.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -919,10 +920,7 @@ static int dss_init_ports(struct platform_device *pdev) struct device_node *port; int r, ret = 0; - if (parent == NULL) - return 0; - - port = omapdss_of_get_next_port(parent, NULL); + port = of_graph_get_next_port(parent, NULL); if (!port) return 0; @@ -952,8 +950,9 @@ static int dss_init_ports(struct platform_device *pdev) default: break; } - } while (!ret && - (port = omapdss_of_get_next_port(parent, port)) != NULL); + + port = of_graph_get_next_port(parent, port); + } while (!ret && port); if (ret) dss_uninit_ports(pdev); @@ -966,10 +965,7 @@ static void dss_uninit_ports(struct platform_device *pdev) struct device_node *parent = pdev->dev.of_node; struct device_node *port; - if (parent == NULL) - return; - - port = omapdss_of_get_next_port(parent, NULL); + port = of_graph_get_next_port(parent, NULL); if (!port) return; @@ -1000,7 +996,9 @@ static void dss_uninit_ports(struct platform_device *pdev) default: break; } - } while ((port = omapdss_of_get_next_port(parent, port)) != NULL); + + port = of_graph_get_next_port(parent, port); + } while (port); } static int dss_video_pll_probe(struct platform_device *pdev) diff --git a/drivers/video/fbdev/omap2/omapfb/dss/sdi.c b/drivers/video/fbdev/omap2/omapfb/dss/sdi.c index d527931b2b16..22a6243d7abf 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/sdi.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/sdi.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include