From 534b9bdeb4b80d843ca9f924524d4d103ad6605e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 29 Jun 2025 22:52:45 -0700 Subject: Input: tca6416-keypad - remove the driver This input driver predates proper GPIO driver for the chip and now can be replaced with the generic gpio-keys. Remove the driver. Link: https://lore.kernel.org/r/ajfsei3keh4jjasd4lshjicgqixew7bak3cmty3suoliskzgz4@vj3ijycfxy4i Signed-off-by: Dmitry Torokhov --- include/linux/tca6416_keypad.h | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 include/linux/tca6416_keypad.h (limited to 'include/linux') diff --git a/include/linux/tca6416_keypad.h b/include/linux/tca6416_keypad.h deleted file mode 100644 index 5cf6f6f82aa7..000000000000 --- a/include/linux/tca6416_keypad.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tca6416 keypad platform support - * - * Copyright (C) 2010 Texas Instruments - * - * Author: Sriramakrishnan - */ - -#ifndef _TCA6416_KEYS_H -#define _TCA6416_KEYS_H - -#include - -struct tca6416_button { - /* Configuration parameters */ - int code; /* input event code (KEY_*, SW_*) */ - int active_low; - int type; /* input event type (EV_KEY, EV_SW) */ -}; - -struct tca6416_keys_platform_data { - struct tca6416_button *buttons; - int nbuttons; - unsigned int rep:1; /* enable input subsystem auto repeat */ - uint16_t pinmask; - uint16_t invert; - int use_polling; /* use polling if Interrupt is not connected*/ -}; -#endif -- cgit v1.2.3 From dab32f2576a39d5f54f3dbbbc718d92fa5109ce9 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 7 Aug 2025 15:55:39 +0200 Subject: s390/pci: Use pci_uevent_ers() in PCI recovery Issue uevents on s390 during PCI recovery using pci_uevent_ers() as done by EEH and AER PCIe recovery routines. Signed-off-by: Niklas Schnelle Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Link: https://patch.msgid.link/20250807-add_err_uevents-v5-2-adf85b0620b0@linux.ibm.com --- arch/s390/pci/pci_event.c | 3 +++ drivers/pci/pci-driver.c | 2 +- include/linux/pci.h | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index d930416d4c90..b95376041501 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -88,6 +88,7 @@ static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev, pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; ers_res = driver->err_handler->error_detected(pdev, pdev->error_state); + pci_uevent_ers(pdev, ers_res); if (ers_result_indicates_abort(ers_res)) pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev)); else if (ers_res == PCI_ERS_RESULT_NEED_RESET) @@ -244,6 +245,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) ers_res = PCI_ERS_RESULT_RECOVERED; if (ers_res != PCI_ERS_RESULT_RECOVERED) { + pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT); pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); status_str = "failed (driver can't recover)"; @@ -253,6 +255,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) pr_info("%s: The device is ready to resume operations\n", pci_name(pdev)); if (driver->err_handler->resume) driver->err_handler->resume(pdev); + pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED); out_unlock: pci_dev_unlock(pdev); zpci_report_status(zdev, "recovery", status_str); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 6405acdb5d0f..302d61783f6c 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1582,7 +1582,7 @@ static int pci_uevent(const struct device *dev, struct kobj_uevent_env *env) return 0; } -#if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH) +#if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH) || defined(CONFIG_S390) /** * pci_uevent_ers - emit a uevent during recovery path of PCI device * @pdev: PCI device undergoing error recovery diff --git a/include/linux/pci.h b/include/linux/pci.h index 59876de13860..7735acf6f349 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2764,7 +2764,7 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) return false; } -#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH) +#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH) || defined(CONFIG_S390) void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #endif -- cgit v1.2.3 From f39494089aaa1022008eee245fb83ef1ae911b6d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Jul 2025 21:09:13 -0700 Subject: srcu: Move rcu_is_watching() checks to srcu_read_{,un}lock_fast() The rcu_is_watching() warnings are currently in the SRCU-tree implementations of __srcu_read_lock_fast() and __srcu_read_unlock_fast(). However, this makes it difficult to create _notrace variants of srcu_read_lock_fast() and srcu_read_unlock_fast(). This commit therefore moves these checks to srcu_read_lock_fast(), srcu_read_unlock_fast(), srcu_down_read_fast(), and srcu_up_read_fast(). Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcu.h | 4 ++++ include/linux/srcutree.h | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index f179700fecaf..478c73d067f7 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -275,6 +275,7 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * { struct srcu_ctr __percpu *retval; + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); retval = __srcu_read_lock_fast(ssp); rcu_try_lock_acquire(&ssp->dep_map); @@ -295,6 +296,7 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires(ssp) { WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_down_read_fast()."); srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); return __srcu_read_lock_fast(ssp); } @@ -389,6 +391,7 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); srcu_lock_release(&ssp->dep_map); __srcu_read_unlock_fast(ssp, scp); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); } /** @@ -405,6 +408,7 @@ static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __ WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); __srcu_read_unlock_fast(ssp, scp); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_up_read_fast()."); } /** diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index bf44d8d1e69e..043b5a67ef71 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -244,7 +244,6 @@ static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct { struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); - RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) this_cpu_inc(scp->srcu_locks.counter); /* Y */ else @@ -275,7 +274,6 @@ static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ this_cpu_inc(scp->srcu_unlocks.counter); /* Z */ else atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */ - RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); } void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); -- cgit v1.2.3 From 7e2a2d060da4860af37e1000dc62a30a1551d9e8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Jul 2025 09:12:16 -0700 Subject: srcu: Add srcu_read_lock_fast_notrace() and srcu_read_unlock_fast_notrace() This commit adds no-trace variants of the srcu_read_lock_fast() and srcu_read_unlock_fast() functions for tracing use. [ paulmck: Apply notrace feedback from Joel Fernandes, Steven Rostedt, and Mathieu Desnoyers. ] [ paulmck: Apply excess-notrace feedback from Boqun Feng. ] Link: https://lore.kernel.org/all/20250721162433.10454-1-paulmck@kernel.org Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcu.h | 25 +++++++++++++++++++++++++ include/linux/srcutree.h | 5 +++-- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 478c73d067f7..7a692bf8f99b 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -282,6 +282,20 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * return retval; } +/* + * Used by tracing, cannot be traced and cannot call lockdep. + * See srcu_read_lock_fast() for more information. + */ +static inline struct srcu_ctr __percpu *srcu_read_lock_fast_notrace(struct srcu_struct *ssp) + __acquires(ssp) +{ + struct srcu_ctr __percpu *retval; + + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); + retval = __srcu_read_lock_fast(ssp); + return retval; +} + /** * srcu_down_read_fast - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. @@ -394,6 +408,17 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); } +/* + * Used by tracing, cannot be traced and cannot call lockdep. + * See srcu_read_unlock_fast() for more information. + */ +static inline void srcu_read_unlock_fast_notrace(struct srcu_struct *ssp, + struct srcu_ctr __percpu *scp) __releases(ssp) +{ + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); + __srcu_read_unlock_fast(ssp, scp); +} + /** * srcu_up_read_fast - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 043b5a67ef71..4d2fee4d3828 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -240,7 +240,7 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss * on architectures that support NMIs but do not supply NMI-safe * implementations of this_cpu_inc(). */ -static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp) +static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct srcu_struct *ssp) { struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); @@ -267,7 +267,8 @@ static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct * on architectures that support NMIs but do not supply NMI-safe * implementations of this_cpu_inc(). */ -static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) +static inline void notrace +__srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) { barrier(); /* Avoid leaking the critical section. */ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) -- cgit v1.2.3 From cacadb630375b8c30ca4d0300812178bb884c0b0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Jul 2025 09:19:39 -0700 Subject: srcu: Add guards for notrace variants of SRCU-fast readers This adds the usual scoped_guard(srcu_fast_notrace, &my_srcu) and guard(srcu_fast_notrace)(&my_srcu). Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 7a692bf8f99b..ada65b58bc4c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -515,4 +515,9 @@ DEFINE_LOCK_GUARD_1(srcu_fast, struct srcu_struct, srcu_read_unlock_fast(_T->lock, _T->scp), struct srcu_ctr __percpu *scp) +DEFINE_LOCK_GUARD_1(srcu_fast_notrace, struct srcu_struct, + _T->scp = srcu_read_lock_fast_notrace(_T->lock), + srcu_read_unlock_fast_notrace(_T->lock, _T->scp), + struct srcu_ctr __percpu *scp) + #endif -- cgit v1.2.3 From 33cfb80d1910b41d1a25cef89b159c945aff0f24 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 21 Jul 2025 14:13:10 +0000 Subject: crypto: ccp - Add support for SNP_FEATURE_INFO command The FEATURE_INFO command provides hypervisors with a programmatic means to learn about the supported features of the currently loaded firmware. This command mimics the CPUID instruction relative to sub-leaf input and the four unsigned integer output values. To obtain information regarding the features present in the currently loaded SEV firmware, use the SNP_FEATURE_INFO command. Cache the SNP platform status and feature information from CPUID 0x8000_0024 in the sev_device structure. If SNP is enabled, utilize this cached SNP platform status for the API major, minor and build version. Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 72 ++++++++++++++++++++++++++++++++++++++++++++ drivers/crypto/ccp/sev-dev.h | 3 ++ include/linux/psp-sev.h | 29 ++++++++++++++++++ 3 files changed, 104 insertions(+) (limited to 'include/linux') diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 528013be1c0a..a3941254d61f 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -233,6 +233,7 @@ static int sev_cmd_buffer_len(int cmd) case SEV_CMD_SNP_GUEST_REQUEST: return sizeof(struct sev_data_snp_guest_request); case SEV_CMD_SNP_CONFIG: return sizeof(struct sev_user_data_snp_config); case SEV_CMD_SNP_COMMIT: return sizeof(struct sev_data_snp_commit); + case SEV_CMD_SNP_FEATURE_INFO: return sizeof(struct sev_data_snp_feature_info); default: return 0; } @@ -1073,6 +1074,67 @@ static void snp_set_hsave_pa(void *arg) wrmsrq(MSR_VM_HSAVE_PA, 0); } +static int snp_get_platform_data(struct sev_device *sev, int *error) +{ + struct sev_data_snp_feature_info snp_feat_info; + struct snp_feature_info *feat_info; + struct sev_data_snp_addr buf; + struct page *page; + int rc; + + /* + * This function is expected to be called before SNP is + * initialized. + */ + if (sev->snp_initialized) + return -EINVAL; + + buf.address = __psp_pa(&sev->snp_plat_status); + rc = sev_do_cmd(SEV_CMD_SNP_PLATFORM_STATUS, &buf, error); + if (rc) { + dev_err(sev->dev, "SNP PLATFORM_STATUS command failed, ret = %d, error = %#x\n", + rc, *error); + return rc; + } + + sev->api_major = sev->snp_plat_status.api_major; + sev->api_minor = sev->snp_plat_status.api_minor; + sev->build = sev->snp_plat_status.build_id; + + /* + * Do feature discovery of the currently loaded firmware, + * and cache feature information from CPUID 0x8000_0024, + * sub-function 0. + */ + if (!sev->snp_plat_status.feature_info) + return 0; + + /* + * Use dynamically allocated structure for the SNP_FEATURE_INFO + * command to ensure structure is 8-byte aligned, and does not + * cross a page boundary. + */ + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + feat_info = page_address(page); + snp_feat_info.length = sizeof(snp_feat_info); + snp_feat_info.ecx_in = 0; + snp_feat_info.feature_info_paddr = __psp_pa(feat_info); + + rc = sev_do_cmd(SEV_CMD_SNP_FEATURE_INFO, &snp_feat_info, error); + if (!rc) + sev->snp_feat_info_0 = *feat_info; + else + dev_err(sev->dev, "SNP FEATURE_INFO command failed, ret = %d, error = %#x\n", + rc, *error); + + __free_page(page); + + return rc; +} + static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg) { struct sev_data_range_list *range_list = arg; @@ -1599,6 +1661,16 @@ static int sev_get_api_version(void) struct sev_user_data_status status; int error = 0, ret; + /* + * Cache SNP platform status and SNP feature information + * if SNP is available. + */ + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) { + ret = snp_get_platform_data(sev, &error); + if (ret) + return 1; + } + ret = sev_platform_status(&status, &error); if (ret) { dev_err(sev->dev, diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h index 24dd8ff8afaa..5aed2595c9ae 100644 --- a/drivers/crypto/ccp/sev-dev.h +++ b/drivers/crypto/ccp/sev-dev.h @@ -58,6 +58,9 @@ struct sev_device { bool snp_initialized; struct sev_user_data_status sev_plat_status; + + struct sev_user_data_snp_status snp_plat_status; + struct snp_feature_info snp_feat_info_0; }; int sev_dev_init(struct psp_device *psp); diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 0f5f94137f6d..5fb6ae0f51cc 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -107,6 +107,7 @@ enum sev_cmd { SEV_CMD_SNP_DOWNLOAD_FIRMWARE_EX = 0x0CA, SEV_CMD_SNP_COMMIT = 0x0CB, SEV_CMD_SNP_VLEK_LOAD = 0x0CD, + SEV_CMD_SNP_FEATURE_INFO = 0x0CE, SEV_CMD_MAX, }; @@ -814,6 +815,34 @@ struct sev_data_snp_commit { u32 len; } __packed; +/** + * struct sev_data_snp_feature_info - SEV_SNP_FEATURE_INFO structure + * + * @length: len of the command buffer read by the PSP + * @ecx_in: subfunction index + * @feature_info_paddr : System Physical Address of the FEATURE_INFO structure + */ +struct sev_data_snp_feature_info { + u32 length; + u32 ecx_in; + u64 feature_info_paddr; +} __packed; + +/** + * struct feature_info - FEATURE_INFO structure + * + * @eax: output of SNP_FEATURE_INFO command + * @ebx: output of SNP_FEATURE_INFO command + * @ecx: output of SNP_FEATURE_INFO command + * #edx: output of SNP_FEATURE_INFO command + */ +struct snp_feature_info { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; +} __packed; + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** -- cgit v1.2.3 From 45d59bd4a3e0f0475b3646e8b9936d34794e503d Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 21 Jul 2025 14:13:27 +0000 Subject: crypto: ccp - Introduce new API interface to indicate SEV-SNP Ciphertext hiding feature Implement an API that checks the overall feature support for SEV-SNP ciphertext hiding. This API verifies both the support of the SEV firmware for the feature and its enablement in the platform's BIOS. Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 21 +++++++++++++++++++++ include/linux/psp-sev.h | 5 +++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index a3941254d61f..58c9e040e9ac 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -1074,6 +1074,27 @@ static void snp_set_hsave_pa(void *arg) wrmsrq(MSR_VM_HSAVE_PA, 0); } +bool sev_is_snp_ciphertext_hiding_supported(void) +{ + struct psp_device *psp = psp_master; + struct sev_device *sev; + + if (!psp || !psp->sev_data) + return false; + + sev = psp->sev_data; + + /* + * Feature information indicates if CipherTextHiding feature is + * supported by the SEV firmware and additionally platform status + * indicates if CipherTextHiding feature is enabled in the + * Platform BIOS. + */ + return ((sev->snp_feat_info_0.ecx & SNP_CIPHER_TEXT_HIDING_SUPPORTED) && + sev->snp_plat_status.ciphertext_hiding_cap); +} +EXPORT_SYMBOL_GPL(sev_is_snp_ciphertext_hiding_supported); + static int snp_get_platform_data(struct sev_device *sev, int *error) { struct sev_data_snp_feature_info snp_feat_info; diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 5fb6ae0f51cc..d83185b4268b 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -843,6 +843,8 @@ struct snp_feature_info { u32 edx; } __packed; +#define SNP_CIPHER_TEXT_HIDING_SUPPORTED BIT(3) + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** @@ -986,6 +988,7 @@ void *psp_copy_user_blob(u64 uaddr, u32 len); void *snp_alloc_firmware_page(gfp_t mask); void snp_free_firmware_page(void *addr); void sev_platform_shutdown(void); +bool sev_is_snp_ciphertext_hiding_supported(void); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ @@ -1022,6 +1025,8 @@ static inline void snp_free_firmware_page(void *addr) { } static inline void sev_platform_shutdown(void) { } +static inline bool sev_is_snp_ciphertext_hiding_supported(void) { return false; } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */ -- cgit v1.2.3 From c9760b0fca6bfa250c02e14bfe81c542f3626a72 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 21 Jul 2025 14:13:55 +0000 Subject: crypto: ccp - Add support to enable CipherTextHiding on SNP_INIT_EX To enable ciphertext hiding, it must be specified in the SNP_INIT_EX command as part of SNP initialization. Modify the sev_platform_init_args structure, which is used as input to sev_platform_init(), to include a field that, when non-zero, indicates that ciphertext hiding should be enabled and specifies the maximum ASID that can be used for an SEV-SNP guest. Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 12 +++++++++--- include/linux/psp-sev.h | 10 ++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 58c9e040e9ac..334405461657 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -1186,7 +1186,7 @@ static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg) return 0; } -static int __sev_snp_init_locked(int *error) +static int __sev_snp_init_locked(int *error, unsigned int max_snp_asid) { struct psp_device *psp = psp_master; struct sev_data_snp_init_ex data; @@ -1247,6 +1247,12 @@ static int __sev_snp_init_locked(int *error) } memset(&data, 0, sizeof(data)); + + if (max_snp_asid) { + data.ciphertext_hiding_en = 1; + data.max_snp_asid = max_snp_asid; + } + data.init_rmp = 1; data.list_paddr_en = 1; data.list_paddr = __psp_pa(snp_range_list); @@ -1433,7 +1439,7 @@ static int _sev_platform_init_locked(struct sev_platform_init_args *args) if (sev->sev_plat_status.state == SEV_STATE_INIT) return 0; - rc = __sev_snp_init_locked(&args->error); + rc = __sev_snp_init_locked(&args->error, args->max_snp_asid); if (rc && rc != -ENODEV) return rc; @@ -1516,7 +1522,7 @@ static int snp_move_to_init_state(struct sev_issue_cmd *argp, bool *shutdown_req { int error, rc; - rc = __sev_snp_init_locked(&error); + rc = __sev_snp_init_locked(&error, 0); if (rc) { argp->error = SEV_RET_INVALID_PLATFORM_STATE; return rc; diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index d83185b4268b..e0dbcb4b4fd9 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -748,10 +748,13 @@ struct sev_data_snp_guest_request { struct sev_data_snp_init_ex { u32 init_rmp:1; u32 list_paddr_en:1; - u32 rsvd:30; + u32 rapl_dis:1; + u32 ciphertext_hiding_en:1; + u32 rsvd:28; u32 rsvd1; u64 list_paddr; - u8 rsvd2[48]; + u16 max_snp_asid; + u8 rsvd2[46]; } __packed; /** @@ -800,10 +803,13 @@ struct sev_data_snp_shutdown_ex { * @probe: True if this is being called as part of CCP module probe, which * will defer SEV_INIT/SEV_INIT_EX firmware initialization until needed * unless psp_init_on_probe module param is set + * @max_snp_asid: When non-zero, enable ciphertext hiding and specify the + * maximum ASID that can be used for an SEV-SNP guest. */ struct sev_platform_init_args { int error; bool probe; + unsigned int max_snp_asid; }; /** -- cgit v1.2.3 From b76c739c3d11d1dacc8efe7fa873bee28ac991f1 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 22 Jul 2025 16:52:38 -0500 Subject: iio: fix iio_push_to_buffers_with_ts() typo Replace iio_push_to_buffer_with_ts() with iio_push_to_buffers_with_ts() in some documentation comments in iio.h. The latter is the correct name of the function, the former doesn't exist. Signed-off-by: David Lechner Link: https://patch.msgid.link/20250722-iio-fix-iio_push_to_buffer_with_ts-typo-v1-1-6ac9efb856d3@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d11668f14a3e..2f5560646ee4 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -779,7 +779,7 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) * them safe for use with non-coherent DMA. * * A number of drivers also use this on buffers that include a 64-bit timestamp - * that is used with iio_push_to_buffer_with_ts(). Therefore, in the case where + * that is used with iio_push_to_buffers_with_ts(). Therefore, in the case where * DMA alignment is not sufficient for proper timestamp alignment, we align to * 8 bytes instead. */ @@ -794,7 +794,7 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) * @name: identifier name of the buffer * @count: number of elements in the buffer * - * Declares a buffer that is safe to use with iio_push_to_buffer_with_ts(). In + * Declares a buffer that is safe to use with iio_push_to_buffers_with_ts(). In * addition to allocating enough space for @count elements of @type, it also * allocates space for a s64 timestamp at the end of the buffer and ensures * proper alignment of the timestamp. -- cgit v1.2.3 From 4847d1187402a5027d9a04393f12d52a5a1d7f98 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 14 Aug 2025 09:24:41 +0200 Subject: console: introduce console_lock guard()s Having this, guards like these work: guard(console_lock)(); or scoped_guard(console_lock) { ... } See e.g. "vc_screen: use guard()s" later in this series. Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20250814072456.182853-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 8f10d0a85bb4..031a58dc2b91 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -666,6 +666,8 @@ void vcs_remove_sysfs(int index); */ extern atomic_t ignore_console_lock_warning; +DEFINE_LOCK_GUARD_0(console_lock, console_lock(), console_unlock()); + extern void console_init(void); /* For deferred console takeover */ -- cgit v1.2.3 From e8398b8aed50382c21fcec77e80a5314e7c45c25 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 14 Aug 2025 09:24:42 +0200 Subject: tty: introduce tty_port_tty guard() Having this, guards like these work: scoped_guard(tty_port_tty, port) tty_wakeup(scoped_tty()); See e.g. "tty_port: use scoped_guard()" later in this series. The definitions depend on CONFIG_TTY. It's due to tty_kref_put(). On !CONFIG_TTY, it is an inline and its declaration would conflict. The guards are not needed in that case, of course. Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20250814072456.182853-3-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 332ddb93603e..660c254f1efe 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -270,4 +270,18 @@ static inline void tty_port_tty_vhangup(struct tty_port *port) __tty_port_tty_hangup(port, false, false); } +#ifdef CONFIG_TTY +void tty_kref_put(struct tty_struct *tty); +__DEFINE_CLASS_IS_CONDITIONAL(tty_port_tty, true); +__DEFINE_UNLOCK_GUARD(tty_port_tty, struct tty_struct, tty_kref_put(_T->lock)); +static inline class_tty_port_tty_t class_tty_port_tty_constructor(struct tty_port *tport) +{ + class_tty_port_tty_t _t = { + .lock = tty_port_tty_get(tport), + }; + return _t; +} +#define scoped_tty() ((struct tty_struct *)(__guard_ptr(tty_port_tty)(&scope))) +#endif + #endif -- cgit v1.2.3 From 0fd60b689b0dacce659253ec15cb3d3bf660e30b Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 14 Aug 2025 09:24:43 +0200 Subject: serial: introduce uart_port_lock() guard()s Having this, guards like these work: guard(uart_port_lock_irq)(&up->port); or scoped_guard(uart_port_lock_irqsave, port) { ... } See e.g. "serial: 8250: use guard()s" later in this series. Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20250814072456.182853-4-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 84b4648ead7e..666430b47899 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -788,6 +788,19 @@ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned lo spin_unlock_irqrestore(&up->lock, flags); } +DEFINE_GUARD(uart_port_lock, struct uart_port *, uart_port_lock(_T), uart_port_unlock(_T)); +DEFINE_GUARD_COND(uart_port_lock, _try, uart_port_trylock(_T)); + +DEFINE_GUARD(uart_port_lock_irq, struct uart_port *, uart_port_lock_irq(_T), + uart_port_unlock_irq(_T)); + +DEFINE_LOCK_GUARD_1(uart_port_lock_irqsave, struct uart_port, + uart_port_lock_irqsave(_T->lock, &_T->flags), + uart_port_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags); +DEFINE_LOCK_GUARD_1_COND(uart_port_lock_irqsave, _try, + uart_port_trylock_irqsave(_T->lock, &_T->flags)); + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); -- cgit v1.2.3 From 894af4a1cde61c3401f237184fb770f72ff12df8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 12 Apr 2025 13:56:01 +0200 Subject: objtool: Validate kCFI calls Validate that all indirect calls adhere to kCFI rules. Notably doing nocfi indirect call to a cfi function is broken. Apparently some Rust 'core' code violates this and explodes when ran with FineIBT. All the ANNOTATE_NOCFI_SYM sites are prime targets for attackers. - runtime EFI is especially henous because it also needs to disable IBT. Basically calling unknown code without CFI protection at runtime is a massice security issue. - Kexec image handover; if you can exploit this, you get to keep it :-) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Acked-by: Sean Christopherson Link: https://lkml.kernel.org/r/20250714103441.496787279@infradead.org --- arch/x86/kernel/machine_kexec_64.c | 4 ++++ arch/x86/kvm/vmx/vmenter.S | 4 ++++ arch/x86/platform/efi/efi_stub_64.S | 4 ++++ drivers/misc/lkdtm/perms.c | 5 +++++ include/linux/objtool.h | 10 +++++++++ include/linux/objtool_types.h | 1 + tools/include/linux/objtool_types.h | 1 + tools/objtool/check.c | 42 +++++++++++++++++++++++++++++++++++++ tools/objtool/include/objtool/elf.h | 1 + 9 files changed, 72 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 697fb99406e6..8593760c255a 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -453,6 +453,10 @@ void __nocfi machine_kexec(struct kimage *image) __ftrace_enabled_restore(save_ftrace_enabled); } +/* + * Handover to the next kernel, no CFI concern. + */ +ANNOTATE_NOCFI_SYM(machine_kexec); /* arch-dependent functionality related to kexec file-based syscall */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0a6cf5bff2aa..bc255d709d8a 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -361,6 +361,10 @@ SYM_FUNC_END(vmread_error_trampoline) .section .text, "ax" +#ifndef CONFIG_X86_FRED + SYM_FUNC_START(vmx_do_interrupt_irqoff) VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1 SYM_FUNC_END(vmx_do_interrupt_irqoff) + +#endif diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 2206b8bc47b8..f0a5fba0717e 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -11,6 +11,10 @@ #include SYM_FUNC_START(__efi_call) + /* + * The EFI code doesn't have any CFI, annotate away the CFI violation. + */ + ANNOTATE_NOCFI_SYM pushq %rbp movq %rsp, %rbp and $~0xf, %rsp diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 6c24426104ba..e1f5e9abb301 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,10 @@ static noinline __nocfi void execute_location(void *dst, bool write) func(); pr_err("FAIL: func returned\n"); } +/* + * Explicitly doing the wrong thing for testing. + */ +ANNOTATE_NOCFI_SYM(execute_location); static void execute_user_location(void *dst) { diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 366ad004d794..46ebaa46e6c5 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -184,6 +184,15 @@ * WARN using UD2. */ #define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE) +/* + * This should not be used; it annotates away CFI violations. There are a few + * valid use cases like kexec handover to the next kernel image, and there is + * no security concern there. + * + * There are also a few real issues annotated away, like EFI because we can't + * control the EFI code. + */ +#define ANNOTATE_NOCFI_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOCFI)) #else #define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR @@ -194,6 +203,7 @@ #define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL #define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN #define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE +#define ANNOTATE_NOCFI_SYM ANNOTATE type=ANNOTYPE_NOCFI #endif #if defined(CONFIG_NOINSTR_VALIDATION) && \ diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index df5d9fa84dba..aceac94632c8 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -65,5 +65,6 @@ struct unwind_hint { #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 #define ANNOTYPE_REACHABLE 8 +#define ANNOTYPE_NOCFI 9 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index df5d9fa84dba..aceac94632c8 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -65,5 +65,6 @@ struct unwind_hint { #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 #define ANNOTYPE_REACHABLE 8 +#define ANNOTYPE_NOCFI 9 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index d14f20ef1db1..79eab61cd944 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2392,6 +2392,8 @@ static int __annotate_ifc(struct objtool_file *file, int type, struct instructio static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn) { + struct symbol *sym; + switch (type) { case ANNOTYPE_NOENDBR: /* early */ @@ -2433,6 +2435,15 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi insn->dead_end = false; break; + case ANNOTYPE_NOCFI: + sym = insn->sym; + if (!sym) { + ERROR_INSN(insn, "dodgy NOCFI annotation"); + return -1; + } + insn->sym->nocfi = 1; + break; + default: ERROR_INSN(insn, "Unknown annotation type: %d", type); return -1; @@ -4002,6 +4013,37 @@ static int validate_retpoline(struct objtool_file *file) warnings++; } + if (!opts.cfi) + return warnings; + + /* + * kCFI call sites look like: + * + * movl $(-0x12345678), %r10d + * addl -4(%r11), %r10d + * jz 1f + * ud2 + * 1: cs call __x86_indirect_thunk_r11 + * + * Verify all indirect calls are kCFI adorned by checking for the + * UD2. Notably, doing __nocfi calls to regular (cfi) functions is + * broken. + */ + list_for_each_entry(insn, &file->retpoline_call_list, call_node) { + struct symbol *sym = insn->sym; + + if (sym && (sym->type == STT_NOTYPE || + sym->type == STT_FUNC) && !sym->nocfi) { + struct instruction *prev = + prev_insn_same_sym(file, insn); + + if (!prev || prev->type != INSN_BUG) { + WARN_INSN(insn, "no-cfi indirect call!"); + warnings++; + } + } + } + return warnings; } diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 0a2fa3ac0079..df8434d3b744 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -70,6 +70,7 @@ struct symbol { u8 local_label : 1; u8 frame_pointer : 1; u8 ignore : 1; + u8 nocfi : 1; struct list_head pv_target; struct reloc *relocs; struct section *group_sec; -- cgit v1.2.3 From 807221d3c5ff6e3c91ff57bc82a0b7a541462e20 Mon Sep 17 00:00:00 2001 From: Ricky Wu Date: Tue, 12 Aug 2025 14:35:21 +0800 Subject: misc: rtsx_pci: Add separate CD/WP pin polarity reversal support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the Card Detect (CD) and Write Protect (WP) pins shared the same reverse polarity setting in the configuration space. This meant both signals were reversed together, without the ability to configure them individually. This patch introduces two new parameters: sd_cd_reverse_en – enable reverse polarity for the CD pin. sd_wp_reverse_en – enable reverse polarity for the WP pin. With this change, the controller can now support: 1.Reversing both CD and WP pins together (original behavior). 2.Reversing CD and WP pins separately (newly added behavior), if supported by the configuration space. This provides greater flexibility when dealing with devices that have independent polarity requirements for CD and WP pins. Signed-off-by: Ricky Wu Link: https://lore.kernel.org/r/20250812063521.2427696-1-ricky_wu@realtek.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rts5227.c | 13 ++++++++++--- drivers/misc/cardreader/rts5228.c | 12 ++++++++++-- drivers/misc/cardreader/rts5249.c | 16 +++++++++++++--- drivers/misc/cardreader/rts5264.c | 20 ++++++++++++++++---- drivers/misc/cardreader/rts5264.h | 1 + drivers/misc/cardreader/rtsx_pcr.h | 2 ++ include/linux/rtsx_pci.h | 2 ++ 7 files changed, 54 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c index cd512284bfb3..46444bb47f65 100644 --- a/drivers/misc/cardreader/rts5227.c +++ b/drivers/misc/cardreader/rts5227.c @@ -79,6 +79,10 @@ static void rts5227_fetch_vendor_settings(struct rtsx_pcr *pcr) pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); if (rtsx_reg_check_reverse_socket(reg)) pcr->flags |= PCR_REVERSE_SOCKET; + if (rtsx_reg_check_cd_reverse(reg)) + pcr->option.sd_cd_reverse_en = 1; + if (rtsx_reg_check_wp_reverse(reg)) + pcr->option.sd_wp_reverse_en = 1; } static void rts5227_init_from_cfg(struct rtsx_pcr *pcr) @@ -127,8 +131,10 @@ static int rts5227_extra_init_hw(struct rtsx_pcr *pcr) /* Configure force_clock_req */ if (pcr->flags & PCR_REVERSE_SOCKET) rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x30, 0x30); - else - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x30, 0x00); + else { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x20, option->sd_cd_reverse_en << 5); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x10, option->sd_wp_reverse_en << 4); + } if (CHK_PCI_PID(pcr, 0x522A)) rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_AUTOLOAD_CFG1, @@ -350,6 +356,8 @@ void rts5227_init_params(struct rtsx_pcr *pcr) pcr->ms_pull_ctl_disable_tbl = rts5227_ms_pull_ctl_disable_tbl; pcr->reg_pm_ctrl3 = PM_CTRL3; + pcr->option.sd_cd_reverse_en = 0; + pcr->option.sd_wp_reverse_en = 0; } static int rts522a_optimize_phy(struct rtsx_pcr *pcr) @@ -508,5 +516,4 @@ void rts522a_init_params(struct rtsx_pcr *pcr) pcr->hw_param.interrupt_en |= SD_OC_INT_EN; pcr->hw_param.ocp_glitch = SD_OCP_GLITCH_10M; pcr->option.sd_800mA_ocp_thd = RTS522A_OCP_THD_800; - } diff --git a/drivers/misc/cardreader/rts5228.c b/drivers/misc/cardreader/rts5228.c index 0c7f10bcf6f1..db7e735ac24f 100644 --- a/drivers/misc/cardreader/rts5228.c +++ b/drivers/misc/cardreader/rts5228.c @@ -84,6 +84,10 @@ static void rtsx5228_fetch_vendor_settings(struct rtsx_pcr *pcr) pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); if (rtsx_reg_check_reverse_socket(reg)) pcr->flags |= PCR_REVERSE_SOCKET; + if (rtsx_reg_check_cd_reverse(reg)) + pcr->option.sd_cd_reverse_en = 1; + if (rtsx_reg_check_wp_reverse(reg)) + pcr->option.sd_wp_reverse_en = 1; } static int rts5228_optimize_phy(struct rtsx_pcr *pcr) @@ -432,8 +436,10 @@ static int rts5228_extra_init_hw(struct rtsx_pcr *pcr) if (pcr->flags & PCR_REVERSE_SOCKET) rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x30); - else - rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00); + else { + rtsx_pci_write_register(pcr, PETXCFG, 0x20, option->sd_cd_reverse_en << 5); + rtsx_pci_write_register(pcr, PETXCFG, 0x10, option->sd_wp_reverse_en << 4); + } /* * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced @@ -720,4 +726,6 @@ void rts5228_init_params(struct rtsx_pcr *pcr) hw_param->interrupt_en |= SD_OC_INT_EN; hw_param->ocp_glitch = SD_OCP_GLITCH_800U; option->sd_800mA_ocp_thd = RTS5228_LDO1_OCP_THD_930; + option->sd_cd_reverse_en = 0; + option->sd_wp_reverse_en = 0; } diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c index 6c81040e18be..38aefd8db452 100644 --- a/drivers/misc/cardreader/rts5249.c +++ b/drivers/misc/cardreader/rts5249.c @@ -60,6 +60,7 @@ static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr) pci_read_config_dword(pdev, PCR_SETTING_REG1, ®); pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + pci_write_config_dword(pdev, 0x718, 0x0007C000); if (!rtsx_vendor_setting_valid(reg)) { pcr_dbg(pcr, "skip fetch vendor setting\n"); @@ -82,6 +83,10 @@ static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr) pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); if (rtsx_reg_check_reverse_socket(reg)) pcr->flags |= PCR_REVERSE_SOCKET; + if (rtsx_reg_check_cd_reverse(reg)) + pcr->option.sd_cd_reverse_en = 1; + if (rtsx_reg_check_wp_reverse(reg)) + pcr->option.sd_wp_reverse_en = 1; } static void rts5249_init_from_cfg(struct rtsx_pcr *pcr) @@ -254,9 +259,11 @@ static int rts5249_extra_init_hw(struct rtsx_pcr *pcr) /* Configure driving */ rts5249_fill_driving(pcr, OUTPUT_3V3); if (pcr->flags & PCR_REVERSE_SOCKET) - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0xB0); - else - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0x80); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x30, 0x30); + else { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x20, option->sd_cd_reverse_en << 5); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x10, option->sd_wp_reverse_en << 4); + } rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); @@ -572,6 +579,9 @@ void rts5249_init_params(struct rtsx_pcr *pcr) option->ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5249_DEF; option->ltr_l1off_snooze_sspwrgate = LTR_L1OFF_SNOOZE_SSPWRGATE_5249_DEF; + + option->sd_cd_reverse_en = 0; + option->sd_wp_reverse_en = 0; } static int rts524a_write_phy(struct rtsx_pcr *pcr, u8 addr, u16 val) diff --git a/drivers/misc/cardreader/rts5264.c b/drivers/misc/cardreader/rts5264.c index d050c9fff7ac..99a2d5ea6421 100644 --- a/drivers/misc/cardreader/rts5264.c +++ b/drivers/misc/cardreader/rts5264.c @@ -527,8 +527,16 @@ static void rts5264_init_from_hw(struct rtsx_pcr *pcr) pcr->rtd3_en = rts5264_reg_to_rtd3(lval2); - if (rts5264_reg_check_reverse_socket(lval2)) - pcr->flags |= PCR_REVERSE_SOCKET; + if (rts5264_reg_check_reverse_socket(lval2)) { + if (is_version_higher_than(pcr, PID_5264, RTS5264_IC_VER_B)) + pcr->option.sd_cd_reverse_en = 1; + else + pcr->flags |= PCR_REVERSE_SOCKET; + } + + if (rts5264_reg_check_wp_reverse(lval2) && + is_version_higher_than(pcr, PID_5264, RTS5264_IC_VER_B)) + pcr->option.sd_wp_reverse_en = 1; pci_read_config_dword(pdev, setting_reg1, &lval1); pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", setting_reg1, lval1); @@ -622,8 +630,10 @@ static int rts5264_extra_init_hw(struct rtsx_pcr *pcr) if (pcr->flags & PCR_REVERSE_SOCKET) rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x30); - else - rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00); + else { + rtsx_pci_write_register(pcr, PETXCFG, 0x20, option->sd_cd_reverse_en << 5); + rtsx_pci_write_register(pcr, PETXCFG, 0x10, option->sd_wp_reverse_en << 4); + } /* * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced @@ -957,4 +967,6 @@ void rts5264_init_params(struct rtsx_pcr *pcr) hw_param->interrupt_en |= (SD_OC_INT_EN | SD_OVP_INT_EN); hw_param->ocp_glitch = SD_OCP_GLITCH_800U | SDVIO_OCP_GLITCH_800U; option->sd_800mA_ocp_thd = RTS5264_LDO1_OCP_THD_1150; + option->sd_cd_reverse_en = 0; + option->sd_wp_reverse_en = 0; } diff --git a/drivers/misc/cardreader/rts5264.h b/drivers/misc/cardreader/rts5264.h index f3e81daa708d..611ee253367c 100644 --- a/drivers/misc/cardreader/rts5264.h +++ b/drivers/misc/cardreader/rts5264.h @@ -14,6 +14,7 @@ #define rts5264_reg_to_aspm(reg) \ (((~(reg) >> 28) & 0x02) | (((reg) >> 28) & 0x01)) #define rts5264_reg_check_reverse_socket(reg) ((reg) & 0x04) +#define rts5264_reg_check_wp_reverse(reg) ((reg) & 0x8000) #define rts5264_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 22) & 0x03) #define rts5264_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 16) & 0x03) #define rts5264_reg_to_rtd3(reg) ((reg) & 0x08) diff --git a/drivers/misc/cardreader/rtsx_pcr.h b/drivers/misc/cardreader/rtsx_pcr.h index 8e5951b61143..40562ff2be13 100644 --- a/drivers/misc/cardreader/rtsx_pcr.h +++ b/drivers/misc/cardreader/rtsx_pcr.h @@ -100,6 +100,8 @@ static inline u8 map_sd_drive(int idx) #define rtsx_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 5) & 0x03) #define rtsx_reg_to_card_drive_sel(reg) ((((reg) >> 25) & 0x01) << 6) #define rtsx_reg_check_reverse_socket(reg) ((reg) & 0x4000) +#define rtsx_reg_check_cd_reverse(reg) ((reg) & 0x800000) +#define rtsx_reg_check_wp_reverse(reg) ((reg) & 0x400000) #define rts5209_reg_to_aspm(reg) (((reg) >> 5) & 0x03) #define rts5209_reg_check_ms_pmos(reg) (!((reg) & 0x08)) #define rts5209_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 3) & 0x07) diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 3b4c36705a9b..3c5689356004 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -1160,6 +1160,8 @@ struct rtsx_cr_option { bool ocp_en; u8 sd_400mA_ocp_thd; u8 sd_800mA_ocp_thd; + u8 sd_cd_reverse_en; + u8 sd_wp_reverse_en; }; /* -- cgit v1.2.3 From f5597840ac907858ad2a462b00e4a68fd199121e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 14 Jul 2025 23:34:14 +0800 Subject: char: misc: Disallow registering miscdevice whose minor > MISC_DYNAMIC_MINOR Currently, It is allowed to register miscdevice with minor > 255 which is defined by macro MISC_DYNAMIC_MINOR, and cause: - Chaos regarding division and management of minor codes. - Registering failure if the minor was allocated to other dynamic request. Fortunately, in-kernel users have not had such usage yet. Fix by refusing to register miscdevice whose minor > 255. Also bring in a very simple minor code space division and management: < 255 : Fixed minor code == 255 : Indicator to request dynamic minor code > 255 : Dynamic minor code requested, 1048320 minor codes totally And all fixed minors allocated should be registered in 'linux/miscdevice.h' Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250714-rfc_miscdev-v6-3-2ed949665bde@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/misc.c | 6 ++++++ include/linux/miscdevice.h | 8 ++++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 558302a64dd9..b8e66466184f 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -210,6 +210,12 @@ int misc_register(struct miscdevice *misc) int err = 0; bool is_dynamic = (misc->minor == MISC_DYNAMIC_MINOR); + if (misc->minor > MISC_DYNAMIC_MINOR) { + pr_err("Invalid fixed minor %d for miscdevice '%s'\n", + misc->minor, misc->name); + return -EINVAL; + } + INIT_LIST_HEAD(&misc->list); mutex_lock(&misc_mtx); diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 3e6deb00fc85..565b88efeb23 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -71,6 +71,14 @@ #define USERIO_MINOR 240 #define VHOST_VSOCK_MINOR 241 #define RFKILL_MINOR 242 + +/* + * Misc char device minor code space division related to below macro: + * + * < 255 : Fixed minor code + * == 255 : Indicator to request dynamic minor code + * > 255 : Dynamic minor code requested, 1048320 minor codes totally. + */ #define MISC_DYNAMIC_MINOR 255 struct miscdevice { -- cgit v1.2.3 From d7f8d0758b975db8406c91cf242d46cd9611ba3e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 14 Jul 2025 23:34:18 +0800 Subject: char: misc: Register fixed minor EISA_EEPROM_MINOR in linux/miscdevice.h Move fixed minor EISA_EEPROM_MINOR definition to linux/miscdevice.h. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250714-rfc_miscdev-v6-7-2ed949665bde@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/parisc/eisa_eeprom.c | 2 -- include/linux/miscdevice.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c index 443b15422fc1..601cbb22574f 100644 --- a/drivers/parisc/eisa_eeprom.c +++ b/drivers/parisc/eisa_eeprom.c @@ -15,8 +15,6 @@ #include #include -#define EISA_EEPROM_MINOR 241 - static loff_t eisa_eeprom_llseek(struct file *file, loff_t offset, int origin) { return fixed_size_llseek(file, offset, origin, HPEE_MAX_LENGTH); diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 565b88efeb23..7d0aa718499c 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -70,6 +70,7 @@ #define UHID_MINOR 239 #define USERIO_MINOR 240 #define VHOST_VSOCK_MINOR 241 +#define EISA_EEPROM_MINOR 241 #define RFKILL_MINOR 242 /* -- cgit v1.2.3 From 60ad9a07319283e6e1094cef3e972e754315c024 Mon Sep 17 00:00:00 2001 From: Junjie Cao Date: Wed, 20 Aug 2025 08:47:55 +0800 Subject: iio: core: switch info_mask fields to unsigned long to match find_bit helpers for_each_set_bit()/find_*_bit() expect arrays of unsigned long (see include/linux/find.h), but industrialio-core passed const long * into iio_device_add_info_mask_type{,_avail}(). These masks are used purely as bit arrays and are populated via BIT() (1UL << n). Switch the info_mask_* fields and the corresponding function parameters to unsigned long so the types match the helpers. This removes sparse warnings about signedness mismatches (seen with 'make C=1' CF='-Wsparse-all') without changing behavior or struct layout. No functional change intended. Suggested-by: Jonathan Cameron Signed-off-by: Junjie Cao Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250820004755.69627-1-junjie.cao@intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 4 ++-- include/linux/iio/iio.h | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index eb6a54f8115d..38848d753a33 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1244,7 +1244,7 @@ static int iio_device_add_channel_label(struct iio_dev *indio_dev, static int iio_device_add_info_mask_type(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, enum iio_shared_by shared_by, - const long *infomask) + const unsigned long *infomask) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int i, ret, attrcount = 0; @@ -1274,7 +1274,7 @@ static int iio_device_add_info_mask_type(struct iio_dev *indio_dev, static int iio_device_add_info_mask_type_avail(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, enum iio_shared_by shared_by, - const long *infomask) + const unsigned long *infomask) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int i, ret, attrcount = 0; diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 2f5560646ee4..872ebdf0dd77 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -271,14 +271,14 @@ struct iio_chan_spec { unsigned int num_ext_scan_type; }; }; - long info_mask_separate; - long info_mask_separate_available; - long info_mask_shared_by_type; - long info_mask_shared_by_type_available; - long info_mask_shared_by_dir; - long info_mask_shared_by_dir_available; - long info_mask_shared_by_all; - long info_mask_shared_by_all_available; + unsigned long info_mask_separate; + unsigned long info_mask_separate_available; + unsigned long info_mask_shared_by_type; + unsigned long info_mask_shared_by_type_available; + unsigned long info_mask_shared_by_dir; + unsigned long info_mask_shared_by_dir_available; + unsigned long info_mask_shared_by_all; + unsigned long info_mask_shared_by_all_available; const struct iio_event_spec *event_spec; unsigned int num_event_specs; const struct iio_chan_spec_ext_info *ext_info; -- cgit v1.2.3 From a214365140cc3009f07d4e14a8b481fd3dc41d31 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 10 Jul 2025 15:15:28 +0300 Subject: rculist: move list_for_each_rcu() to where it belongs The list_for_each_rcu() relies on the rcu_dereference() API which is not provided by the list.h. At the same time list.h is a low-level basic header that must not have dependencies like RCU, besides the fact of the potential circular dependencies in some cases. With all that said, move RCU related API to the rculist.h where it belongs. Signed-off-by: Andy Shevchenko Reviewed-by: Simona Vetter Reviewed-by: "Paul E. McKenney" Signed-off-by: Neeraj Upadhyay (AMD) Signed-off-by: "Paul E. McKenney" --- include/linux/list.h | 10 ---------- include/linux/rculist.h | 10 ++++++++++ kernel/cgroup/dmem.c | 1 + 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index e7e28afd28f8..e7bdad9b8618 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -686,16 +686,6 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each(pos, head) \ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) -/** - * list_for_each_rcu - Iterate over a list in an RCU-safe fashion - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - !list_is_head(pos, (head)); \ - pos = rcu_dereference(pos->next)) - /** * list_for_each_continue - continue iteration over a list * @pos: the &struct list_head to use as a loop cursor. diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 1b11926ddd47..2abba7552605 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -42,6 +42,16 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) */ #define list_bidir_prev_rcu(list) (*((struct list_head __rcu **)(&(list)->prev))) +/** + * list_for_each_rcu - Iterate over a list in an RCU-safe fashion + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_rcu(pos, head) \ + for (pos = rcu_dereference((head)->next); \ + !list_is_head(pos, (head)); \ + pos = rcu_dereference(pos->next)) + /** * list_tail_rcu - returns the prev pointer of the head of the list * @head: the head of the list diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index 10b63433f057..e12b946278b6 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -14,6 +14,7 @@ #include #include #include +#include #include struct dmem_cgroup_region { -- cgit v1.2.3 From 19a9a1ab5c3dce65fff4ac50700117039c23d525 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:32 -0700 Subject: KVM: Rename CONFIG_KVM_PRIVATE_MEM to CONFIG_KVM_GUEST_MEMFD Rename the Kconfig option CONFIG_KVM_PRIVATE_MEM to CONFIG_KVM_GUEST_MEMFD. The original name implied that the feature only supported "private" memory. However, CONFIG_KVM_PRIVATE_MEM enables guest_memfd in general, which is not exclusively for private memory. Subsequent patches in this series will add guest_memfd support for non-CoCo VMs, whose memory is not private. Renaming the Kconfig option to CONFIG_KVM_GUEST_MEMFD more accurately reflects its broader scope as the main Kconfig option for all guest_memfd-backed memory. This provides clearer semantics for the option and avoids confusion as new features are introduced. Reviewed-by: Ira Weiny Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Reviewed-by: Xiaoyao Li Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- include/linux/kvm_host.h | 14 +++++++------- virt/kvm/Kconfig | 8 ++++---- virt/kvm/Makefile.kvm | 2 +- virt/kvm/kvm_main.c | 4 ++-- virt/kvm/kvm_mm.h | 4 ++-- 6 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f19a76d3ca0e..7b0f2b3e492d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2276,7 +2276,7 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, int tdp_max_root_level, int tdp_huge_page_level); -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GUEST_MEMFD #define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) #else #define kvm_arch_has_private_mem(kvm) false diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15656b7fba6c..8cdc0b3cc1b1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -602,7 +602,7 @@ struct kvm_memory_slot { short id; u16 as_id; -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GUEST_MEMFD struct { /* * Writes protected by kvm->slots_lock. Acquiring a @@ -720,10 +720,10 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) #endif /* - * Arch code must define kvm_arch_has_private_mem if support for private memory - * is enabled. + * Arch code must define kvm_arch_has_private_mem if support for guest_memfd is + * enabled. */ -#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) +#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_GUEST_MEMFD) static inline bool kvm_arch_has_private_mem(struct kvm *kvm) { return false; @@ -2505,7 +2505,7 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) { - return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) && + return IS_ENABLED(CONFIG_KVM_GUEST_MEMFD) && kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; } #else @@ -2515,7 +2515,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) } #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GUEST_MEMFD int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, struct page **page, int *max_order); @@ -2528,7 +2528,7 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, KVM_BUG_ON(1, kvm); return -EIO; } -#endif /* CONFIG_KVM_PRIVATE_MEM */ +#endif /* CONFIG_KVM_GUEST_MEMFD */ #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 727b542074e7..e4b400feff94 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -112,19 +112,19 @@ config KVM_GENERIC_MEMORY_ATTRIBUTES depends on KVM_GENERIC_MMU_NOTIFIER bool -config KVM_PRIVATE_MEM +config KVM_GUEST_MEMFD select XARRAY_MULTI bool config KVM_GENERIC_PRIVATE_MEM select KVM_GENERIC_MEMORY_ATTRIBUTES - select KVM_PRIVATE_MEM + select KVM_GUEST_MEMFD bool config HAVE_KVM_ARCH_GMEM_PREPARE bool - depends on KVM_PRIVATE_MEM + depends on KVM_GUEST_MEMFD config HAVE_KVM_ARCH_GMEM_INVALIDATE bool - depends on KVM_PRIVATE_MEM + depends on KVM_GUEST_MEMFD diff --git a/virt/kvm/Makefile.kvm b/virt/kvm/Makefile.kvm index 724c89af78af..d047d4cf58c9 100644 --- a/virt/kvm/Makefile.kvm +++ b/virt/kvm/Makefile.kvm @@ -12,4 +12,4 @@ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o kvm-$(CONFIG_HAVE_KVM_DIRTY_RING) += $(KVM)/dirty_ring.o kvm-$(CONFIG_HAVE_KVM_PFNCACHE) += $(KVM)/pfncache.o -kvm-$(CONFIG_KVM_PRIVATE_MEM) += $(KVM)/guest_memfd.o +kvm-$(CONFIG_KVM_GUEST_MEMFD) += $(KVM)/guest_memfd.o diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6c07dd423458..25a94eed75fd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4915,7 +4915,7 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_MEMORY_ATTRIBUTES: return kvm_supported_mem_attributes(kvm); #endif -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GUEST_MEMFD case KVM_CAP_GUEST_MEMFD: return !kvm || kvm_arch_has_private_mem(kvm); #endif @@ -5352,7 +5352,7 @@ static long kvm_vm_ioctl(struct file *filp, case KVM_GET_STATS_FD: r = kvm_vm_ioctl_get_stats_fd(kvm); break; -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GUEST_MEMFD case KVM_CREATE_GUEST_MEMFD: { struct kvm_create_guest_memfd guest_memfd; diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h index acef3f5c582a..31defb08ccba 100644 --- a/virt/kvm/kvm_mm.h +++ b/virt/kvm/kvm_mm.h @@ -67,7 +67,7 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, } #endif /* HAVE_KVM_PFNCACHE */ -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GUEST_MEMFD void kvm_gmem_init(struct module *module); int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args); int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, @@ -91,6 +91,6 @@ static inline void kvm_gmem_unbind(struct kvm_memory_slot *slot) { WARN_ON_ONCE(1); } -#endif /* CONFIG_KVM_PRIVATE_MEM */ +#endif /* CONFIG_KVM_GUEST_MEMFD */ #endif /* __KVM_MM_H__ */ -- cgit v1.2.3 From 36cf63bb5df68836e55e2839f8174b404d47670b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:36 -0700 Subject: KVM: Rename CONFIG_KVM_GENERIC_PRIVATE_MEM to CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE The original name was vague regarding its functionality. This Kconfig option specifically enables and gates the kvm_gmem_populate() function, which is responsible for populating a GPA range with guest data. The new name, HAVE_KVM_ARCH_GMEM_POPULATE, describes the purpose of the option: to enable arch-specific guest_memfd population mechanisms. It also follows the same pattern as the other HAVE_KVM_ARCH_* configuration options. This improves clarity for developers and ensures the name accurately reflects the functionality it controls, especially as guest_memfd support expands beyond purely "private" memory scenarios. Temporarily keep KVM_GENERIC_PRIVATE_MEM as an x86-only config so as to minimize churn, and to hopefully make it easier to see what features require HAVE_KVM_ARCH_GMEM_POPULATE. On that note, omit GMEM_POPULATE for KVM_X86_SW_PROTECTED_VM, as regular ol' memset() suffices for software-protected VMs. As for KVM_GENERIC_PRIVATE_MEM, a future change will select KVM_GUEST_MEMFD for all 64-bit KVM builds, at which point the intermediate config will become obsolete and can/will be dropped. Reviewed-by: Ira Weiny Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Signed-off-by: Fuad Tabba Reviewed-by: Xiaoyao Li Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Kconfig | 14 ++++++++++---- include/linux/kvm_host.h | 2 +- virt/kvm/Kconfig | 9 ++++----- virt/kvm/guest_memfd.c | 2 +- 4 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 13ab7265b505..c763446d9b9f 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -79,11 +79,16 @@ config KVM_WERROR If in doubt, say "N". +config KVM_X86_PRIVATE_MEM + select KVM_GENERIC_MEMORY_ATTRIBUTES + select KVM_GUEST_MEMFD + bool + config KVM_SW_PROTECTED_VM bool "Enable support for KVM software-protected VMs" depends on EXPERT depends on KVM_X86 && X86_64 - select KVM_GENERIC_PRIVATE_MEM + select KVM_X86_PRIVATE_MEM help Enable support for KVM software-protected VMs. Currently, software- protected VMs are purely a development and testing vehicle for @@ -133,8 +138,8 @@ config KVM_INTEL_TDX bool "Intel Trust Domain Extensions (TDX) support" default y depends on INTEL_TDX_HOST - select KVM_GENERIC_PRIVATE_MEM - select KVM_GENERIC_MEMORY_ATTRIBUTES + select KVM_X86_PRIVATE_MEM + select HAVE_KVM_ARCH_GMEM_POPULATE help Provides support for launching Intel Trust Domain Extensions (TDX) confidential VMs on Intel processors. @@ -157,9 +162,10 @@ config KVM_AMD_SEV depends on KVM_AMD && X86_64 depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) select ARCH_HAS_CC_PLATFORM - select KVM_GENERIC_PRIVATE_MEM + select KVM_X86_PRIVATE_MEM select HAVE_KVM_ARCH_GMEM_PREPARE select HAVE_KVM_ARCH_GMEM_INVALIDATE + select HAVE_KVM_ARCH_GMEM_POPULATE help Provides support for launching encrypted VMs which use Secure Encrypted Virtualization (SEV), Secure Encrypted Virtualization with diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8cdc0b3cc1b1..ddfb6cfe20a6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2534,7 +2534,7 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); #endif -#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE /** * kvm_gmem_populate() - Populate/prepare a GPA range with guest data * diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index e4b400feff94..1b7d5be0b6c4 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -116,11 +116,6 @@ config KVM_GUEST_MEMFD select XARRAY_MULTI bool -config KVM_GENERIC_PRIVATE_MEM - select KVM_GENERIC_MEMORY_ATTRIBUTES - select KVM_GUEST_MEMFD - bool - config HAVE_KVM_ARCH_GMEM_PREPARE bool depends on KVM_GUEST_MEMFD @@ -128,3 +123,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE config HAVE_KVM_ARCH_GMEM_INVALIDATE bool depends on KVM_GUEST_MEMFD + +config HAVE_KVM_ARCH_GMEM_POPULATE + bool + depends on KVM_GUEST_MEMFD diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 7d85cc33c0bb..b2b50560e80e 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -627,7 +627,7 @@ out: } EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); -#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque) { -- cgit v1.2.3 From 923310be23b275f730e8869abc783db6296fc043 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:37 -0700 Subject: KVM: Rename kvm_slot_can_be_private() to kvm_slot_has_gmem() Rename kvm_slot_can_be_private() to kvm_slot_has_gmem() to improve clarity and accurately reflect its purpose. The function kvm_slot_can_be_private() was previously used to check if a given kvm_memory_slot is backed by guest_memfd. However, its name implied that the memory in such a slot was exclusively "private". As guest_memfd support expands to include non-private memory (e.g., shared host mappings), it's important to remove this association. The new name, kvm_slot_has_gmem(), states that the slot is backed by guest_memfd without making assumptions about the memory's privacy attributes. Reviewed-by: Ira Weiny Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Reviewed-by: Xiaoyao Li Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 4 ++-- arch/x86/kvm/svm/sev.c | 4 ++-- include/linux/kvm_host.h | 2 +- virt/kvm/guest_memfd.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6e838cb6c9e1..fdc2824755ee 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3312,7 +3312,7 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm, int kvm_mmu_max_mapping_level(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn) { - bool is_private = kvm_slot_can_be_private(slot) && + bool is_private = kvm_slot_has_gmem(slot) && kvm_mem_is_private(kvm, gfn); return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private); @@ -4551,7 +4551,7 @@ static int kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu, { int max_order, r; - if (!kvm_slot_can_be_private(fault->slot)) { + if (!kvm_slot_has_gmem(fault->slot)) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return -EFAULT; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0635bd71c10e..966a330dd294 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2361,7 +2361,7 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp) mutex_lock(&kvm->slots_lock); memslot = gfn_to_memslot(kvm, params.gfn_start); - if (!kvm_slot_can_be_private(memslot)) { + if (!kvm_slot_has_gmem(memslot)) { ret = -EINVAL; goto out; } @@ -4715,7 +4715,7 @@ void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) } slot = gfn_to_memslot(kvm, gfn); - if (!kvm_slot_can_be_private(slot)) { + if (!kvm_slot_has_gmem(slot)) { pr_warn_ratelimited("SEV: Unexpected RMP fault, non-private slot for GPA 0x%llx\n", gpa); return; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ddfb6cfe20a6..4c5e0a898652 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -615,7 +615,7 @@ struct kvm_memory_slot { #endif }; -static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot) +static inline bool kvm_slot_has_gmem(const struct kvm_memory_slot *slot) { return slot && (slot->flags & KVM_MEM_GUEST_MEMFD); } diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index b2b50560e80e..a99e11b8b77f 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -643,7 +643,7 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long return -EINVAL; slot = gfn_to_memslot(kvm, start_gfn); - if (!kvm_slot_can_be_private(slot)) + if (!kvm_slot_has_gmem(slot)) return -EINVAL; file = kvm_gmem_get_file(slot); -- cgit v1.2.3 From 69116e01f6fee030db45d269f28f9c300b8dc9d6 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:38 -0700 Subject: KVM: Fix comments that refer to slots_lock Fix comments so that they refer to slots_lock instead of slots_locks (remove trailing s). Reviewed-by: David Hildenbrand Reviewed-by: Ira Weiny Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Reviewed-by: Xiaoyao Li Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4c5e0a898652..5c25b03d3d50 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -860,7 +860,7 @@ struct kvm { struct notifier_block pm_notifier; #endif #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES - /* Protected by slots_locks (for writes) and RCU (for reads) */ + /* Protected by slots_lock (for writes) and RCU (for reads) */ struct xarray mem_attr_array; #endif char stats_id[KVM_STATS_NAME_SIZE]; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 25a94eed75fd..aa86dfd757db 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -331,7 +331,7 @@ void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, * All current use cases for flushing the TLBs for a specific memslot * are related to dirty logging, and many do the TLB flush out of * mmu_lock. The interaction between the various operations on memslot - * must be serialized by slots_locks to ensure the TLB flush from one + * must be serialized by slots_lock to ensure the TLB flush from one * operation is observed by any other operation on the same memslot. */ lockdep_assert_held(&kvm->slots_lock); -- cgit v1.2.3 From 68d189938709a5918d7308eb922c30bcbf16ebb9 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:39 -0700 Subject: KVM: Fix comment that refers to kvm uapi header path The comment that points to the path where the user-visible memslot flags are refers to an outdated path and has a typo. Update the comment to refer to the correct path. Reviewed-by: David Hildenbrand Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Reviewed-by: Xiaoyao Li Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5c25b03d3d50..56ea8c862cfd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -52,7 +52,7 @@ /* * The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally * used in kvm, other bits are visible for userspace which are defined in - * include/linux/kvm_h. + * include/uapi/linux/kvm.h. */ #define KVM_MEMSLOT_INVALID (1UL << 16) -- cgit v1.2.3 From d1e54dd08f163a9021433020d16a8f8f70ddc41c Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:40 -0700 Subject: KVM: x86: Enable KVM_GUEST_MEMFD for all 64-bit builds Enable KVM_GUEST_MEMFD for all KVM x86 64-bit builds, i.e. for "default" VM types when running on 64-bit KVM. This will allow using guest_memfd to back non-private memory for all VM shapes, by supporting mmap() on guest_memfd. Opportunistically clean up various conditionals that become tautologies once x86 selects KVM_GUEST_MEMFD more broadly. Specifically, because SW protected VMs, SEV, and TDX are all 64-bit only, private memory no longer needs to take explicit dependencies on KVM_GUEST_MEMFD, because it is effectively a prerequisite. Suggested-by: Sean Christopherson Signed-off-by: Fuad Tabba Reviewed-by: Xiaoyao Li Reviewed-by: David Hildenbrand Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 +--- arch/x86/kvm/Kconfig | 12 ++++-------- include/linux/kvm_host.h | 9 ++------- virt/kvm/kvm_main.c | 4 ++-- 4 files changed, 9 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7b0f2b3e492d..50366a1ca192 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2276,10 +2276,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, int tdp_max_root_level, int tdp_huge_page_level); -#ifdef CONFIG_KVM_GUEST_MEMFD +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES #define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) -#else -#define kvm_arch_has_private_mem(kvm) false #endif #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index c763446d9b9f..4e43923656d0 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -47,6 +47,7 @@ config KVM_X86 select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_PRE_FAULT_MEMORY select KVM_WERROR if WERROR + select KVM_GUEST_MEMFD if X86_64 config KVM tristate "Kernel-based Virtual Machine (KVM) support" @@ -79,16 +80,11 @@ config KVM_WERROR If in doubt, say "N". -config KVM_X86_PRIVATE_MEM - select KVM_GENERIC_MEMORY_ATTRIBUTES - select KVM_GUEST_MEMFD - bool - config KVM_SW_PROTECTED_VM bool "Enable support for KVM software-protected VMs" depends on EXPERT depends on KVM_X86 && X86_64 - select KVM_X86_PRIVATE_MEM + select KVM_GENERIC_MEMORY_ATTRIBUTES help Enable support for KVM software-protected VMs. Currently, software- protected VMs are purely a development and testing vehicle for @@ -138,7 +134,7 @@ config KVM_INTEL_TDX bool "Intel Trust Domain Extensions (TDX) support" default y depends on INTEL_TDX_HOST - select KVM_X86_PRIVATE_MEM + select KVM_GENERIC_MEMORY_ATTRIBUTES select HAVE_KVM_ARCH_GMEM_POPULATE help Provides support for launching Intel Trust Domain Extensions (TDX) @@ -162,7 +158,7 @@ config KVM_AMD_SEV depends on KVM_AMD && X86_64 depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) select ARCH_HAS_CC_PLATFORM - select KVM_X86_PRIVATE_MEM + select KVM_GENERIC_MEMORY_ATTRIBUTES select HAVE_KVM_ARCH_GMEM_PREPARE select HAVE_KVM_ARCH_GMEM_INVALIDATE select HAVE_KVM_ARCH_GMEM_POPULATE diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 56ea8c862cfd..4d1c44622056 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -719,11 +719,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) } #endif -/* - * Arch code must define kvm_arch_has_private_mem if support for guest_memfd is - * enabled. - */ -#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_GUEST_MEMFD) +#ifndef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES static inline bool kvm_arch_has_private_mem(struct kvm *kvm) { return false; @@ -2505,8 +2501,7 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) { - return IS_ENABLED(CONFIG_KVM_GUEST_MEMFD) && - kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; + return kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; } #else static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index aa86dfd757db..4f57cb92e109 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1588,7 +1588,7 @@ static int check_memory_region_flags(struct kvm *kvm, { u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; - if (kvm_arch_has_private_mem(kvm)) + if (IS_ENABLED(CONFIG_KVM_GUEST_MEMFD)) valid_flags |= KVM_MEM_GUEST_MEMFD; /* Dirty logging private memory is not currently supported. */ @@ -4917,7 +4917,7 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #endif #ifdef CONFIG_KVM_GUEST_MEMFD case KVM_CAP_GUEST_MEMFD: - return !kvm || kvm_arch_has_private_mem(kvm); + return 1; #endif default: break; -- cgit v1.2.3 From a12578e1477cbfb547256ed8dee6d5142a59cdcd Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:41 -0700 Subject: KVM: guest_memfd: Add plumbing to host to map guest_memfd pages Introduce the core infrastructure to enable host userspace to mmap() guest_memfd-backed memory. This is needed for several evolving KVM use cases: * Non-CoCo VM backing: Allows VMMs like Firecracker to run guests entirely backed by guest_memfd, even for non-CoCo VMs [1]. This provides a unified memory management model and simplifies guest memory handling. * Direct map removal for enhanced security: This is an important step for direct map removal of guest memory [2]. By allowing host userspace to fault in guest_memfd pages directly, we can avoid maintaining host kernel direct maps of guest memory. This provides additional hardening against Spectre-like transient execution attacks by removing a potential attack surface within the kernel. * Future guest_memfd features: This also lays the groundwork for future enhancements to guest_memfd, such as supporting huge pages and enabling in-place sharing of guest memory with the host for CoCo platforms that permit it [3]. Enable the basic mmap and fault handling logic within guest_memfd, but hold off on allow userspace to actually do mmap() until the architecture support is also in place. [1] https://github.com/firecracker-microvm/firecracker/tree/feature/secret-hiding [2] https://lore.kernel.org/linux-mm/cc1bb8e9bc3e1ab637700a4d3defeec95b55060a.camel@amazon.com [3] https://lore.kernel.org/all/c1c9591d-218a-495c-957b-ba356c8f8e09@redhat.com/T/#u Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Acked-by: David Hildenbrand Co-developed-by: Ackerley Tng Signed-off-by: Ackerley Tng Signed-off-by: Fuad Tabba Reviewed-by: Xiaoyao Li Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-11-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 11 ++++++++ include/linux/kvm_host.h | 4 +++ virt/kvm/guest_memfd.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 604490b1cb19..33fba801b205 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13521,6 +13521,16 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_arch_no_poll); +#ifdef CONFIG_KVM_GUEST_MEMFD +/* + * KVM doesn't yet support mmap() on guest_memfd for VMs with private memory + * (the private vs. shared tracking needs to be moved into guest_memfd). + */ +bool kvm_arch_supports_gmem_mmap(struct kvm *kvm) +{ + return !kvm_arch_has_private_mem(kvm); +} + #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order) { @@ -13534,6 +13544,7 @@ void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) kvm_x86_call(gmem_invalidate)(start, end); } #endif +#endif int kvm_spec_ctrl_test_value(u64 value) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4d1c44622056..26bad600f9fa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -726,6 +726,10 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) } #endif +#ifdef CONFIG_KVM_GUEST_MEMFD +bool kvm_arch_supports_gmem_mmap(struct kvm *kvm); +#endif + #ifndef kvm_arch_has_readonly_mem static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) { diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index a99e11b8b77f..67e7cd7210ef 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -312,7 +312,72 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn) return gfn - slot->base_gfn + slot->gmem.pgoff; } +static bool kvm_gmem_supports_mmap(struct inode *inode) +{ + return false; +} + +static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vmf->vma->vm_file); + struct folio *folio; + vm_fault_t ret = VM_FAULT_LOCKED; + + if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) + return VM_FAULT_SIGBUS; + + folio = kvm_gmem_get_folio(inode, vmf->pgoff); + if (IS_ERR(folio)) { + int err = PTR_ERR(folio); + + if (err == -EAGAIN) + return VM_FAULT_RETRY; + + return vmf_error(err); + } + + if (WARN_ON_ONCE(folio_test_large(folio))) { + ret = VM_FAULT_SIGBUS; + goto out_folio; + } + + if (!folio_test_uptodate(folio)) { + clear_highpage(folio_page(folio, 0)); + kvm_gmem_mark_prepared(folio); + } + + vmf->page = folio_file_page(folio, vmf->pgoff); + +out_folio: + if (ret != VM_FAULT_LOCKED) { + folio_unlock(folio); + folio_put(folio); + } + + return ret; +} + +static const struct vm_operations_struct kvm_gmem_vm_ops = { + .fault = kvm_gmem_fault_user_mapping, +}; + +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!kvm_gmem_supports_mmap(file_inode(file))) + return -ENODEV; + + if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) != + (VM_SHARED | VM_MAYSHARE)) { + return -EINVAL; + } + + vma->vm_ops = &kvm_gmem_vm_ops; + + return 0; +} + static struct file_operations kvm_gmem_fops = { + .mmap = kvm_gmem_mmap, .open = generic_file_open, .release = kvm_gmem_release, .fallocate = kvm_gmem_fallocate, @@ -391,6 +456,11 @@ static const struct inode_operations kvm_gmem_iops = { .setattr = kvm_gmem_setattr, }; +bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm) +{ + return true; +} + static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) { const char *anon_name = "[kvm-gmem]"; -- cgit v1.2.3 From 576d035e2aef52f8d8d3ce29af556d4c6bd2e0fe Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:42 -0700 Subject: KVM: guest_memfd: Track guest_memfd mmap support in memslot Add a new internal flag, KVM_MEMSLOT_GMEM_ONLY, to the top half of memslot->flags (which makes it strictly for KVM's internal use). This flag tracks when a guest_memfd-backed memory slot supports host userspace mmap operations, which implies that all memory, not just private memory for CoCo VMs, is consumed through guest_memfd: "gmem only". This optimization avoids repeatedly checking the underlying guest_memfd file for mmap support, which would otherwise require taking and releasing a reference on the file for each check. By caching this information directly in the memslot, we reduce overhead and simplify the logic involved in handling guest_memfd-backed pages for host mappings. Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Xiaoyao Li Acked-by: David Hildenbrand Suggested-by: David Hildenbrand Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-12-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 11 ++++++++++- virt/kvm/guest_memfd.c | 2 ++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 26bad600f9fa..8b47891adca1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -54,7 +54,8 @@ * used in kvm, other bits are visible for userspace which are defined in * include/uapi/linux/kvm.h. */ -#define KVM_MEMSLOT_INVALID (1UL << 16) +#define KVM_MEMSLOT_INVALID (1UL << 16) +#define KVM_MEMSLOT_GMEM_ONLY (1UL << 17) /* * Bit 63 of the memslot generation number is an "update in-progress flag", @@ -2490,6 +2491,14 @@ static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE; } +static inline bool kvm_memslot_is_gmem_only(const struct kvm_memory_slot *slot) +{ + if (!IS_ENABLED(CONFIG_KVM_GUEST_MEMFD)) + return false; + + return slot->flags & KVM_MEMSLOT_GMEM_ONLY; +} + #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn) { diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 67e7cd7210ef..d5b445548af4 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -578,6 +578,8 @@ int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, */ WRITE_ONCE(slot->gmem.file, file); slot->gmem.pgoff = start; + if (kvm_gmem_supports_mmap(inode)) + slot->flags |= KVM_MEMSLOT_GMEM_ONLY; xa_store_range(&gmem->bindings, start, end - 1, slot, GFP_KERNEL); filemap_invalidate_unlock(inode->i_mapping); -- cgit v1.2.3 From 08383cd479f8212fafee2f557b58cfd48818bee0 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Wed, 20 Aug 2025 09:54:02 +0200 Subject: pinctrl: Add pinctrl_pm_select_init_state helper function If a platform requires an initial pinctrl state during probing, this helper function provides the client with access to the same initial state. eg: xxx_suspend_noirq ... pinctrl_pm_select_sleep_state xxx resume_noirq pinctrl_pm_select_init_state ... pinctrl_pm_select_default_state Signed-off-by: Christian Bruel Signed-off-by: Manivannan Sadhasivam Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20250820075411.1178729-3-christian.bruel@foss.st.com --- drivers/pinctrl/core.c | 13 +++++++++++++ include/linux/pinctrl/consumer.h | 10 ++++++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 73b78d6eac67..c5dbf4e9db84 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1655,6 +1655,19 @@ int pinctrl_pm_select_default_state(struct device *dev) } EXPORT_SYMBOL_GPL(pinctrl_pm_select_default_state); +/** + * pinctrl_pm_select_init_state() - select init pinctrl state for PM + * @dev: device to select init state for + */ +int pinctrl_pm_select_init_state(struct device *dev) +{ + if (!dev->pins) + return 0; + + return pinctrl_select_bound_state(dev, dev->pins->init_state); +} +EXPORT_SYMBOL_GPL(pinctrl_pm_select_init_state); + /** * pinctrl_pm_select_sleep_state() - select sleep pinctrl state for PM * @dev: device to select sleep state for diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 73de70362b98..63ce16191eb9 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -48,6 +48,7 @@ int pinctrl_select_default_state(struct device *dev); #ifdef CONFIG_PM int pinctrl_pm_select_default_state(struct device *dev); +int pinctrl_pm_select_init_state(struct device *dev); int pinctrl_pm_select_sleep_state(struct device *dev); int pinctrl_pm_select_idle_state(struct device *dev); #else @@ -55,6 +56,10 @@ static inline int pinctrl_pm_select_default_state(struct device *dev) { return 0; } +static inline int pinctrl_pm_select_init_state(struct device *dev) +{ + return 0; +} static inline int pinctrl_pm_select_sleep_state(struct device *dev) { return 0; @@ -143,6 +148,11 @@ static inline int pinctrl_pm_select_default_state(struct device *dev) return 0; } +static inline int pinctrl_pm_select_init_state(struct device *dev) +{ + return 0; +} + static inline int pinctrl_pm_select_sleep_state(struct device *dev) { return 0; -- cgit v1.2.3 From 3ea299d3dccdb8554057d0a87552e7673baea95d Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Mon, 11 Aug 2025 09:40:40 +0200 Subject: mtd: nand: qpic-common: remove a bunch of unused defines A bunch of definitions in the 'nand-qpic-common.h' header became unused after the conversion of the 'qcom_nandc' and 'spi-qpic-snand' drivers to use the FIELD_PREP() macro, so remove those. No functional changes. Signed-off-by: Gabor Juhos Signed-off-by: Miquel Raynal --- include/linux/mtd/nand-qpic-common.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h index 4e694b1aabbd..e8201d1b7cf9 100644 --- a/include/linux/mtd/nand-qpic-common.h +++ b/include/linux/mtd/nand-qpic-common.h @@ -71,14 +71,10 @@ /* NAND_DEVn_CFG0 bits */ #define DISABLE_STATUS_AFTER_WRITE BIT(4) -#define CW_PER_PAGE 6 #define CW_PER_PAGE_MASK GENMASK(8, 6) -#define UD_SIZE_BYTES 9 #define UD_SIZE_BYTES_MASK GENMASK(18, 9) #define ECC_PARITY_SIZE_BYTES_RS GENMASK(22, 19) -#define SPARE_SIZE_BYTES 23 #define SPARE_SIZE_BYTES_MASK GENMASK(26, 23) -#define NUM_ADDR_CYCLES 27 #define NUM_ADDR_CYCLES_MASK GENMASK(29, 27) #define STATUS_BFR_READ BIT(30) #define SET_RD_MODE_AFTER_STATUS BIT(31) @@ -86,26 +82,20 @@ /* NAND_DEVn_CFG0 bits */ #define DEV0_CFG1_ECC_DISABLE BIT(0) #define WIDE_FLASH BIT(1) -#define NAND_RECOVERY_CYCLES 2 #define NAND_RECOVERY_CYCLES_MASK GENMASK(4, 2) #define CS_ACTIVE_BSY BIT(5) -#define BAD_BLOCK_BYTE_NUM 6 #define BAD_BLOCK_BYTE_NUM_MASK GENMASK(15, 6) #define BAD_BLOCK_IN_SPARE_AREA BIT(16) -#define WR_RD_BSY_GAP 17 #define WR_RD_BSY_GAP_MASK GENMASK(22, 17) #define ENABLE_BCH_ECC BIT(27) /* NAND_DEV0_ECC_CFG bits */ #define ECC_CFG_ECC_DISABLE BIT(0) #define ECC_SW_RESET BIT(1) -#define ECC_MODE 4 #define ECC_MODE_MASK GENMASK(5, 4) #define ECC_MODE_4BIT 0 #define ECC_MODE_8BIT 1 -#define ECC_PARITY_SIZE_BYTES_BCH 8 #define ECC_PARITY_SIZE_BYTES_BCH_MASK GENMASK(12, 8) -#define ECC_NUM_DATA_BYTES 16 #define ECC_NUM_DATA_BYTES_MASK GENMASK(25, 16) #define ECC_FORCE_CLK_OPEN BIT(30) @@ -120,7 +110,6 @@ #define SEQ_READ_START_VLD BIT(4) /* NAND_EBI2_ECC_BUF_CFG bits */ -#define NUM_STEPS 0 #define NUM_STEPS_MASK GENMASK(9, 0) /* NAND_ERASED_CW_DETECT_CFG bits */ @@ -141,11 +130,8 @@ #define ERASED_CW (CODEWORD_ALL_ERASED | CODEWORD_ERASED) /* NAND_READ_LOCATION_n bits */ -#define READ_LOCATION_OFFSET 0 #define READ_LOCATION_OFFSET_MASK GENMASK(9, 0) -#define READ_LOCATION_SIZE 16 #define READ_LOCATION_SIZE_MASK GENMASK(25, 16) -#define READ_LOCATION_LAST 31 #define READ_LOCATION_LAST_MASK BIT(31) /* Version Mask */ -- cgit v1.2.3 From 5f284dc15ca8695d0394414045ac64616a3b0e69 Mon Sep 17 00:00:00 2001 From: Tianling Shen Date: Mon, 25 Aug 2025 01:00:13 +0800 Subject: mtd: spinand: add support for FudanMicro FM25S01A Add support for FudanMicro FM25S01A SPI NAND. Datasheet: http://eng.fmsh.com/nvm/FM25S01A_ds_eng.pdf Signed-off-by: Tianling Shen Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/Makefile | 2 +- drivers/mtd/nand/spi/core.c | 1 + drivers/mtd/nand/spi/fmsh.c | 74 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/spinand.h | 1 + 4 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 drivers/mtd/nand/spi/fmsh.c (limited to 'include/linux') diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile index 258da42451a4..6d3d203df048 100644 --- a/drivers/mtd/nand/spi/Makefile +++ b/drivers/mtd/nand/spi/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 spinand-objs := core.o otp.o -spinand-objs += alliancememory.o ato.o esmt.o foresee.o gigadevice.o macronix.o +spinand-objs += alliancememory.o ato.o esmt.o fmsh.o foresee.o gigadevice.o macronix.o spinand-objs += micron.o paragon.o skyhigh.o toshiba.o winbond.o xtx.o obj-$(CONFIG_MTD_SPI_NAND) += spinand.o diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index b0898990b2a5..ea47028d021a 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -1184,6 +1184,7 @@ static const struct spinand_manufacturer *spinand_manufacturers[] = { &alliancememory_spinand_manufacturer, &ato_spinand_manufacturer, &esmt_c8_spinand_manufacturer, + &fmsh_spinand_manufacturer, &foresee_spinand_manufacturer, &gigadevice_spinand_manufacturer, ¯onix_spinand_manufacturer, diff --git a/drivers/mtd/nand/spi/fmsh.c b/drivers/mtd/nand/spi/fmsh.c new file mode 100644 index 000000000000..8b2097bfc771 --- /dev/null +++ b/drivers/mtd/nand/spi/fmsh.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020-2021 Rockchip Electronics Co., Ltd. + * + * Author: Dingqiang Lin + */ + +#include +#include +#include + +#define SPINAND_MFR_FMSH 0xA1 + +static SPINAND_OP_VARIANTS(read_cache_variants, + SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(0, 2, NULL, 0, 0), + SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(0, 1, NULL, 0, 0), + SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(0, 1, NULL, 0, 0), + SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(0, 1, NULL, 0, 0), + SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(0, 1, NULL, 0, 0), + SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(0, 1, NULL, 0, 0)); + +static SPINAND_OP_VARIANTS(write_cache_variants, + SPINAND_PROG_LOAD_1S_1S_4S_OP(true, 0, NULL, 0), + SPINAND_PROG_LOAD_1S_1S_1S_OP(true, 0, NULL, 0)); + +static SPINAND_OP_VARIANTS(update_cache_variants, + SPINAND_PROG_LOAD_1S_1S_4S_OP(false, 0, NULL, 0), + SPINAND_PROG_LOAD_1S_1S_1S_OP(false, 0, NULL, 0)); + +static int fm25s01a_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + return -ERANGE; +} + +static int fm25s01a_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section) + return -ERANGE; + + region->offset = 2; + region->length = 62; + + return 0; +} + +static const struct mtd_ooblayout_ops fm25s01a_ooblayout = { + .ecc = fm25s01a_ooblayout_ecc, + .free = fm25s01a_ooblayout_free, +}; + +static const struct spinand_info fmsh_spinand_table[] = { + SPINAND_INFO("FM25S01A", + SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xE4), + NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1), + NAND_ECCREQ(1, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + SPINAND_HAS_QE_BIT, + SPINAND_ECCINFO(&fm25s01a_ooblayout, NULL)), +}; + +static const struct spinand_manufacturer_ops fmsh_spinand_manuf_ops = { +}; + +const struct spinand_manufacturer fmsh_spinand_manufacturer = { + .id = SPINAND_MFR_FMSH, + .name = "Fudan Micro", + .chips = fmsh_spinand_table, + .nchips = ARRAY_SIZE(fmsh_spinand_table), + .ops = &fmsh_spinand_manuf_ops, +}; diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 27a45bdab7ec..927c10d78769 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -355,6 +355,7 @@ struct spinand_manufacturer { extern const struct spinand_manufacturer alliancememory_spinand_manufacturer; extern const struct spinand_manufacturer ato_spinand_manufacturer; extern const struct spinand_manufacturer esmt_c8_spinand_manufacturer; +extern const struct spinand_manufacturer fmsh_spinand_manufacturer; extern const struct spinand_manufacturer foresee_spinand_manufacturer; extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; -- cgit v1.2.3 From 1d8fdabe19267338f29b58f968499e5b55e6a3b6 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Fri, 29 Aug 2025 12:25:43 +0100 Subject: iio: frequency: adf4350: Fix ADF4350_REG3_12BIT_CLKDIV_MODE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clk div bits (2 bits wide) do not start in bit 16 but in bit 15. Fix it accordingly. Fixes: e31166f0fd48 ("iio: frequency: New driver for Analog Devices ADF4350/ADF4351 Wideband Synthesizers") Signed-off-by: Michael Hennerich Signed-off-by: Nuno Sá Link: https://patch.msgid.link/20250829-adf4350-fix-v2-2-0bf543ba797d@analog.com Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/frequency/adf4350.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/frequency/adf4350.h b/include/linux/iio/frequency/adf4350.h index de45cf2ee1e4..ce2086f97e3f 100644 --- a/include/linux/iio/frequency/adf4350.h +++ b/include/linux/iio/frequency/adf4350.h @@ -51,7 +51,7 @@ /* REG3 Bit Definitions */ #define ADF4350_REG3_12BIT_CLKDIV(x) ((x) << 3) -#define ADF4350_REG3_12BIT_CLKDIV_MODE(x) ((x) << 16) +#define ADF4350_REG3_12BIT_CLKDIV_MODE(x) ((x) << 15) #define ADF4350_REG3_12BIT_CSR_EN (1 << 18) #define ADF4351_REG3_CHARGE_CANCELLATION_EN (1 << 21) #define ADF4351_REG3_ANTI_BACKLASH_3ns_EN (1 << 22) -- cgit v1.2.3 From d9a3e9929452780df16f3414f0d59b5f69d058cf Mon Sep 17 00:00:00 2001 From: Thomas Andreatta Date: Wed, 27 Aug 2025 17:24:43 +0200 Subject: dmaengine: sh: setup_xref error handling This patch modifies the type of setup_xref from void to int and handles errors since the function can fail. `setup_xref` now returns the (eventual) error from `dmae_set_dmars`|`dmae_set_chcr`, while `shdma_tx_submit` handles the result, removing the chunks from the queue and marking PM as idle in case of an error. Signed-off-by: Thomas Andreatta Link: https://lore.kernel.org/r/20250827152442.90962-1-thomas.andreatta2000@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/sh/shdma-base.c | 25 +++++++++++++++++++------ drivers/dma/sh/shdmac.c | 17 +++++++++++++---- include/linux/shdma-base.h | 2 +- 3 files changed, 33 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index 6b4fce453c85..834741adadaa 100644 --- a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -129,12 +129,25 @@ static dma_cookie_t shdma_tx_submit(struct dma_async_tx_descriptor *tx) const struct shdma_ops *ops = sdev->ops; dev_dbg(schan->dev, "Bring up channel %d\n", schan->id); - /* - * TODO: .xfer_setup() might fail on some platforms. - * Make it int then, on error remove chunks from the - * queue again - */ - ops->setup_xfer(schan, schan->slave_id); + + ret = ops->setup_xfer(schan, schan->slave_id); + if (ret < 0) { + dev_err(schan->dev, "setup_xfer failed: %d\n", ret); + + /* Remove chunks from the queue and mark them as idle */ + list_for_each_entry_safe(chunk, c, &schan->ld_queue, node) { + if (chunk->cookie == cookie) { + chunk->mark = DESC_IDLE; + list_move(&chunk->node, &schan->ld_free); + } + } + + schan->pm_state = SHDMA_PM_ESTABLISHED; + ret = pm_runtime_put(schan->dev); + + spin_unlock_irq(&schan->chan_lock); + return ret; + } if (schan->pm_state == SHDMA_PM_PENDING) shdma_chan_xfer_ld_queue(schan); diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c index 093e449e19ee..603e15102e45 100644 --- a/drivers/dma/sh/shdmac.c +++ b/drivers/dma/sh/shdmac.c @@ -300,21 +300,30 @@ static bool sh_dmae_channel_busy(struct shdma_chan *schan) return dmae_is_busy(sh_chan); } -static void sh_dmae_setup_xfer(struct shdma_chan *schan, - int slave_id) +static int sh_dmae_setup_xfer(struct shdma_chan *schan, int slave_id) { struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan, shdma_chan); + int ret = 0; if (slave_id >= 0) { const struct sh_dmae_slave_config *cfg = sh_chan->config; - dmae_set_dmars(sh_chan, cfg->mid_rid); - dmae_set_chcr(sh_chan, cfg->chcr); + ret = dmae_set_dmars(sh_chan, cfg->mid_rid); + if (ret < 0) + goto END; + + ret = dmae_set_chcr(sh_chan, cfg->chcr); + if (ret < 0) + goto END; + } else { dmae_init(sh_chan); } + +END: + return ret; } /* diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h index 6dfd05ef5c2d..03ba4dab2ef7 100644 --- a/include/linux/shdma-base.h +++ b/include/linux/shdma-base.h @@ -96,7 +96,7 @@ struct shdma_ops { int (*desc_setup)(struct shdma_chan *, struct shdma_desc *, dma_addr_t, dma_addr_t, size_t *); int (*set_slave)(struct shdma_chan *, int, dma_addr_t, bool); - void (*setup_xfer)(struct shdma_chan *, int); + int (*setup_xfer)(struct shdma_chan *, int); void (*start_xfer)(struct shdma_chan *, struct shdma_desc *); struct shdma_desc *(*embedded_desc)(void *, int); bool (*chan_irq)(struct shdma_chan *, int); -- cgit v1.2.3 From 7ea95d55e63176899eb96f7aaa34a5646f501b2c Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 26 Aug 2025 11:07:38 -0500 Subject: dmaengine: Fix dma_async_tx_descriptor->tx_submit documentation Commit 790fb9956eea ("linux/dmaengine.h: fix a few kernel-doc warnings") inserted new documentation for @desc_free in the middle of @tx_submit's description. Put @tx_submit's description back together, matching the indentation style of the rest of the documentation for dma_async_tx_descriptor. Fixes: 790fb9956eea ("linux/dmaengine.h: fix a few kernel-doc warnings") Reviewed-by: Dave Jiang Signed-off-by: Nathan Lynch Link: https://lore.kernel.org/r/20250826-dma_async_tx_desc-tx_submit-doc-fix-v1-1-18a4b51697db@amd.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6de7c05d6bd8..99efe2b9b4ea 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -594,9 +594,9 @@ struct dma_descriptor_metadata_ops { * @phys: physical address of the descriptor * @chan: target channel for this operation * @tx_submit: accept the descriptor, assign ordered cookie and mark the + * descriptor pending. To be pushed on .issue_pending() call * @desc_free: driver's callback function to free a resusable descriptor * after completion - * descriptor pending. To be pushed on .issue_pending() call * @callback: routine to call after this operation is complete * @callback_result: error result from a DMA transaction * @callback_param: general parameter to pass to the callback routine -- cgit v1.2.3 From c9f62564252c21d739a5003e9b2d6ad0828aa7bd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 30 Aug 2025 19:01:11 +0200 Subject: mtd: rawnand: s3c2410: Drop driver (no actual S3C64xx user) The s3c2410 NAND driver still supports S3C64xx platform, which in general is supported in the kernel. There are however no references of "s3c6400-nand" platform device ID or "s3c24xx-nand" driver, thus this driver cannot be instantiated for S3C64xx platform and is basically unused. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/Kconfig | 26 - drivers/mtd/nand/raw/Makefile | 1 - drivers/mtd/nand/raw/s3c2410.c | 1192 ------------------------ include/linux/platform_data/mtd-nand-s3c2410.h | 70 -- 4 files changed, 1289 deletions(-) delete mode 100644 drivers/mtd/nand/raw/s3c2410.c delete mode 100644 include/linux/platform_data/mtd-nand-s3c2410.h (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig index 4b99d9c422c3..f053806333c3 100644 --- a/drivers/mtd/nand/raw/Kconfig +++ b/drivers/mtd/nand/raw/Kconfig @@ -77,32 +77,6 @@ config MTD_NAND_NDFC help NDFC Nand Flash Controllers are integrated in IBM/AMCC's 4xx SoCs -config MTD_NAND_S3C2410 - tristate "Samsung S3C NAND controller" - depends on ARCH_S3C64XX - help - This enables the NAND flash controller on the S3C24xx and S3C64xx - SoCs - - No board specific support is done by this driver, each board - must advertise a platform_device for the driver to attach. - -config MTD_NAND_S3C2410_DEBUG - bool "Samsung S3C NAND controller debug" - depends on MTD_NAND_S3C2410 - help - Enable debugging of the S3C NAND driver - -config MTD_NAND_S3C2410_CLKSTOP - bool "Samsung S3C NAND IDLE clock stop" - depends on MTD_NAND_S3C2410 - default n - help - Stop the clock to the NAND controller when there is no chip - selected to save power. This will mean there is a small delay - when the is NAND chip selected or released, but will save - approximately 5mA of power when there is nothing happening. - config MTD_NAND_SHARPSL tristate "Sharp SL Series (C7xx + others) NAND controller" depends on ARCH_PXA || COMPILE_TEST diff --git a/drivers/mtd/nand/raw/Makefile b/drivers/mtd/nand/raw/Makefile index 711d043ad4f8..363cd18eaf16 100644 --- a/drivers/mtd/nand/raw/Makefile +++ b/drivers/mtd/nand/raw/Makefile @@ -9,7 +9,6 @@ obj-$(CONFIG_MTD_NAND_DENALI) += denali.o obj-$(CONFIG_MTD_NAND_DENALI_PCI) += denali_pci.o obj-$(CONFIG_MTD_NAND_DENALI_DT) += denali_dt.o obj-$(CONFIG_MTD_NAND_AU1550) += au1550nd.o -obj-$(CONFIG_MTD_NAND_S3C2410) += s3c2410.o obj-$(CONFIG_MTD_NAND_DAVINCI) += davinci_nand.o obj-$(CONFIG_MTD_NAND_DISKONCHIP) += diskonchip.o obj-$(CONFIG_MTD_NAND_FSMC) += fsmc_nand.o diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c deleted file mode 100644 index 33130d75c5ba..000000000000 --- a/drivers/mtd/nand/raw/s3c2410.c +++ /dev/null @@ -1,1192 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright © 2004-2008 Simtec Electronics - * http://armlinux.simtec.co.uk/ - * Ben Dooks - * - * Samsung S3C2410/S3C2440/S3C2412 NAND driver -*/ - -#define pr_fmt(fmt) "nand-s3c2410: " fmt - -#ifdef CONFIG_MTD_NAND_S3C2410_DEBUG -#define DEBUG -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#define S3C2410_NFREG(x) (x) - -#define S3C2410_NFCONF S3C2410_NFREG(0x00) -#define S3C2410_NFCMD S3C2410_NFREG(0x04) -#define S3C2410_NFADDR S3C2410_NFREG(0x08) -#define S3C2410_NFDATA S3C2410_NFREG(0x0C) -#define S3C2410_NFSTAT S3C2410_NFREG(0x10) -#define S3C2410_NFECC S3C2410_NFREG(0x14) -#define S3C2440_NFCONT S3C2410_NFREG(0x04) -#define S3C2440_NFCMD S3C2410_NFREG(0x08) -#define S3C2440_NFADDR S3C2410_NFREG(0x0C) -#define S3C2440_NFDATA S3C2410_NFREG(0x10) -#define S3C2440_NFSTAT S3C2410_NFREG(0x20) -#define S3C2440_NFMECC0 S3C2410_NFREG(0x2C) -#define S3C2412_NFSTAT S3C2410_NFREG(0x28) -#define S3C2412_NFMECC0 S3C2410_NFREG(0x34) -#define S3C2410_NFCONF_EN (1<<15) -#define S3C2410_NFCONF_INITECC (1<<12) -#define S3C2410_NFCONF_nFCE (1<<11) -#define S3C2410_NFCONF_TACLS(x) ((x)<<8) -#define S3C2410_NFCONF_TWRPH0(x) ((x)<<4) -#define S3C2410_NFCONF_TWRPH1(x) ((x)<<0) -#define S3C2410_NFSTAT_BUSY (1<<0) -#define S3C2440_NFCONF_TACLS(x) ((x)<<12) -#define S3C2440_NFCONF_TWRPH0(x) ((x)<<8) -#define S3C2440_NFCONF_TWRPH1(x) ((x)<<4) -#define S3C2440_NFCONT_INITECC (1<<4) -#define S3C2440_NFCONT_nFCE (1<<1) -#define S3C2440_NFCONT_ENABLE (1<<0) -#define S3C2440_NFSTAT_READY (1<<0) -#define S3C2412_NFCONF_NANDBOOT (1<<31) -#define S3C2412_NFCONT_INIT_MAIN_ECC (1<<5) -#define S3C2412_NFCONT_nFCE0 (1<<1) -#define S3C2412_NFSTAT_READY (1<<0) - -/* new oob placement block for use with hardware ecc generation - */ -static int s3c2410_ooblayout_ecc(struct mtd_info *mtd, int section, - struct mtd_oob_region *oobregion) -{ - if (section) - return -ERANGE; - - oobregion->offset = 0; - oobregion->length = 3; - - return 0; -} - -static int s3c2410_ooblayout_free(struct mtd_info *mtd, int section, - struct mtd_oob_region *oobregion) -{ - if (section) - return -ERANGE; - - oobregion->offset = 8; - oobregion->length = 8; - - return 0; -} - -static const struct mtd_ooblayout_ops s3c2410_ooblayout_ops = { - .ecc = s3c2410_ooblayout_ecc, - .free = s3c2410_ooblayout_free, -}; - -/* controller and mtd information */ - -struct s3c2410_nand_info; - -/** - * struct s3c2410_nand_mtd - driver MTD structure - * @chip: The NAND chip information. - * @set: The platform information supplied for this set of NAND chips. - * @info: Link back to the hardware information. -*/ -struct s3c2410_nand_mtd { - struct nand_chip chip; - struct s3c2410_nand_set *set; - struct s3c2410_nand_info *info; -}; - -enum s3c_cpu_type { - TYPE_S3C2410, - TYPE_S3C2412, - TYPE_S3C2440, -}; - -enum s3c_nand_clk_state { - CLOCK_DISABLE = 0, - CLOCK_ENABLE, - CLOCK_SUSPEND, -}; - -/* overview of the s3c2410 nand state */ - -/** - * struct s3c2410_nand_info - NAND controller state. - * @controller: Base controller structure. - * @mtds: An array of MTD instances on this controller. - * @platform: The platform data for this board. - * @device: The platform device we bound to. - * @clk: The clock resource for this controller. - * @regs: The area mapped for the hardware registers. - * @sel_reg: Pointer to the register controlling the NAND selection. - * @sel_bit: The bit in @sel_reg to select the NAND chip. - * @mtd_count: The number of MTDs created from this controller. - * @save_sel: The contents of @sel_reg to be saved over suspend. - * @clk_rate: The clock rate from @clk. - * @clk_state: The current clock state. - * @cpu_type: The exact type of this controller. - */ -struct s3c2410_nand_info { - /* mtd info */ - struct nand_controller controller; - struct s3c2410_nand_mtd *mtds; - struct s3c2410_platform_nand *platform; - - /* device info */ - struct device *device; - struct clk *clk; - void __iomem *regs; - void __iomem *sel_reg; - int sel_bit; - int mtd_count; - unsigned long save_sel; - unsigned long clk_rate; - enum s3c_nand_clk_state clk_state; - - enum s3c_cpu_type cpu_type; -}; - -struct s3c24XX_nand_devtype_data { - enum s3c_cpu_type type; -}; - -/* conversion functions */ - -static struct s3c2410_nand_mtd *s3c2410_nand_mtd_toours(struct mtd_info *mtd) -{ - return container_of(mtd_to_nand(mtd), struct s3c2410_nand_mtd, - chip); -} - -static struct s3c2410_nand_info *s3c2410_nand_mtd_toinfo(struct mtd_info *mtd) -{ - return s3c2410_nand_mtd_toours(mtd)->info; -} - -static struct s3c2410_nand_info *to_nand_info(struct platform_device *dev) -{ - return platform_get_drvdata(dev); -} - -static struct s3c2410_platform_nand *to_nand_plat(struct platform_device *dev) -{ - return dev_get_platdata(&dev->dev); -} - -static inline int allow_clk_suspend(struct s3c2410_nand_info *info) -{ -#ifdef CONFIG_MTD_NAND_S3C2410_CLKSTOP - return 1; -#else - return 0; -#endif -} - -/** - * s3c2410_nand_clk_set_state - Enable, disable or suspend NAND clock. - * @info: The controller instance. - * @new_state: State to which clock should be set. - */ -static void s3c2410_nand_clk_set_state(struct s3c2410_nand_info *info, - enum s3c_nand_clk_state new_state) -{ - if (!allow_clk_suspend(info) && new_state == CLOCK_SUSPEND) - return; - - if (info->clk_state == CLOCK_ENABLE) { - if (new_state != CLOCK_ENABLE) - clk_disable_unprepare(info->clk); - } else { - if (new_state == CLOCK_ENABLE) - clk_prepare_enable(info->clk); - } - - info->clk_state = new_state; -} - -/* timing calculations */ - -#define NS_IN_KHZ 1000000 - -/** - * s3c_nand_calc_rate - calculate timing data. - * @wanted: The cycle time in nanoseconds. - * @clk: The clock rate in kHz. - * @max: The maximum divider value. - * - * Calculate the timing value from the given parameters. - */ -static int s3c_nand_calc_rate(int wanted, unsigned long clk, int max) -{ - int result; - - result = DIV_ROUND_UP((wanted * clk), NS_IN_KHZ); - - pr_debug("result %d from %ld, %d\n", result, clk, wanted); - - if (result > max) { - pr_err("%d ns is too big for current clock rate %ld\n", - wanted, clk); - return -1; - } - - if (result < 1) - result = 1; - - return result; -} - -#define to_ns(ticks, clk) (((ticks) * NS_IN_KHZ) / (unsigned int)(clk)) - -/* controller setup */ - -/** - * s3c2410_nand_setrate - setup controller timing information. - * @info: The controller instance. - * - * Given the information supplied by the platform, calculate and set - * the necessary timing registers in the hardware to generate the - * necessary timing cycles to the hardware. - */ -static int s3c2410_nand_setrate(struct s3c2410_nand_info *info) -{ - struct s3c2410_platform_nand *plat = info->platform; - int tacls_max = (info->cpu_type == TYPE_S3C2412) ? 8 : 4; - int tacls, twrph0, twrph1; - unsigned long clkrate = clk_get_rate(info->clk); - unsigned long set, cfg, mask; - unsigned long flags; - - /* calculate the timing information for the controller */ - - info->clk_rate = clkrate; - clkrate /= 1000; /* turn clock into kHz for ease of use */ - - if (plat != NULL) { - tacls = s3c_nand_calc_rate(plat->tacls, clkrate, tacls_max); - twrph0 = s3c_nand_calc_rate(plat->twrph0, clkrate, 8); - twrph1 = s3c_nand_calc_rate(plat->twrph1, clkrate, 8); - } else { - /* default timings */ - tacls = tacls_max; - twrph0 = 8; - twrph1 = 8; - } - - if (tacls < 0 || twrph0 < 0 || twrph1 < 0) { - dev_err(info->device, "cannot get suitable timings\n"); - return -EINVAL; - } - - dev_info(info->device, "Tacls=%d, %dns Twrph0=%d %dns, Twrph1=%d %dns\n", - tacls, to_ns(tacls, clkrate), twrph0, to_ns(twrph0, clkrate), - twrph1, to_ns(twrph1, clkrate)); - - switch (info->cpu_type) { - case TYPE_S3C2410: - mask = (S3C2410_NFCONF_TACLS(3) | - S3C2410_NFCONF_TWRPH0(7) | - S3C2410_NFCONF_TWRPH1(7)); - set = S3C2410_NFCONF_EN; - set |= S3C2410_NFCONF_TACLS(tacls - 1); - set |= S3C2410_NFCONF_TWRPH0(twrph0 - 1); - set |= S3C2410_NFCONF_TWRPH1(twrph1 - 1); - break; - - case TYPE_S3C2440: - case TYPE_S3C2412: - mask = (S3C2440_NFCONF_TACLS(tacls_max - 1) | - S3C2440_NFCONF_TWRPH0(7) | - S3C2440_NFCONF_TWRPH1(7)); - - set = S3C2440_NFCONF_TACLS(tacls - 1); - set |= S3C2440_NFCONF_TWRPH0(twrph0 - 1); - set |= S3C2440_NFCONF_TWRPH1(twrph1 - 1); - break; - - default: - BUG(); - } - - local_irq_save(flags); - - cfg = readl(info->regs + S3C2410_NFCONF); - cfg &= ~mask; - cfg |= set; - writel(cfg, info->regs + S3C2410_NFCONF); - - local_irq_restore(flags); - - dev_dbg(info->device, "NF_CONF is 0x%lx\n", cfg); - - return 0; -} - -/** - * s3c2410_nand_inithw - basic hardware initialisation - * @info: The hardware state. - * - * Do the basic initialisation of the hardware, using s3c2410_nand_setrate() - * to setup the hardware access speeds and set the controller to be enabled. -*/ -static int s3c2410_nand_inithw(struct s3c2410_nand_info *info) -{ - int ret; - - ret = s3c2410_nand_setrate(info); - if (ret < 0) - return ret; - - switch (info->cpu_type) { - case TYPE_S3C2410: - default: - break; - - case TYPE_S3C2440: - case TYPE_S3C2412: - /* enable the controller and de-assert nFCE */ - - writel(S3C2440_NFCONT_ENABLE, info->regs + S3C2440_NFCONT); - } - - return 0; -} - -/** - * s3c2410_nand_select_chip - select the given nand chip - * @this: NAND chip object. - * @chip: The chip number. - * - * This is called by the MTD layer to either select a given chip for the - * @mtd instance, or to indicate that the access has finished and the - * chip can be de-selected. - * - * The routine ensures that the nFCE line is correctly setup, and any - * platform specific selection code is called to route nFCE to the specific - * chip. - */ -static void s3c2410_nand_select_chip(struct nand_chip *this, int chip) -{ - struct s3c2410_nand_info *info; - struct s3c2410_nand_mtd *nmtd; - unsigned long cur; - - nmtd = nand_get_controller_data(this); - info = nmtd->info; - - if (chip != -1) - s3c2410_nand_clk_set_state(info, CLOCK_ENABLE); - - cur = readl(info->sel_reg); - - if (chip == -1) { - cur |= info->sel_bit; - } else { - if (nmtd->set != NULL && chip > nmtd->set->nr_chips) { - dev_err(info->device, "invalid chip %d\n", chip); - return; - } - - if (info->platform != NULL) { - if (info->platform->select_chip != NULL) - (info->platform->select_chip) (nmtd->set, chip); - } - - cur &= ~info->sel_bit; - } - - writel(cur, info->sel_reg); - - if (chip == -1) - s3c2410_nand_clk_set_state(info, CLOCK_SUSPEND); -} - -/* s3c2410_nand_hwcontrol - * - * Issue command and address cycles to the chip -*/ - -static void s3c2410_nand_hwcontrol(struct nand_chip *chip, int cmd, - unsigned int ctrl) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - - if (cmd == NAND_CMD_NONE) - return; - - if (ctrl & NAND_CLE) - writeb(cmd, info->regs + S3C2410_NFCMD); - else - writeb(cmd, info->regs + S3C2410_NFADDR); -} - -/* command and control functions */ - -static void s3c2440_nand_hwcontrol(struct nand_chip *chip, int cmd, - unsigned int ctrl) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - - if (cmd == NAND_CMD_NONE) - return; - - if (ctrl & NAND_CLE) - writeb(cmd, info->regs + S3C2440_NFCMD); - else - writeb(cmd, info->regs + S3C2440_NFADDR); -} - -/* s3c2410_nand_devready() - * - * returns 0 if the nand is busy, 1 if it is ready -*/ - -static int s3c2410_nand_devready(struct nand_chip *chip) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - return readb(info->regs + S3C2410_NFSTAT) & S3C2410_NFSTAT_BUSY; -} - -static int s3c2440_nand_devready(struct nand_chip *chip) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - return readb(info->regs + S3C2440_NFSTAT) & S3C2440_NFSTAT_READY; -} - -static int s3c2412_nand_devready(struct nand_chip *chip) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - return readb(info->regs + S3C2412_NFSTAT) & S3C2412_NFSTAT_READY; -} - -/* ECC handling functions */ - -static int s3c2410_nand_correct_data(struct nand_chip *chip, u_char *dat, - u_char *read_ecc, u_char *calc_ecc) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - unsigned int diff0, diff1, diff2; - unsigned int bit, byte; - - pr_debug("%s(%p,%p,%p,%p)\n", __func__, mtd, dat, read_ecc, calc_ecc); - - diff0 = read_ecc[0] ^ calc_ecc[0]; - diff1 = read_ecc[1] ^ calc_ecc[1]; - diff2 = read_ecc[2] ^ calc_ecc[2]; - - pr_debug("%s: rd %*phN calc %*phN diff %02x%02x%02x\n", - __func__, 3, read_ecc, 3, calc_ecc, - diff0, diff1, diff2); - - if (diff0 == 0 && diff1 == 0 && diff2 == 0) - return 0; /* ECC is ok */ - - /* sometimes people do not think about using the ECC, so check - * to see if we have an 0xff,0xff,0xff read ECC and then ignore - * the error, on the assumption that this is an un-eccd page. - */ - if (read_ecc[0] == 0xff && read_ecc[1] == 0xff && read_ecc[2] == 0xff - && info->platform->ignore_unset_ecc) - return 0; - - /* Can we correct this ECC (ie, one row and column change). - * Note, this is similar to the 256 error code on smartmedia */ - - if (((diff0 ^ (diff0 >> 1)) & 0x55) == 0x55 && - ((diff1 ^ (diff1 >> 1)) & 0x55) == 0x55 && - ((diff2 ^ (diff2 >> 1)) & 0x55) == 0x55) { - /* calculate the bit position of the error */ - - bit = ((diff2 >> 3) & 1) | - ((diff2 >> 4) & 2) | - ((diff2 >> 5) & 4); - - /* calculate the byte position of the error */ - - byte = ((diff2 << 7) & 0x100) | - ((diff1 << 0) & 0x80) | - ((diff1 << 1) & 0x40) | - ((diff1 << 2) & 0x20) | - ((diff1 << 3) & 0x10) | - ((diff0 >> 4) & 0x08) | - ((diff0 >> 3) & 0x04) | - ((diff0 >> 2) & 0x02) | - ((diff0 >> 1) & 0x01); - - dev_dbg(info->device, "correcting error bit %d, byte %d\n", - bit, byte); - - dat[byte] ^= (1 << bit); - return 1; - } - - /* if there is only one bit difference in the ECC, then - * one of only a row or column parity has changed, which - * means the error is most probably in the ECC itself */ - - diff0 |= (diff1 << 8); - diff0 |= (diff2 << 16); - - /* equal to "(diff0 & ~(1 << __ffs(diff0)))" */ - if ((diff0 & (diff0 - 1)) == 0) - return 1; - - return -1; -} - -/* ECC functions - * - * These allow the s3c2410 and s3c2440 to use the controller's ECC - * generator block to ECC the data as it passes through] -*/ - -static void s3c2410_nand_enable_hwecc(struct nand_chip *chip, int mode) -{ - struct s3c2410_nand_info *info; - unsigned long ctrl; - - info = s3c2410_nand_mtd_toinfo(nand_to_mtd(chip)); - ctrl = readl(info->regs + S3C2410_NFCONF); - ctrl |= S3C2410_NFCONF_INITECC; - writel(ctrl, info->regs + S3C2410_NFCONF); -} - -static void s3c2412_nand_enable_hwecc(struct nand_chip *chip, int mode) -{ - struct s3c2410_nand_info *info; - unsigned long ctrl; - - info = s3c2410_nand_mtd_toinfo(nand_to_mtd(chip)); - ctrl = readl(info->regs + S3C2440_NFCONT); - writel(ctrl | S3C2412_NFCONT_INIT_MAIN_ECC, - info->regs + S3C2440_NFCONT); -} - -static void s3c2440_nand_enable_hwecc(struct nand_chip *chip, int mode) -{ - struct s3c2410_nand_info *info; - unsigned long ctrl; - - info = s3c2410_nand_mtd_toinfo(nand_to_mtd(chip)); - ctrl = readl(info->regs + S3C2440_NFCONT); - writel(ctrl | S3C2440_NFCONT_INITECC, info->regs + S3C2440_NFCONT); -} - -static int s3c2410_nand_calculate_ecc(struct nand_chip *chip, - const u_char *dat, u_char *ecc_code) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - - ecc_code[0] = readb(info->regs + S3C2410_NFECC + 0); - ecc_code[1] = readb(info->regs + S3C2410_NFECC + 1); - ecc_code[2] = readb(info->regs + S3C2410_NFECC + 2); - - pr_debug("%s: returning ecc %*phN\n", __func__, 3, ecc_code); - - return 0; -} - -static int s3c2412_nand_calculate_ecc(struct nand_chip *chip, - const u_char *dat, u_char *ecc_code) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - unsigned long ecc = readl(info->regs + S3C2412_NFMECC0); - - ecc_code[0] = ecc; - ecc_code[1] = ecc >> 8; - ecc_code[2] = ecc >> 16; - - pr_debug("%s: returning ecc %*phN\n", __func__, 3, ecc_code); - - return 0; -} - -static int s3c2440_nand_calculate_ecc(struct nand_chip *chip, - const u_char *dat, u_char *ecc_code) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - unsigned long ecc = readl(info->regs + S3C2440_NFMECC0); - - ecc_code[0] = ecc; - ecc_code[1] = ecc >> 8; - ecc_code[2] = ecc >> 16; - - pr_debug("%s: returning ecc %06lx\n", __func__, ecc & 0xffffff); - - return 0; -} - -/* over-ride the standard functions for a little more speed. We can - * use read/write block to move the data buffers to/from the controller -*/ - -static void s3c2410_nand_read_buf(struct nand_chip *this, u_char *buf, int len) -{ - readsb(this->legacy.IO_ADDR_R, buf, len); -} - -static void s3c2440_nand_read_buf(struct nand_chip *this, u_char *buf, int len) -{ - struct mtd_info *mtd = nand_to_mtd(this); - struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - - readsl(info->regs + S3C2440_NFDATA, buf, len >> 2); - - /* cleanup if we've got less than a word to do */ - if (len & 3) { - buf += len & ~3; - - for (; len & 3; len--) - *buf++ = readb(info->regs + S3C2440_NFDATA); - } -} - -static void s3c2410_nand_write_buf(struct nand_chip *this, const u_char *buf, - int len) -{ - writesb(this->legacy.IO_ADDR_W, buf, len); -} - -static void s3c2440_nand_write_buf(struct nand_chip *this, const u_char *buf, - int len) -{ - struct mtd_info *mtd = nand_to_mtd(this); - struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - - writesl(info->regs + S3C2440_NFDATA, buf, len >> 2); - - /* cleanup any fractional write */ - if (len & 3) { - buf += len & ~3; - - for (; len & 3; len--, buf++) - writeb(*buf, info->regs + S3C2440_NFDATA); - } -} - -/* device management functions */ - -static void s3c24xx_nand_remove(struct platform_device *pdev) -{ - struct s3c2410_nand_info *info = to_nand_info(pdev); - - if (info == NULL) - return; - - /* Release all our mtds and their partitions, then go through - * freeing the resources used - */ - - if (info->mtds != NULL) { - struct s3c2410_nand_mtd *ptr = info->mtds; - int mtdno; - - for (mtdno = 0; mtdno < info->mtd_count; mtdno++, ptr++) { - pr_debug("releasing mtd %d (%p)\n", mtdno, ptr); - WARN_ON(mtd_device_unregister(nand_to_mtd(&ptr->chip))); - nand_cleanup(&ptr->chip); - } - } - - /* free the common resources */ - - if (!IS_ERR(info->clk)) - s3c2410_nand_clk_set_state(info, CLOCK_DISABLE); -} - -static int s3c2410_nand_add_partition(struct s3c2410_nand_info *info, - struct s3c2410_nand_mtd *mtd, - struct s3c2410_nand_set *set) -{ - if (set) { - struct mtd_info *mtdinfo = nand_to_mtd(&mtd->chip); - - mtdinfo->name = set->name; - - return mtd_device_register(mtdinfo, set->partitions, - set->nr_partitions); - } - - return -ENODEV; -} - -static int s3c2410_nand_setup_interface(struct nand_chip *chip, int csline, - const struct nand_interface_config *conf) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - struct s3c2410_platform_nand *pdata = info->platform; - const struct nand_sdr_timings *timings; - int tacls; - - timings = nand_get_sdr_timings(conf); - if (IS_ERR(timings)) - return -ENOTSUPP; - - tacls = timings->tCLS_min - timings->tWP_min; - if (tacls < 0) - tacls = 0; - - pdata->tacls = DIV_ROUND_UP(tacls, 1000); - pdata->twrph0 = DIV_ROUND_UP(timings->tWP_min, 1000); - pdata->twrph1 = DIV_ROUND_UP(timings->tCLH_min, 1000); - - return s3c2410_nand_setrate(info); -} - -/** - * s3c2410_nand_init_chip - initialise a single instance of an chip - * @info: The base NAND controller the chip is on. - * @nmtd: The new controller MTD instance to fill in. - * @set: The information passed from the board specific platform data. - * - * Initialise the given @nmtd from the information in @info and @set. This - * readies the structure for use with the MTD layer functions by ensuring - * all pointers are setup and the necessary control routines selected. - */ -static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info, - struct s3c2410_nand_mtd *nmtd, - struct s3c2410_nand_set *set) -{ - struct device_node *np = info->device->of_node; - struct nand_chip *chip = &nmtd->chip; - void __iomem *regs = info->regs; - - nand_set_flash_node(chip, set->of_node); - - chip->legacy.write_buf = s3c2410_nand_write_buf; - chip->legacy.read_buf = s3c2410_nand_read_buf; - chip->legacy.select_chip = s3c2410_nand_select_chip; - chip->legacy.chip_delay = 50; - nand_set_controller_data(chip, nmtd); - chip->options = set->options; - chip->controller = &info->controller; - - /* - * let's keep behavior unchanged for legacy boards booting via pdata and - * auto-detect timings only when booting with a device tree. - */ - if (!np) - chip->options |= NAND_KEEP_TIMINGS; - - switch (info->cpu_type) { - case TYPE_S3C2410: - chip->legacy.IO_ADDR_W = regs + S3C2410_NFDATA; - info->sel_reg = regs + S3C2410_NFCONF; - info->sel_bit = S3C2410_NFCONF_nFCE; - chip->legacy.cmd_ctrl = s3c2410_nand_hwcontrol; - chip->legacy.dev_ready = s3c2410_nand_devready; - break; - - case TYPE_S3C2440: - chip->legacy.IO_ADDR_W = regs + S3C2440_NFDATA; - info->sel_reg = regs + S3C2440_NFCONT; - info->sel_bit = S3C2440_NFCONT_nFCE; - chip->legacy.cmd_ctrl = s3c2440_nand_hwcontrol; - chip->legacy.dev_ready = s3c2440_nand_devready; - chip->legacy.read_buf = s3c2440_nand_read_buf; - chip->legacy.write_buf = s3c2440_nand_write_buf; - break; - - case TYPE_S3C2412: - chip->legacy.IO_ADDR_W = regs + S3C2440_NFDATA; - info->sel_reg = regs + S3C2440_NFCONT; - info->sel_bit = S3C2412_NFCONT_nFCE0; - chip->legacy.cmd_ctrl = s3c2440_nand_hwcontrol; - chip->legacy.dev_ready = s3c2412_nand_devready; - - if (readl(regs + S3C2410_NFCONF) & S3C2412_NFCONF_NANDBOOT) - dev_info(info->device, "System booted from NAND\n"); - - break; - } - - chip->legacy.IO_ADDR_R = chip->legacy.IO_ADDR_W; - - nmtd->info = info; - nmtd->set = set; - - chip->ecc.engine_type = info->platform->engine_type; - - /* - * If you use u-boot BBT creation code, specifying this flag will - * let the kernel fish out the BBT from the NAND. - */ - if (set->flash_bbt) - chip->bbt_options |= NAND_BBT_USE_FLASH; -} - -/** - * s3c2410_nand_attach_chip - Init the ECC engine after NAND scan - * @chip: The NAND chip - * - * This hook is called by the core after the identification of the NAND chip, - * once the relevant per-chip information is up to date.. This call ensure that - * we update the internal state accordingly. - * - * The internal state is currently limited to the ECC state information. -*/ -static int s3c2410_nand_attach_chip(struct nand_chip *chip) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - - switch (chip->ecc.engine_type) { - - case NAND_ECC_ENGINE_TYPE_NONE: - dev_info(info->device, "ECC disabled\n"); - break; - - case NAND_ECC_ENGINE_TYPE_SOFT: - /* - * This driver expects Hamming based ECC when engine_type is set - * to NAND_ECC_ENGINE_TYPE_SOFT. Force ecc.algo to - * NAND_ECC_ALGO_HAMMING to avoid adding an extra ecc_algo field - * to s3c2410_platform_nand. - */ - chip->ecc.algo = NAND_ECC_ALGO_HAMMING; - dev_info(info->device, "soft ECC\n"); - break; - - case NAND_ECC_ENGINE_TYPE_ON_HOST: - chip->ecc.calculate = s3c2410_nand_calculate_ecc; - chip->ecc.correct = s3c2410_nand_correct_data; - chip->ecc.strength = 1; - - switch (info->cpu_type) { - case TYPE_S3C2410: - chip->ecc.hwctl = s3c2410_nand_enable_hwecc; - chip->ecc.calculate = s3c2410_nand_calculate_ecc; - break; - - case TYPE_S3C2412: - chip->ecc.hwctl = s3c2412_nand_enable_hwecc; - chip->ecc.calculate = s3c2412_nand_calculate_ecc; - break; - - case TYPE_S3C2440: - chip->ecc.hwctl = s3c2440_nand_enable_hwecc; - chip->ecc.calculate = s3c2440_nand_calculate_ecc; - break; - } - - dev_dbg(info->device, "chip %p => page shift %d\n", - chip, chip->page_shift); - - /* change the behaviour depending on whether we are using - * the large or small page nand device */ - if (chip->page_shift > 10) { - chip->ecc.size = 256; - chip->ecc.bytes = 3; - } else { - chip->ecc.size = 512; - chip->ecc.bytes = 3; - mtd_set_ooblayout(nand_to_mtd(chip), - &s3c2410_ooblayout_ops); - } - - dev_info(info->device, "hardware ECC\n"); - break; - - default: - dev_err(info->device, "invalid ECC mode!\n"); - return -EINVAL; - } - - if (chip->bbt_options & NAND_BBT_USE_FLASH) - chip->options |= NAND_SKIP_BBTSCAN; - - return 0; -} - -static const struct nand_controller_ops s3c24xx_nand_controller_ops = { - .attach_chip = s3c2410_nand_attach_chip, - .setup_interface = s3c2410_nand_setup_interface, -}; - -static int s3c24xx_nand_probe_dt(struct platform_device *pdev) -{ - const struct s3c24XX_nand_devtype_data *devtype_data; - struct s3c2410_platform_nand *pdata; - struct s3c2410_nand_info *info = platform_get_drvdata(pdev); - struct device_node *np = pdev->dev.of_node, *child; - struct s3c2410_nand_set *sets; - - devtype_data = of_device_get_match_data(&pdev->dev); - if (!devtype_data) - return -ENODEV; - - info->cpu_type = devtype_data->type; - - pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) - return -ENOMEM; - - pdev->dev.platform_data = pdata; - - pdata->nr_sets = of_get_child_count(np); - if (!pdata->nr_sets) - return 0; - - sets = devm_kcalloc(&pdev->dev, pdata->nr_sets, sizeof(*sets), - GFP_KERNEL); - if (!sets) - return -ENOMEM; - - pdata->sets = sets; - - for_each_available_child_of_node(np, child) { - sets->name = (char *)child->name; - sets->of_node = child; - sets->nr_chips = 1; - - of_node_get(child); - - sets++; - } - - return 0; -} - -static int s3c24xx_nand_probe_pdata(struct platform_device *pdev) -{ - struct s3c2410_nand_info *info = platform_get_drvdata(pdev); - - info->cpu_type = platform_get_device_id(pdev)->driver_data; - - return 0; -} - -/* s3c24xx_nand_probe - * - * called by device layer when it finds a device matching - * one our driver can handled. This code checks to see if - * it can allocate all necessary resources then calls the - * nand layer to look for devices -*/ -static int s3c24xx_nand_probe(struct platform_device *pdev) -{ - struct s3c2410_platform_nand *plat; - struct s3c2410_nand_info *info; - struct s3c2410_nand_mtd *nmtd; - struct s3c2410_nand_set *sets; - struct resource *res; - int err = 0; - int size; - int nr_sets; - int setno; - - info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); - if (info == NULL) { - err = -ENOMEM; - goto exit_error; - } - - platform_set_drvdata(pdev, info); - - nand_controller_init(&info->controller); - info->controller.ops = &s3c24xx_nand_controller_ops; - - /* get the clock source and enable it */ - - info->clk = devm_clk_get(&pdev->dev, "nand"); - if (IS_ERR(info->clk)) { - dev_err(&pdev->dev, "failed to get clock\n"); - err = -ENOENT; - goto exit_error; - } - - s3c2410_nand_clk_set_state(info, CLOCK_ENABLE); - - if (pdev->dev.of_node) - err = s3c24xx_nand_probe_dt(pdev); - else - err = s3c24xx_nand_probe_pdata(pdev); - - if (err) - goto exit_error; - - plat = to_nand_plat(pdev); - - /* allocate and map the resource */ - - /* currently we assume we have the one resource */ - res = pdev->resource; - size = resource_size(res); - - info->device = &pdev->dev; - info->platform = plat; - - info->regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(info->regs)) { - err = PTR_ERR(info->regs); - goto exit_error; - } - - dev_dbg(&pdev->dev, "mapped registers at %p\n", info->regs); - - if (!plat->sets || plat->nr_sets < 1) { - err = -EINVAL; - goto exit_error; - } - - sets = plat->sets; - nr_sets = plat->nr_sets; - - info->mtd_count = nr_sets; - - /* allocate our information */ - - size = nr_sets * sizeof(*info->mtds); - info->mtds = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); - if (info->mtds == NULL) { - err = -ENOMEM; - goto exit_error; - } - - /* initialise all possible chips */ - - nmtd = info->mtds; - - for (setno = 0; setno < nr_sets; setno++, nmtd++, sets++) { - struct mtd_info *mtd = nand_to_mtd(&nmtd->chip); - - pr_debug("initialising set %d (%p, info %p)\n", - setno, nmtd, info); - - mtd->dev.parent = &pdev->dev; - s3c2410_nand_init_chip(info, nmtd, sets); - - err = nand_scan(&nmtd->chip, sets ? sets->nr_chips : 1); - if (err) - goto exit_error; - - s3c2410_nand_add_partition(info, nmtd, sets); - } - - /* initialise the hardware */ - err = s3c2410_nand_inithw(info); - if (err != 0) - goto exit_error; - - if (allow_clk_suspend(info)) { - dev_info(&pdev->dev, "clock idle support enabled\n"); - s3c2410_nand_clk_set_state(info, CLOCK_SUSPEND); - } - - return 0; - - exit_error: - s3c24xx_nand_remove(pdev); - - if (err == 0) - err = -EINVAL; - return err; -} - -/* PM Support */ -#ifdef CONFIG_PM - -static int s3c24xx_nand_suspend(struct platform_device *dev, pm_message_t pm) -{ - struct s3c2410_nand_info *info = platform_get_drvdata(dev); - - if (info) { - info->save_sel = readl(info->sel_reg); - - /* For the moment, we must ensure nFCE is high during - * the time we are suspended. This really should be - * handled by suspending the MTDs we are using, but - * that is currently not the case. */ - - writel(info->save_sel | info->sel_bit, info->sel_reg); - - s3c2410_nand_clk_set_state(info, CLOCK_DISABLE); - } - - return 0; -} - -static int s3c24xx_nand_resume(struct platform_device *dev) -{ - struct s3c2410_nand_info *info = platform_get_drvdata(dev); - unsigned long sel; - - if (info) { - s3c2410_nand_clk_set_state(info, CLOCK_ENABLE); - s3c2410_nand_inithw(info); - - /* Restore the state of the nFCE line. */ - - sel = readl(info->sel_reg); - sel &= ~info->sel_bit; - sel |= info->save_sel & info->sel_bit; - writel(sel, info->sel_reg); - - s3c2410_nand_clk_set_state(info, CLOCK_SUSPEND); - } - - return 0; -} - -#else -#define s3c24xx_nand_suspend NULL -#define s3c24xx_nand_resume NULL -#endif - -/* driver device registration */ - -static const struct platform_device_id s3c24xx_driver_ids[] = { - { - .name = "s3c6400-nand", - .driver_data = TYPE_S3C2412, /* compatible with 2412 */ - }, - { } -}; - -MODULE_DEVICE_TABLE(platform, s3c24xx_driver_ids); - -static struct platform_driver s3c24xx_nand_driver = { - .probe = s3c24xx_nand_probe, - .remove = s3c24xx_nand_remove, - .suspend = s3c24xx_nand_suspend, - .resume = s3c24xx_nand_resume, - .id_table = s3c24xx_driver_ids, - .driver = { - .name = "s3c24xx-nand", - }, -}; - -module_platform_driver(s3c24xx_nand_driver); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ben Dooks "); -MODULE_DESCRIPTION("S3C24XX MTD NAND driver"); diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h deleted file mode 100644 index 25390fc3e795..000000000000 --- a/include/linux/platform_data/mtd-nand-s3c2410.h +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2004 Simtec Electronics - * Ben Dooks - * - * S3C2410 - NAND device controller platform_device info -*/ - -#ifndef __MTD_NAND_S3C2410_H -#define __MTD_NAND_S3C2410_H - -#include - -/** - * struct s3c2410_nand_set - define a set of one or more nand chips - * @flash_bbt: Openmoko u-boot can create a Bad Block Table - * Setting this flag will allow the kernel to - * look for it at boot time and also skip the NAND - * scan. - * @options: Default value to set into 'struct nand_chip' options. - * @nr_chips: Number of chips in this set - * @nr_partitions: Number of partitions pointed to by @partitions - * @name: Name of set (optional) - * @nr_map: Map for low-layer logical to physical chip numbers (option) - * @partitions: The mtd partition list - * - * define a set of one or more nand chips registered with an unique mtd. Also - * allows to pass flag to the underlying NAND layer. 'disable_ecc' will trigger - * a warning at boot time. - */ -struct s3c2410_nand_set { - unsigned int flash_bbt:1; - - unsigned int options; - int nr_chips; - int nr_partitions; - char *name; - int *nr_map; - struct mtd_partition *partitions; - struct device_node *of_node; -}; - -struct s3c2410_platform_nand { - /* timing information for controller, all times in nanoseconds */ - - int tacls; /* time for active CLE/ALE to nWE/nOE */ - int twrph0; /* active time for nWE/nOE */ - int twrph1; /* time for release CLE/ALE from nWE/nOE inactive */ - - unsigned int ignore_unset_ecc:1; - - enum nand_ecc_engine_type engine_type; - - int nr_sets; - struct s3c2410_nand_set *sets; - - void (*select_chip)(struct s3c2410_nand_set *, - int chip); -}; - -/** - * s3c_nand_set_platdata() - register NAND platform data. - * @nand: The NAND platform data to register with s3c_device_nand. - * - * This function copies the given NAND platform data, @nand and registers - * it with the s3c_device_nand. This allows @nand to be __initdata. -*/ -extern void s3c_nand_set_platdata(struct s3c2410_platform_nand *nand); - -#endif /*__MTD_NAND_S3C2410_H */ -- cgit v1.2.3 From 65128868bb3b0621d2d8e71f19852675a064b373 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:04 -0700 Subject: mm/memory_hotplug: Update comment for hotplug memory callback priorities Add clarification to comment for memory hotplug callback ordering as the current comment does not provide clear language on which callback happens first. Acked-by: David Hildenbrand Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-2-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/linux/memory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 40eb70ccb09d..1305102688d0 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -115,8 +115,8 @@ struct notifier_block; struct mem_section; /* - * Priorities for the hotplug memory callback routines (stored in decreasing - * order in the callback chain) + * Priorities for the hotplug memory callback routines. Invoked from + * high to low. Higher priorities correspond to higher numbers. */ #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 -- cgit v1.2.3 From b57fc652ca24ada3b0c888327f9944ed21559286 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:05 -0700 Subject: drivers/base/node: Add a helper function node_update_perf_attrs() Add helper function node_update_perf_attrs() to allow update of node access coordinates computed by an external agent such as CXL. The helper allows updating of coordinates after the attribute being created by HMAT. Acked-by: David Hildenbrand Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-3-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/base/node.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/node.h | 8 ++++++++ 2 files changed, 46 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/node.c b/drivers/base/node.c index 3399594136b2..db18a1c32637 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -248,6 +248,44 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, } EXPORT_SYMBOL_GPL(node_set_perf_attrs); +/** + * node_update_perf_attrs - Update the performance values for given access class + * @nid: Node identifier to be updated + * @coord: Heterogeneous memory performance coordinates + * @access: The access class for the given attributes + */ +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access) +{ + struct node_access_nodes *access_node; + struct node *node; + int i; + + if (WARN_ON_ONCE(!node_online(nid))) + return; + + node = node_devices[nid]; + list_for_each_entry(access_node, &node->access_list, list_node) { + if (access_node->access != access) + continue; + + access_node->coord = *coord; + for (i = 0; access_attrs[i]; i++) { + sysfs_notify(&access_node->dev.kobj, + NULL, access_attrs[i]->name); + } + break; + } + + /* When setting CPU access coordinates, update mempolicy */ + if (access != ACCESS_COORDINATE_CPU) + return; + + if (mempolicy_set_node_perf(nid, coord)) + pr_info("failed to set mempolicy attrs for node %d\n", nid); +} +EXPORT_SYMBOL_GPL(node_update_perf_attrs); + /** * struct node_cache_info - Internal tracking for memory node caches * @dev: Device represeting the cache level diff --git a/include/linux/node.h b/include/linux/node.h index 2c7529335b21..866e3323f1fd 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -85,6 +85,8 @@ struct node_cache_attrs { void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, enum access_coordinate_class access); +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access); #else static inline void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) @@ -96,6 +98,12 @@ static inline void node_set_perf_attrs(unsigned int nid, enum access_coordinate_class access) { } + +static inline void node_update_perf_attrs(unsigned int nid, + struct access_coordinate *coord, + enum access_coordinate_class access) +{ +} #endif struct node { -- cgit v1.2.3 From 2e454fb8056df6da4bba7d89a57bf60e217463c0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:06 -0700 Subject: cxl, acpi/hmat: Update CXL access coordinates directly instead of through HMAT The current implementation of CXL memory hotplug notifier gets called before the HMAT memory hotplug notifier. The CXL driver calculates the access coordinates (bandwidth and latency values) for the CXL end to end path (i.e. CPU to endpoint). When the CXL region is onlined, the CXL memory hotplug notifier writes the access coordinates to the HMAT target structs. Then the HMAT memory hotplug notifier is called and it creates the access coordinates for the node sysfs attributes. During testing on an Intel platform, it was found that although the newly calculated coordinates were pushed to sysfs, the sysfs attributes for the access coordinates showed up with the wrong initiator. The system has 4 nodes (0, 1, 2, 3) where node 0 and 1 are CPU nodes and node 2 and 3 are CXL nodes. The expectation is that node 2 would show up as a target to node 0: /sys/devices/system/node/node2/access0/initiators/node0 However it was observed that node 2 showed up as a target under node 1: /sys/devices/system/node/node2/access0/initiators/node1 The original intent of the 'ext_updated' flag in HMAT handling code was to stop HMAT memory hotplug callback from clobbering the access coordinates after CXL has injected its calculated coordinates and replaced the generic target access coordinates provided by the HMAT table in the HMAT target structs. However the flag is hacky at best and blocks the updates from other CXL regions that are onlined in the same node later on. Remove the 'ext_updated' flag usage and just update the access coordinates for the nodes directly without touching HMAT target data. The hotplug memory callback ordering is changed. Instead of changing CXL, move HMAT back so there's room for the levels rather than have CXL share the same level as SLAB_CALLBACK_PRI. The change will resulting in the CXL callback to be executed after the HMAT callback. With the change, the CXL hotplug memory notifier runs after the HMAT callback. The HMAT callback will create the node sysfs attributes for access coordinates. The CXL callback will write the access coordinates to the now created node sysfs attributes directly and will not pollute the HMAT target values. A nodemask is introduced to keep track if a node has been updated and prevents further updates. Fixes: 067353a46d8c ("cxl/region: Add memory hotplug notifier for cxl region") Cc: stable@vger.kernel.org Tested-by: Marc Herbert Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-4-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/acpi/numa/hmat.c | 6 ------ drivers/cxl/core/cdat.c | 5 ----- drivers/cxl/core/core.h | 1 - drivers/cxl/core/region.c | 20 ++++++++++++-------- include/linux/memory.h | 2 +- 5 files changed, 13 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 4958301f5417..5d32490dc4ab 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -74,7 +74,6 @@ struct memory_target { struct node_cache_attrs cache_attrs; u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE]; bool registered; - bool ext_updated; /* externally updated */ }; struct memory_initiator { @@ -391,7 +390,6 @@ int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, coord->read_bandwidth, access); hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH, coord->write_bandwidth, access); - target->ext_updated = true; return 0; } @@ -773,10 +771,6 @@ static void hmat_update_target_attrs(struct memory_target *target, u32 best = 0; int i; - /* Don't update if an external agent has changed the data. */ - if (target->ext_updated) - return; - /* Don't update for generic port if there's no device handle */ if ((access == NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL || access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) && diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index c0af645425f4..c891fd618cfd 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -1081,8 +1081,3 @@ int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, { return hmat_update_target_coordinates(nid, &cxlr->coord[access], access); } - -bool cxl_need_node_perf_attrs_update(int nid) -{ - return !acpi_node_backed_by_real_pxm(nid); -} diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 2669f251d677..a253d308f3c9 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -139,7 +139,6 @@ long cxl_pci_get_latency(struct pci_dev *pdev); int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, enum access_coordinate_class access); -bool cxl_need_node_perf_attrs_update(int nid); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 71cc42d05248..0ed95cbc5d5b 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -30,6 +30,12 @@ * 3. Decoder targets */ +/* + * nodemask that sets per node when the access_coordinates for the node has + * been updated by the CXL memory hotplug notifier. + */ +static nodemask_t nodemask_region_seen = NODE_MASK_NONE; + static struct cxl_region *to_cxl_region(struct device *dev); #define __ACCESS_ATTR_RO(_level, _name) { \ @@ -2442,14 +2448,8 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { if (cxlr->coord[i].read_bandwidth) { - rc = 0; - if (cxl_need_node_perf_attrs_update(nid)) - node_set_perf_attrs(nid, &cxlr->coord[i], i); - else - rc = cxl_update_hmat_access_coordinates(nid, cxlr, i); - - if (rc == 0) - cset++; + node_update_perf_attrs(nid, &cxlr->coord[i], i); + cset++; } } @@ -2487,6 +2487,10 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, if (nid != region_nid) return NOTIFY_DONE; + /* No action needed if node bit already set */ + if (node_test_and_set(nid, nodemask_region_seen)) + return NOTIFY_DONE; + if (!cxl_region_update_coordinates(cxlr, nid)) return NOTIFY_DONE; diff --git a/include/linux/memory.h b/include/linux/memory.h index 1305102688d0..0b755d1ef1ec 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -120,8 +120,8 @@ struct mem_section; */ #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 -#define HMAT_CALLBACK_PRI 2 #define CXL_CALLBACK_PRI 5 +#define HMAT_CALLBACK_PRI 6 #define MM_COMPUTE_BATCH_PRI 10 #define CPUSET_CALLBACK_PRI 10 #define MEMTIER_HOTPLUG_PRI 100 -- cgit v1.2.3 From e99ecbc4c89adf551cccbbc00b5cb08c50969af6 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:07 -0700 Subject: acpi/hmat: Remove now unused hmat_update_target_coordinates() Remove deadcode since CXL no longer calls hmat_update_target_coordinates(). Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-5-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/acpi/numa/hmat.c | 28 ---------------------------- drivers/cxl/core/cdat.c | 6 ------ drivers/cxl/core/core.h | 2 -- include/linux/acpi.h | 12 ------------ 4 files changed, 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 5d32490dc4ab..5a36d57289b4 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -367,34 +367,6 @@ static void hmat_update_target_access(struct memory_target *target, } } -int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, - enum access_coordinate_class access) -{ - struct memory_target *target; - int pxm; - - if (nid == NUMA_NO_NODE) - return -EINVAL; - - pxm = node_to_pxm(nid); - guard(mutex)(&target_lock); - target = find_mem_target(pxm); - if (!target) - return -ENODEV; - - hmat_update_target_access(target, ACPI_HMAT_READ_LATENCY, - coord->read_latency, access); - hmat_update_target_access(target, ACPI_HMAT_WRITE_LATENCY, - coord->write_latency, access); - hmat_update_target_access(target, ACPI_HMAT_READ_BANDWIDTH, - coord->read_bandwidth, access); - hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH, - coord->write_bandwidth, access); - - return 0; -} -EXPORT_SYMBOL_GPL(hmat_update_target_coordinates); - static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc) { struct memory_locality *loc; diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index c891fd618cfd..bca1ec279651 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -1075,9 +1075,3 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr, cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth; } } - -int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, - enum access_coordinate_class access) -{ - return hmat_update_target_coordinates(nid, &cxlr->coord[access], access); -} diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index a253d308f3c9..0476c3b648de 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -137,8 +137,6 @@ enum cxl_poison_trace_type { long cxl_pci_get_latency(struct pci_dev *pdev); int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); -int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, - enum access_coordinate_class access); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1c5bb1e887cd..5ff5d99f6ead 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1595,18 +1595,6 @@ static inline void acpi_use_parent_companion(struct device *dev) ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent)); } -#ifdef CONFIG_ACPI_HMAT -int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, - enum access_coordinate_class access); -#else -static inline int hmat_update_target_coordinates(int nid, - struct access_coordinate *coord, - enum access_coordinate_class access) -{ - return -EOPNOTSUPP; -} -#endif - #ifdef CONFIG_ACPI_NUMA bool acpi_node_backed_by_real_pxm(int nid); #else -- cgit v1.2.3 From 54dbd2a8e974b900b18639e75f62702a4334ddc0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 3 Sep 2025 14:52:56 +0300 Subject: PCI/P2PDMA: Reduce scope of pci_has_p2pmem() pci_has_p2pmem() is not used outside of p2pdma.c, and there is no need to export it for use by modules. Signed-off-by: Leon Romanovsky Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe Link: https://patch.msgid.link/d40f3f1decf54c9236bc38b48a6aae612a5c182f.1756900291.git.leon@kernel.org --- drivers/pci/p2pdma.c | 3 +-- include/linux/pci-p2pdma.h | 5 ----- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 1cb5e423eed4..78e108e47254 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -738,7 +738,7 @@ EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many); * pci_has_p2pmem - check if a given PCI device has published any p2pmem * @pdev: PCI device to check */ -bool pci_has_p2pmem(struct pci_dev *pdev) +static bool pci_has_p2pmem(struct pci_dev *pdev) { struct pci_p2pdma *p2pdma; bool res; @@ -750,7 +750,6 @@ bool pci_has_p2pmem(struct pci_dev *pdev) return res; } -EXPORT_SYMBOL_GPL(pci_has_p2pmem); /** * pci_p2pmem_find_many - find a peer-to-peer DMA memory device compatible with diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h index 075c20b161d9..951f81a38f3a 100644 --- a/include/linux/pci-p2pdma.h +++ b/include/linux/pci-p2pdma.h @@ -21,7 +21,6 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, u64 offset); int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, int num_clients, bool verbose); -bool pci_has_p2pmem(struct pci_dev *pdev); struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients); void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size); void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size); @@ -45,10 +44,6 @@ static inline int pci_p2pdma_distance_many(struct pci_dev *provider, { return -1; } -static inline bool pci_has_p2pmem(struct pci_dev *pdev) -{ - return false; -} static inline struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients) { -- cgit v1.2.3 From 038c7dc66e2744e5df57163b8f957745ae10d23e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Sep 2025 20:46:40 -0700 Subject: compiler_types.h: Move __nocfi out of compiler-specific header Prepare for GCC KCFI support and move the __nocfi attribute from compiler-clang.h to compiler_types.h. This was already gated by CONFIG_CFI_CLANG, so this remains safe for non-KCFI GCC builds. Signed-off-by: Kees Cook Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250904034656.3670313-1-kees@kernel.org --- include/linux/compiler-clang.h | 5 ----- include/linux/compiler_types.h | 4 +++- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index fa4ffe037bc7..7a4568e421dc 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -96,11 +96,6 @@ # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif -#if __has_feature(kcfi) -/* Disable CFI checking inside a function. */ -#define __nocfi __attribute__((__no_sanitize__("kcfi"))) -#endif - /* * Turn individual warnings and errors on and off locally, depending * on version. diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 16755431fc11..a910f9fa5341 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -432,7 +432,9 @@ struct ftrace_likely_data { # define __noscs #endif -#ifndef __nocfi +#if defined(CONFIG_CFI_CLANG) +# define __nocfi __attribute__((__no_sanitize__("kcfi"))) +#else # define __nocfi #endif -- cgit v1.2.3 From 0b815825b1b0bd6762ca028e9b6631b002efb7ca Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Sep 2025 20:46:45 -0700 Subject: x86/cfi: Remove __noinitretpoline and __noretpoline Commit 66f793099a63 ("x86/retpoline: Avoid retpolines for built-in __init functions") disabled retpolines in __init sections (__noinitretpoline) as a precaution against potential issues with retpolines in early boot, but it has not been a problem in practice (i.e. see Clang below). Commit 87358710c1fb ("x86/retpoline: Support retpoline builds with Clang") narrowed this to only GCC, as Clang doesn't have per-function control over retpoline emission. As such, Clang has been booting with retpolines in __init since retpoline support was introduced. Clang KCFI has been instrumenting __init since CFI was introduced. With the introduction of KCFI for GCC, KCFI instrumentation with retpolines disabled means that objtool does not construct .retpoline_sites section entries for the non-retpoline KCFI calls. At boot, the KCFI rehashing code, via __apply_fineibt(), misses all __init KCFI calls (since they are not retpolines), resulting in immediate hash mismatches: all preambles are rehashed (via .cfi_sites) and none of the __init call sites are rehashed. Remove __noinitretpoline since it provides no meaningful utility and creates problems with CFI. Additionally remove __noretpoline since it is now unused. Alternatively, cfi_rand_callers() could walk the .kcfi_traps section which is exactly the list of KCFI instrumentation sites. But it seems better to have as few differences in common instruction sequences between compilers as possible, so better to remove the special handling of retpolines in __init for GCC. Signed-off-by: Kees Cook Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250904034656.3670313-6-kees@kernel.org --- include/linux/compiler-gcc.h | 4 ---- include/linux/init.h | 8 -------- 2 files changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5d07c469b571..5de824a0b3d7 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -35,10 +35,6 @@ (typeof(ptr)) (__ptr + (off)); \ }) -#ifdef CONFIG_MITIGATION_RETPOLINE -#define __noretpoline __attribute__((__indirect_branch__("keep"))) -#endif - #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) #define __latent_entropy __attribute__((latent_entropy)) #endif diff --git a/include/linux/init.h b/include/linux/init.h index a60d32d227ee..17c1bc712e23 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -7,13 +7,6 @@ #include #include -/* Built-in __init functions needn't be compiled with retpoline */ -#if defined(__noretpoline) && !defined(MODULE) -#define __noinitretpoline __noretpoline -#else -#define __noinitretpoline -#endif - /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) * as `initialization' functions. The kernel can take this @@ -50,7 +43,6 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ #define __init __section(".init.text") __cold __latent_entropy \ - __noinitretpoline \ __no_kstack_erase #define __initdata __section(".init.data") #define __initconst __section(".init.rodata") -- cgit v1.2.3 From 1544344563376b2a2ae2af5af1db00d6410c18e0 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Fri, 29 Aug 2025 15:28:44 +0800 Subject: rhashtable: Use __always_inline instead of inline Sometimes, the compiler is not clever enough to inline the rhashtable_lookup() for us, even if the "obj_cmpfn" and "key_len" in params is const. This can introduce more overhead. Therefore, use __always_inline for the rhashtable. Signed-off-by: Menglong Dong Signed-off-by: Herbert Xu --- include/linux/rhashtable.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 6c85b28ea30b..e740157f3cd7 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -122,7 +122,7 @@ static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, return hash & (tbl->size - 1); } -static inline unsigned int rht_key_get_hash(struct rhashtable *ht, +static __always_inline unsigned int rht_key_get_hash(struct rhashtable *ht, const void *key, const struct rhashtable_params params, unsigned int hash_rnd) { @@ -152,7 +152,7 @@ static inline unsigned int rht_key_get_hash(struct rhashtable *ht, return hash; } -static inline unsigned int rht_key_hashfn( +static __always_inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { @@ -161,7 +161,7 @@ static inline unsigned int rht_key_hashfn( return rht_bucket_index(tbl, hash); } -static inline unsigned int rht_head_hashfn( +static __always_inline unsigned int rht_head_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he, const struct rhashtable_params params) { @@ -586,7 +586,7 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, } /* Internal function, do not use. */ -static inline struct rhash_head *__rhashtable_lookup( +static __always_inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -639,7 +639,7 @@ restart: * * Returns the first entry on which the compare function returned true. */ -static inline void *rhashtable_lookup( +static __always_inline void *rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -662,7 +662,7 @@ static inline void *rhashtable_lookup( * * Returns the first entry on which the compare function returned true. */ -static inline void *rhashtable_lookup_fast( +static __always_inline void *rhashtable_lookup_fast( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -689,7 +689,7 @@ static inline void *rhashtable_lookup_fast( * * Returns the list of entries that match the given key. */ -static inline struct rhlist_head *rhltable_lookup( +static __always_inline struct rhlist_head *rhltable_lookup( struct rhltable *hlt, const void *key, const struct rhashtable_params params) { @@ -702,7 +702,7 @@ static inline struct rhlist_head *rhltable_lookup( * function returns the existing element already in hashes if there is a clash, * otherwise it returns an error via ERR_PTR(). */ -static inline void *__rhashtable_insert_fast( +static __always_inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { @@ -825,7 +825,7 @@ out_unlock: * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhashtable_insert_fast( +static __always_inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -854,7 +854,7 @@ static inline int rhashtable_insert_fast( * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhltable_insert_key( +static __always_inline int rhltable_insert_key( struct rhltable *hlt, const void *key, struct rhlist_head *list, const struct rhashtable_params params) { @@ -877,7 +877,7 @@ static inline int rhltable_insert_key( * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhltable_insert( +static __always_inline int rhltable_insert( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { @@ -902,7 +902,7 @@ static inline int rhltable_insert( * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhashtable_lookup_insert_fast( +static __always_inline int rhashtable_lookup_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -929,7 +929,7 @@ static inline int rhashtable_lookup_insert_fast( * object if it exists, NULL if it did not and the insertion was successful, * and an ERR_PTR otherwise. */ -static inline void *rhashtable_lookup_get_insert_fast( +static __always_inline void *rhashtable_lookup_get_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -956,7 +956,7 @@ static inline void *rhashtable_lookup_get_insert_fast( * * Returns zero on success. */ -static inline int rhashtable_lookup_insert_key( +static __always_inline int rhashtable_lookup_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { @@ -982,7 +982,7 @@ static inline int rhashtable_lookup_insert_key( * object if it exists, NULL if it does not and the insertion was successful, * and an ERR_PTR otherwise. */ -static inline void *rhashtable_lookup_get_insert_key( +static __always_inline void *rhashtable_lookup_get_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { @@ -992,7 +992,7 @@ static inline void *rhashtable_lookup_get_insert_key( } /* Internal function, please use rhashtable_remove_fast() instead */ -static inline int __rhashtable_remove_fast_one( +static __always_inline int __rhashtable_remove_fast_one( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) @@ -1074,7 +1074,7 @@ unlocked: } /* Internal function, please use rhashtable_remove_fast() instead */ -static inline int __rhashtable_remove_fast( +static __always_inline int __rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { @@ -1115,7 +1115,7 @@ static inline int __rhashtable_remove_fast( * * Returns zero on success, -ENOENT if the entry could not be found. */ -static inline int rhashtable_remove_fast( +static __always_inline int rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -1137,7 +1137,7 @@ static inline int rhashtable_remove_fast( * * Returns zero on success, -ENOENT if the entry could not be found. */ -static inline int rhltable_remove( +static __always_inline int rhltable_remove( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { @@ -1145,7 +1145,7 @@ static inline int rhltable_remove( } /* Internal function, please use rhashtable_replace_fast() instead */ -static inline int __rhashtable_replace_fast( +static __always_inline int __rhashtable_replace_fast( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) @@ -1208,7 +1208,7 @@ unlocked: * Returns zero on success, -ENOENT if the entry could not be found, * -EINVAL if hash is not the same for the old and new objects. */ -static inline int rhashtable_replace_fast( +static __always_inline int rhashtable_replace_fast( struct rhashtable *ht, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) -- cgit v1.2.3 From 886d6981208263b55a1eb8b39c5d00db1544b9bb Mon Sep 17 00:00:00 2001 From: Zhushuai Yin Date: Sat, 30 Aug 2025 18:27:57 +0800 Subject: crypto: hisilicon/zip - add hashjoin, gather, and UDMA data move features The new version of the hisilicon zip driver supports the hash join and gather features, as well as the data move feature (UDMA), including data copying and memory initialization functions.These features are registered to the uacce subsystem. Signed-off-by: Zhushuai Yin Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 29 ++++++++++++++++++++++------- drivers/crypto/hisilicon/zip/dae_main.c | 11 +++++++++-- include/linux/hisi_acc_qm.h | 1 + include/uapi/misc/uacce/hisi_qm.h | 1 + 4 files changed, 33 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 102aff9ea19a..98099117bb38 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -2742,6 +2742,27 @@ static void qm_remove_uacce(struct hisi_qm *qm) } } +static void qm_uacce_api_ver_init(struct hisi_qm *qm) +{ + struct uacce_device *uacce = qm->uacce; + + switch (qm->ver) { + case QM_HW_V1: + uacce->api_ver = HISI_QM_API_VER_BASE; + break; + case QM_HW_V2: + uacce->api_ver = HISI_QM_API_VER2_BASE; + break; + case QM_HW_V3: + case QM_HW_V4: + uacce->api_ver = HISI_QM_API_VER3_BASE; + break; + default: + uacce->api_ver = HISI_QM_API_VER5_BASE; + break; + } +} + static int qm_alloc_uacce(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -2775,13 +2796,6 @@ static int qm_alloc_uacce(struct hisi_qm *qm) uacce->is_vf = pdev->is_virtfn; uacce->priv = qm; - if (qm->ver == QM_HW_V1) - uacce->api_ver = HISI_QM_API_VER_BASE; - else if (qm->ver == QM_HW_V2) - uacce->api_ver = HISI_QM_API_VER2_BASE; - else - uacce->api_ver = HISI_QM_API_VER3_BASE; - if (qm->ver == QM_HW_V1) mmio_page_nr = QM_DOORBELL_PAGE_NR; else if (!test_bit(QM_SUPPORT_DB_ISOLATION, &qm->caps)) @@ -2801,6 +2815,7 @@ static int qm_alloc_uacce(struct hisi_qm *qm) uacce->qf_pg_num[UACCE_QFRT_DUS] = dus_page_nr; qm->uacce = uacce; + qm_uacce_api_ver_init(qm); INIT_LIST_HEAD(&qm->isolate_data.qm_hw_errs); mutex_init(&qm->isolate_data.isolate_lock); diff --git a/drivers/crypto/hisilicon/zip/dae_main.c b/drivers/crypto/hisilicon/zip/dae_main.c index 6f22e4c36e49..4c5481c77436 100644 --- a/drivers/crypto/hisilicon/zip/dae_main.c +++ b/drivers/crypto/hisilicon/zip/dae_main.c @@ -15,6 +15,7 @@ #define DAE_REG_RD_TMOUT_US USEC_PER_SEC #define DAE_ALG_NAME "hashagg" +#define DAE_V5_ALG_NAME "hashagg\nudma\nhashjoin\ngather" /* error */ #define DAE_AXI_CFG_OFFSET 0x331000 @@ -82,6 +83,7 @@ int hisi_dae_set_user_domain(struct hisi_qm *qm) int hisi_dae_set_alg(struct hisi_qm *qm) { + const char *alg_name; size_t len; if (!dae_is_support(qm)) @@ -90,9 +92,14 @@ int hisi_dae_set_alg(struct hisi_qm *qm) if (!qm->uacce) return 0; + if (qm->ver >= QM_HW_V5) + alg_name = DAE_V5_ALG_NAME; + else + alg_name = DAE_ALG_NAME; + len = strlen(qm->uacce->algs); /* A line break may be required */ - if (len + strlen(DAE_ALG_NAME) + 1 >= QM_DEV_ALG_MAX_LEN) { + if (len + strlen(alg_name) + 1 >= QM_DEV_ALG_MAX_LEN) { pci_err(qm->pdev, "algorithm name is too long!\n"); return -EINVAL; } @@ -100,7 +107,7 @@ int hisi_dae_set_alg(struct hisi_qm *qm) if (len) strcat((char *)qm->uacce->algs, "\n"); - strcat((char *)qm->uacce->algs, DAE_ALG_NAME); + strcat((char *)qm->uacce->algs, alg_name); return 0; } diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 0c4c84b8c3be..f2254ddc327c 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -125,6 +125,7 @@ enum qm_hw_ver { QM_HW_V2 = 0x21, QM_HW_V3 = 0x30, QM_HW_V4 = 0x50, + QM_HW_V5 = 0x51, }; enum qm_fun_type { diff --git a/include/uapi/misc/uacce/hisi_qm.h b/include/uapi/misc/uacce/hisi_qm.h index 3e66dbc2f323..10504b48eabf 100644 --- a/include/uapi/misc/uacce/hisi_qm.h +++ b/include/uapi/misc/uacce/hisi_qm.h @@ -31,6 +31,7 @@ struct hisi_qp_info { #define HISI_QM_API_VER_BASE "hisi_qm_v1" #define HISI_QM_API_VER2_BASE "hisi_qm_v2" #define HISI_QM_API_VER3_BASE "hisi_qm_v3" +#define HISI_QM_API_VER5_BASE "hisi_qm_v5" /* UACCE_CMD_QM_SET_QP_CTX: Set qp algorithm type */ #define UACCE_CMD_QM_SET_QP_CTX _IOWR('H', 10, struct hisi_qp_ctx) -- cgit v1.2.3 From 20f988320d2718ef28b1f0635acc88c12a216d29 Mon Sep 17 00:00:00 2001 From: "Rai, Amardeep" Date: Wed, 20 Aug 2025 17:38:19 +0300 Subject: usb: core: Add a function to get USB version independent periodic payload Add usb_endpoint_max_periodic_payload() to obtain maximum payload bytes in a service interval for isochronous and interrupt endpoints in a USB version independent way. Signed-off-by: Rai, Amardeep Signed-off-by: Mathias Nyman Co-developed-by: Sakari Ailus Signed-off-by: Sakari Ailus Reviewed-by: Hans de Goede Acked-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250820143824.551777-5-sakari.ailus@linux.intel.com --- drivers/usb/core/usb.c | 29 +++++++++++++++++++++++++++++ include/linux/usb.h | 3 +++ 2 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index fca7735fc660..ca9ff6ad8e73 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -1110,6 +1110,35 @@ void usb_free_noncoherent(struct usb_device *dev, size_t size, } EXPORT_SYMBOL_GPL(usb_free_noncoherent); +/** + * usb_endpoint_max_periodic_payload - Get maximum payload bytes per service + * interval + * @udev: The USB device + * @ep: The endpoint + * + * Returns: the maximum number of bytes isochronous or interrupt endpoint @ep + * can transfer during a service interval, or 0 for other endpoints. + */ +u32 usb_endpoint_max_periodic_payload(struct usb_device *udev, + const struct usb_host_endpoint *ep) +{ + if (!usb_endpoint_xfer_isoc(&ep->desc) && + !usb_endpoint_xfer_int(&ep->desc)) + return 0; + + switch (udev->speed) { + case USB_SPEED_SUPER_PLUS: + if (USB_SS_SSP_ISOC_COMP(ep->ss_ep_comp.bmAttributes)) + return le32_to_cpu(ep->ssp_isoc_ep_comp.dwBytesPerInterval); + fallthrough; + case USB_SPEED_SUPER: + return le16_to_cpu(ep->ss_ep_comp.wBytesPerInterval); + default: + return usb_endpoint_maxp(&ep->desc) * usb_endpoint_maxp_mult(&ep->desc); + } +} +EXPORT_SYMBOL_GPL(usb_endpoint_max_periodic_payload); + /* * Notifications of device and interface registration */ diff --git a/include/linux/usb.h b/include/linux/usb.h index 9d662c6abb4d..e9cf2786d8bd 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -2039,6 +2039,9 @@ static inline u16 usb_maxpacket(struct usb_device *udev, int pipe) return usb_endpoint_maxp(&ep->desc); } +u32 usb_endpoint_max_periodic_payload(struct usb_device *udev, + const struct usb_host_endpoint *ep); + /* translate USB error codes to codes user space understands */ static inline int usb_translate_errors(int error_code) { -- cgit v1.2.3 From d6725169a9bbcb5bd1dd14b2891b874614c59f52 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 20 Aug 2025 17:38:21 +0300 Subject: usb: core: Introduce usb_endpoint_is_hs_isoc_double() Introduce usb_endpoint_is_hs_isoc_double() tell whether an endpoint conforms to USB 2.0 Isochronous Double IN Bandwidth ECN. Signed-off-by: Sakari Ailus Acked-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250820143824.551777-7-sakari.ailus@linux.intel.com --- drivers/usb/core/usb.c | 19 +++++++++++++++++++ include/linux/usb.h | 3 +++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index ca9ff6ad8e73..939dc4aafb89 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -1139,6 +1139,25 @@ u32 usb_endpoint_max_periodic_payload(struct usb_device *udev, } EXPORT_SYMBOL_GPL(usb_endpoint_max_periodic_payload); +/** + * usb_endpoint_is_hs_isoc_double - Tell whether an endpoint uses USB 2 + * Isochronous Double IN Bandwidth + * @udev: The USB device + * @ep: The endpoint + * + * Returns: true if an endpoint @ep conforms to USB 2 Isochronous Double IN + * Bandwidth ECN, false otherwise. + */ +bool usb_endpoint_is_hs_isoc_double(struct usb_device *udev, + const struct usb_host_endpoint *ep) +{ + return ep->eusb2_isoc_ep_comp.bDescriptorType && + le16_to_cpu(udev->descriptor.bcdUSB) == 0x220 && + usb_endpoint_is_isoc_in(&ep->desc) && + !le16_to_cpu(ep->desc.wMaxPacketSize); +} +EXPORT_SYMBOL_GPL(usb_endpoint_is_hs_isoc_double); + /* * Notifications of device and interface registration */ diff --git a/include/linux/usb.h b/include/linux/usb.h index e9cf2786d8bd..70ef00c42d22 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -2042,6 +2042,9 @@ static inline u16 usb_maxpacket(struct usb_device *udev, int pipe) u32 usb_endpoint_max_periodic_payload(struct usb_device *udev, const struct usb_host_endpoint *ep); +bool usb_endpoint_is_hs_isoc_double(struct usb_device *udev, + const struct usb_host_endpoint *ep); + /* translate USB error codes to codes user space understands */ static inline int usb_translate_errors(int error_code) { -- cgit v1.2.3 From 23743ba64709a9c137c1b928f8b8e00d846af9cc Mon Sep 17 00:00:00 2001 From: Calixte Pernot Date: Mon, 25 Aug 2025 14:56:09 +0200 Subject: vt: add support for smput/rmput escape codes Support "\e[?1049h" and "\e[?1049l" escape codes. This patch allows programs to enter and leave alternate screens. This feature is widely available in graphical terminal emulators and mostly used by fullscreen terminal-based user interfaces such as text editors. Most editors such as vim and nano assume this escape code in not supported and will not try to print the escape sequence if TERM=linux. To try out this patch, run `TERM=xterm-256color vim` inside a VT. Signed-off-by: Calixte Pernot Link: https://lore.kernel.org/r/20250825125607.2478-3-calixte.pernot@grenoble-inp.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 58 ++++++++++++++++++++++++++++++++++++++++++ include/linux/console_struct.h | 3 +++ 2 files changed, 61 insertions(+) (limited to 'include/linux') diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 869261141535..3f80e98e5c51 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -141,6 +141,7 @@ static const struct consw *con_driver_map[MAX_NR_CONSOLES]; static int con_open(struct tty_struct *, struct file *); static void vc_init(struct vc_data *vc, int do_clear); static void gotoxy(struct vc_data *vc, int new_x, int new_y); +static void restore_cur(struct vc_data *vc); static void save_cur(struct vc_data *vc); static void reset_terminal(struct vc_data *vc, int do_clear); static void con_flush_chars(struct tty_struct *tty); @@ -1341,6 +1342,10 @@ struct vc_data *vc_deallocate(unsigned int currcons) kfree(vc->vc_screenbuf); vc_cons[currcons].d = NULL; } + if (vc->vc_saved_screen != NULL) { + kfree(vc->vc_saved_screen); + vc->vc_saved_screen = NULL; + } return vc; } @@ -1875,6 +1880,45 @@ static int get_bracketed_paste(struct tty_struct *tty) return vc->vc_bracketed_paste; } +/* console_lock is held */ +static void enter_alt_screen(struct vc_data *vc) +{ + unsigned int size = vc->vc_rows * vc->vc_cols * 2; + + if (vc->vc_saved_screen != NULL) + return; /* Already inside an alt-screen */ + vc->vc_saved_screen = kmemdup((u16 *)vc->vc_origin, size, GFP_KERNEL); + if (vc->vc_saved_screen == NULL) + return; + vc->vc_saved_rows = vc->vc_rows; + vc->vc_saved_cols = vc->vc_cols; + save_cur(vc); + /* clear entire screen */ + csi_J(vc, CSI_J_FULL); +} + +/* console_lock is held */ +static void leave_alt_screen(struct vc_data *vc) +{ + unsigned int rows = min(vc->vc_saved_rows, vc->vc_rows); + unsigned int cols = min(vc->vc_saved_cols, vc->vc_cols); + u16 *src, *dest; + + if (vc->vc_saved_screen == NULL) + return; /* Not inside an alt-screen */ + for (unsigned int r = 0; r < rows; r++) { + src = vc->vc_saved_screen + r * vc->vc_saved_cols; + dest = ((u16 *)vc->vc_origin) + r * vc->vc_cols; + memcpy(dest, src, 2 * cols); + } + restore_cur(vc); + /* Update the entire screen */ + if (con_should_update(vc)) + do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2); + kfree(vc->vc_saved_screen); + vc->vc_saved_screen = NULL; +} + enum { CSI_DEC_hl_CURSOR_KEYS = 1, /* CKM: cursor keys send ^[Ox/^[[x */ CSI_DEC_hl_132_COLUMNS = 3, /* COLM: 80/132 mode switch */ @@ -1885,6 +1929,7 @@ enum { CSI_DEC_hl_MOUSE_X10 = 9, CSI_DEC_hl_SHOW_CURSOR = 25, /* TCEM */ CSI_DEC_hl_MOUSE_VT200 = 1000, + CSI_DEC_hl_ALT_SCREEN = 1049, CSI_DEC_hl_BRACKETED_PASTE = 2004, }; @@ -1941,6 +1986,12 @@ static void csi_DEC_hl(struct vc_data *vc, bool on_off) case CSI_DEC_hl_BRACKETED_PASTE: vc->vc_bracketed_paste = on_off; break; + case CSI_DEC_hl_ALT_SCREEN: + if (on_off) + enter_alt_screen(vc); + else + leave_alt_screen(vc); + break; } } @@ -2179,6 +2230,13 @@ static void reset_terminal(struct vc_data *vc, int do_clear) vc->vc_deccm = global_cursor_default; vc->vc_decim = 0; + if (vc->vc_saved_screen != NULL) { + kfree(vc->vc_saved_screen); + vc->vc_saved_screen = NULL; + vc->vc_saved_rows = 0; + vc->vc_saved_cols = 0; + } + vt_reset_keyboard(vc->vc_num); vc->vc_cursor_type = cur_default; diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 59b4fec5f254..13b35637bd5a 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -159,6 +159,9 @@ struct vc_data { struct uni_pagedict *uni_pagedict; struct uni_pagedict **uni_pagedict_loc; /* [!] Location of uni_pagedict variable for this console */ u32 **vc_uni_lines; /* unicode screen content */ + u16 *vc_saved_screen; + unsigned int vc_saved_cols; + unsigned int vc_saved_rows; /* additional information is in vt_kern.h */ }; -- cgit v1.2.3 From ca734f54b346db170be20ff3382bc3da8eb65904 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 17 Aug 2025 14:53:15 -0700 Subject: Input: pxa27x-keypad - drop support for platform data There are no in-kernel users of pxa27x_keypad_platform_data in the kernel, and the driver supports configuration via device tree, so drop support of static platform data and move properties parsing from OF-specific methods to generic ones. Link: https://lore.kernel.org/r/20250817215316.1872689-3-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/pxa27x_keypad.c | 378 +++++++++------------------- include/linux/platform_data/keypad-pxa27x.h | 73 ------ 2 files changed, 121 insertions(+), 330 deletions(-) delete mode 100644 include/linux/platform_data/keypad-pxa27x.h (limited to 'include/linux') diff --git a/drivers/input/keyboard/pxa27x_keypad.c b/drivers/input/keyboard/pxa27x_keypad.c index e6f6adb56de4..4519eecb317b 100644 --- a/drivers/input/keyboard/pxa27x_keypad.c +++ b/drivers/input/keyboard/pxa27x_keypad.c @@ -21,13 +21,13 @@ #include #include #include +#include #include #include #include #include #include -#include /* * Keypad Controller registers */ @@ -100,54 +100,69 @@ #define keypad_readl(off) __raw_readl(keypad->mmio_base + (off)) #define keypad_writel(off, v) __raw_writel((v), keypad->mmio_base + (off)) +#define MAX_MATRIX_KEY_ROWS 8 +#define MAX_MATRIX_KEY_COLS 8 +#define MAX_DIRECT_KEY_NUM 8 +#define MAX_ROTARY_ENCODERS 2 + #define MAX_MATRIX_KEY_NUM (MAX_MATRIX_KEY_ROWS * MAX_MATRIX_KEY_COLS) #define MAX_KEYPAD_KEYS (MAX_MATRIX_KEY_NUM + MAX_DIRECT_KEY_NUM) -struct pxa27x_keypad { - const struct pxa27x_keypad_platform_data *pdata; +struct pxa27x_keypad_rotary { + unsigned short *key_codes; + int rel_code; + bool enabled; +}; +struct pxa27x_keypad { struct clk *clk; struct input_dev *input_dev; void __iomem *mmio_base; int irq; - unsigned short keycodes[MAX_KEYPAD_KEYS]; - int rotary_rel_code[2]; - + unsigned int matrix_key_rows; + unsigned int matrix_key_cols; unsigned int row_shift; + unsigned int direct_key_num; + unsigned int direct_key_mask; + bool direct_key_low_active; + + /* key debounce interval */ + unsigned int debounce_interval; + + unsigned short keycodes[MAX_KEYPAD_KEYS]; + /* state row bits of each column scan */ u32 matrix_key_state[MAX_MATRIX_KEY_COLS]; u32 direct_key_state; - unsigned int direct_key_mask; + struct pxa27x_keypad_rotary rotary[MAX_ROTARY_ENCODERS]; }; -#ifdef CONFIG_OF -static int pxa27x_keypad_matrix_key_parse_dt(struct pxa27x_keypad *keypad, - struct pxa27x_keypad_platform_data *pdata) +static int pxa27x_keypad_matrix_key_parse(struct pxa27x_keypad *keypad) { struct input_dev *input_dev = keypad->input_dev; struct device *dev = input_dev->dev.parent; - u32 rows, cols; int error; - error = matrix_keypad_parse_properties(dev, &rows, &cols); + error = matrix_keypad_parse_properties(dev, &keypad->matrix_key_rows, + &keypad->matrix_key_cols); if (error) return error; - if (rows > MAX_MATRIX_KEY_ROWS || cols > MAX_MATRIX_KEY_COLS) { + if (keypad->matrix_key_rows > MAX_MATRIX_KEY_ROWS || + keypad->matrix_key_cols > MAX_MATRIX_KEY_COLS) { dev_err(dev, "rows or cols exceeds maximum value\n"); return -EINVAL; } - pdata->matrix_key_rows = rows; - pdata->matrix_key_cols = cols; + keypad->row_shift = get_count_order(keypad->matrix_key_cols); error = matrix_keypad_build_keymap(NULL, NULL, - pdata->matrix_key_rows, - pdata->matrix_key_cols, + keypad->matrix_key_rows, + keypad->matrix_key_cols, keypad->keycodes, input_dev); if (error) return error; @@ -155,20 +170,17 @@ static int pxa27x_keypad_matrix_key_parse_dt(struct pxa27x_keypad *keypad, return 0; } -static int pxa27x_keypad_direct_key_parse_dt(struct pxa27x_keypad *keypad, - struct pxa27x_keypad_platform_data *pdata) +static int pxa27x_keypad_direct_key_parse(struct pxa27x_keypad *keypad) { struct input_dev *input_dev = keypad->input_dev; struct device *dev = input_dev->dev.parent; - struct device_node *np = dev->of_node; - const __be16 *prop; unsigned short code; - unsigned int proplen, size; + int count; int i; int error; - error = of_property_read_u32(np, "marvell,direct-key-count", - &pdata->direct_key_num); + error = device_property_read_u32(dev, "marvell,direct-key-count", + &keypad->direct_key_num); if (error) { /* * If do not have marvel,direct-key-count defined, @@ -177,151 +189,121 @@ static int pxa27x_keypad_direct_key_parse_dt(struct pxa27x_keypad *keypad, return error == -EINVAL ? 0 : error; } - error = of_property_read_u32(np, "marvell,direct-key-mask", - &pdata->direct_key_mask); + error = device_property_read_u32(dev, "marvell,direct-key-mask", + &keypad->direct_key_mask); if (error) { if (error != -EINVAL) return error; /* * If marvell,direct-key-mask is not defined, driver will use - * default value. Default value is set when configure the keypad. + * a default value based on number of direct keys set up. + * The default value is calculated in pxa27x_keypad_config(). */ - pdata->direct_key_mask = 0; + keypad->direct_key_mask = 0; } - pdata->direct_key_low_active = of_property_read_bool(np, - "marvell,direct-key-low-active"); - - prop = of_get_property(np, "marvell,direct-key-map", &proplen); - if (!prop) - return -EINVAL; + keypad->direct_key_low_active = + device_property_read_bool(dev, "marvell,direct-key-low-active"); - if (proplen % sizeof(u16)) + count = device_property_count_u16(dev, "marvell,direct-key-map"); + if (count <= 0 || count > MAX_DIRECT_KEY_NUM) return -EINVAL; - size = proplen / sizeof(u16); - - /* Only MAX_DIRECT_KEY_NUM is accepted.*/ - if (size > MAX_DIRECT_KEY_NUM) - return -EINVAL; + error = device_property_read_u16_array(dev, "marvell,direct-key-map", + &keypad->keycodes[MAX_MATRIX_KEY_NUM], + count); - for (i = 0; i < size; i++) { - code = be16_to_cpup(prop + i); - keypad->keycodes[MAX_MATRIX_KEY_NUM + i] = code; + for (i = 0; i < count; i++) { + code = keypad->keycodes[MAX_MATRIX_KEY_NUM + i]; __set_bit(code, input_dev->keybit); } return 0; } -static int pxa27x_keypad_rotary_parse_dt(struct pxa27x_keypad *keypad, - struct pxa27x_keypad_platform_data *pdata) +static int pxa27x_keypad_rotary_parse(struct pxa27x_keypad *keypad) { - const __be32 *prop; - int i, relkey_ret; - unsigned int code, proplen; - const char *rotaryname[2] = { - "marvell,rotary0", "marvell,rotary1"}; - const char relkeyname[] = {"marvell,rotary-rel-key"}; + static const char * const rotaryname[] = { "marvell,rotary0", "marvell,rotary1" }; struct input_dev *input_dev = keypad->input_dev; struct device *dev = input_dev->dev.parent; - struct device_node *np = dev->of_node; - - relkey_ret = of_property_read_u32(np, relkeyname, &code); - /* if can read correct rotary key-code, we do not need this. */ - if (relkey_ret == 0) { - unsigned short relcode; + struct pxa27x_keypad_rotary *encoder; + unsigned int code; + int i; + int error; - /* rotary0 taks lower half, rotary1 taks upper half. */ - relcode = code & 0xffff; - pdata->rotary0_rel_code = (code & 0xffff); - __set_bit(relcode, input_dev->relbit); + error = device_property_read_u32(dev, "marvell,rotary-rel-key", &code); + if (!error) { + for (i = 0; i < MAX_ROTARY_ENCODERS; i++, code >>= 16) { + encoder = &keypad->rotary[i]; + encoder->enabled = true; + encoder->rel_code = code & 0xffff; + input_set_capability(input_dev, EV_REL, encoder->rel_code); + } - relcode = code >> 16; - pdata->rotary1_rel_code = relcode; - __set_bit(relcode, input_dev->relbit); + return 0; } - for (i = 0; i < 2; i++) { - prop = of_get_property(np, rotaryname[i], &proplen); + for (i = 0; i < MAX_ROTARY_ENCODERS; i++) { + encoder = &keypad->rotary[i]; + /* * If the prop is not set, it means keypad does not need * initialize the rotaryX. */ - if (!prop) + if (!device_property_present(dev, rotaryname[i])) continue; - code = be32_to_cpup(prop); + error = device_property_read_u32(dev, rotaryname[i], &code); + if (error) + return error; + /* * Not all up/down key code are valid. * Now we depends on direct-rel-code. */ - if ((!(code & 0xffff) || !(code >> 16)) && relkey_ret) { - return relkey_ret; - } else { - unsigned int n = MAX_MATRIX_KEY_NUM + (i << 1); - unsigned short keycode; - - keycode = code & 0xffff; - keypad->keycodes[n] = keycode; - __set_bit(keycode, input_dev->keybit); - - keycode = code >> 16; - keypad->keycodes[n + 1] = keycode; - __set_bit(keycode, input_dev->keybit); - - if (i == 0) - pdata->rotary0_rel_code = -1; - else - pdata->rotary1_rel_code = -1; - } - if (i == 0) - pdata->enable_rotary0 = 1; - else - pdata->enable_rotary1 = 1; - } + if (!(code & 0xffff) || !(code >> 16)) + return -EINVAL; + + encoder->enabled = true; + encoder->rel_code = -1; + encoder->key_codes = &keypad->keycodes[MAX_MATRIX_KEY_NUM + i * 2]; + encoder->key_codes[0] = code & 0xffff; + encoder->key_codes[1] = code >> 16; - keypad->rotary_rel_code[0] = pdata->rotary0_rel_code; - keypad->rotary_rel_code[1] = pdata->rotary1_rel_code; + input_set_capability(input_dev, EV_KEY, encoder->key_codes[0]); + input_set_capability(input_dev, EV_KEY, encoder->key_codes[1]); + } return 0; } -static int pxa27x_keypad_build_keycode_from_dt(struct pxa27x_keypad *keypad) +static int pxa27x_keypad_parse_properties(struct pxa27x_keypad *keypad) { struct input_dev *input_dev = keypad->input_dev; struct device *dev = input_dev->dev.parent; - struct device_node *np = dev->of_node; - struct pxa27x_keypad_platform_data *pdata; int error; - pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) { - dev_err(dev, "failed to allocate memory for pdata\n"); - return -ENOMEM; - } - - error = pxa27x_keypad_matrix_key_parse_dt(keypad, pdata); + error = pxa27x_keypad_matrix_key_parse(keypad); if (error) { dev_err(dev, "failed to parse matrix key\n"); return error; } - error = pxa27x_keypad_direct_key_parse_dt(keypad, pdata); + error = pxa27x_keypad_direct_key_parse(keypad); if (error) { dev_err(dev, "failed to parse direct key\n"); return error; } - error = pxa27x_keypad_rotary_parse_dt(keypad, pdata); + error = pxa27x_keypad_rotary_parse(keypad); if (error) { dev_err(dev, "failed to parse rotary key\n"); return error; } - error = of_property_read_u32(np, "marvell,debounce-interval", - &pdata->debounce_interval); + error = device_property_read_u32(dev, "marvell,debounce-interval", + &keypad->debounce_interval); if (error) { dev_err(dev, "failed to parse debounce-interval\n"); return error; @@ -333,91 +315,11 @@ static int pxa27x_keypad_build_keycode_from_dt(struct pxa27x_keypad *keypad) */ input_dev->keycodemax = ARRAY_SIZE(keypad->keycodes); - keypad->pdata = pdata; - return 0; -} - -#else - -static int pxa27x_keypad_build_keycode_from_dt(struct pxa27x_keypad *keypad) -{ - dev_info(keypad->input_dev->dev.parent, "missing platform data\n"); - - return -EINVAL; -} - -#endif - -static int pxa27x_keypad_build_keycode(struct pxa27x_keypad *keypad) -{ - const struct pxa27x_keypad_platform_data *pdata = keypad->pdata; - struct input_dev *input_dev = keypad->input_dev; - unsigned short keycode; - int i; - int error; - - error = matrix_keypad_build_keymap(pdata->matrix_keymap_data, NULL, - pdata->matrix_key_rows, - pdata->matrix_key_cols, - keypad->keycodes, input_dev); - if (error) - return error; - - /* - * The keycodes may not only include matrix keys but also the direct - * or rotary keys. - */ - input_dev->keycodemax = ARRAY_SIZE(keypad->keycodes); - - /* For direct keys. */ - for (i = 0; i < pdata->direct_key_num; i++) { - keycode = pdata->direct_key_map[i]; - keypad->keycodes[MAX_MATRIX_KEY_NUM + i] = keycode; - __set_bit(keycode, input_dev->keybit); - } - - if (pdata->enable_rotary0) { - if (pdata->rotary0_up_key && pdata->rotary0_down_key) { - keycode = pdata->rotary0_up_key; - keypad->keycodes[MAX_MATRIX_KEY_NUM + 0] = keycode; - __set_bit(keycode, input_dev->keybit); - - keycode = pdata->rotary0_down_key; - keypad->keycodes[MAX_MATRIX_KEY_NUM + 1] = keycode; - __set_bit(keycode, input_dev->keybit); - - keypad->rotary_rel_code[0] = -1; - } else { - keypad->rotary_rel_code[0] = pdata->rotary0_rel_code; - __set_bit(pdata->rotary0_rel_code, input_dev->relbit); - } - } - - if (pdata->enable_rotary1) { - if (pdata->rotary1_up_key && pdata->rotary1_down_key) { - keycode = pdata->rotary1_up_key; - keypad->keycodes[MAX_MATRIX_KEY_NUM + 2] = keycode; - __set_bit(keycode, input_dev->keybit); - - keycode = pdata->rotary1_down_key; - keypad->keycodes[MAX_MATRIX_KEY_NUM + 3] = keycode; - __set_bit(keycode, input_dev->keybit); - - keypad->rotary_rel_code[1] = -1; - } else { - keypad->rotary_rel_code[1] = pdata->rotary1_rel_code; - __set_bit(pdata->rotary1_rel_code, input_dev->relbit); - } - } - - __clear_bit(KEY_RESERVED, input_dev->keybit); - return 0; } static void pxa27x_keypad_scan_matrix(struct pxa27x_keypad *keypad) { - const struct pxa27x_keypad_platform_data *pdata = keypad->pdata; struct input_dev *input_dev = keypad->input_dev; int row, col, num_keys_pressed = 0; u32 new_state[MAX_MATRIX_KEY_COLS]; @@ -435,8 +337,8 @@ static void pxa27x_keypad_scan_matrix(struct pxa27x_keypad *keypad) row = KPAS_RP(kpas); /* if invalid row/col, treat as no key pressed */ - if (col >= pdata->matrix_key_cols || - row >= pdata->matrix_key_rows) + if (col >= keypad->matrix_key_cols || + row >= keypad->matrix_key_rows) goto scan; new_state[col] = BIT(row); @@ -459,7 +361,7 @@ static void pxa27x_keypad_scan_matrix(struct pxa27x_keypad *keypad) new_state[7] = (kpasmkp3 >> 16) & KPASMKP_MKC_MASK; } scan: - for (col = 0; col < pdata->matrix_key_cols; col++) { + for (col = 0; col < keypad->matrix_key_cols; col++) { u32 bits_changed; int code; @@ -467,7 +369,7 @@ scan: if (bits_changed == 0) continue; - for (row = 0; row < pdata->matrix_key_rows; row++) { + for (row = 0; row < keypad->matrix_key_rows; row++) { if ((bits_changed & BIT(row)) == 0) continue; @@ -496,14 +398,16 @@ static inline int rotary_delta(u32 kprec) static void report_rotary_event(struct pxa27x_keypad *keypad, int r, int delta) { + struct pxa27x_keypad_rotary *encoder = &keypad->rotary[r]; struct input_dev *dev = keypad->input_dev; - if (delta == 0) + if (!encoder->enabled || delta == 0) return; - if (keypad->rotary_rel_code[r] == -1) { - int code = MAX_MATRIX_KEY_NUM + 2 * r + (delta > 0 ? 0 : 1); - unsigned char keycode = keypad->keycodes[code]; + if (encoder->rel_code == -1) { + int idx = delta > 0 ? 0 : 1; + int code = MAX_MATRIX_KEY_NUM + 2 * r + idx; + unsigned char keycode = encoder->key_codes[idx]; /* simulate a press-n-release */ input_event(dev, EV_MSC, MSC_SCAN, code); @@ -513,30 +417,28 @@ static void report_rotary_event(struct pxa27x_keypad *keypad, int r, int delta) input_report_key(dev, keycode, 0); input_sync(dev); } else { - input_report_rel(dev, keypad->rotary_rel_code[r], delta); + input_report_rel(dev, encoder->rel_code, delta); input_sync(dev); } } static void pxa27x_keypad_scan_rotary(struct pxa27x_keypad *keypad) { - const struct pxa27x_keypad_platform_data *pdata = keypad->pdata; u32 kprec; + int i; /* read and reset to default count value */ kprec = keypad_readl(KPREC); keypad_writel(KPREC, DEFAULT_KPREC); - if (pdata->enable_rotary0) + for (i = 0; i < MAX_ROTARY_ENCODERS; i++) { report_rotary_event(keypad, 0, rotary_delta(kprec)); - - if (pdata->enable_rotary1) - report_rotary_event(keypad, 1, rotary_delta(kprec >> 16)); + kprec >>= 16; + } } static void pxa27x_keypad_scan_direct(struct pxa27x_keypad *keypad) { - const struct pxa27x_keypad_platform_data *pdata = keypad->pdata; struct input_dev *input_dev = keypad->input_dev; unsigned int new_state; u32 kpdk, bits_changed; @@ -544,14 +446,14 @@ static void pxa27x_keypad_scan_direct(struct pxa27x_keypad *keypad) kpdk = keypad_readl(KPDK); - if (pdata->enable_rotary0 || pdata->enable_rotary1) + if (keypad->rotary[0].enabled || keypad->rotary[1].enabled) pxa27x_keypad_scan_rotary(keypad); /* * The KPDR_DK only output the key pin level, so it relates to board, * and low level may be active. */ - if (pdata->direct_key_low_active) + if (keypad->direct_key_low_active) new_state = ~KPDK_DK(kpdk) & keypad->direct_key_mask; else new_state = KPDK_DK(kpdk) & keypad->direct_key_mask; @@ -561,7 +463,7 @@ static void pxa27x_keypad_scan_direct(struct pxa27x_keypad *keypad) if (bits_changed == 0) return; - for (i = 0; i < pdata->direct_key_num; i++) { + for (i = 0; i < keypad->direct_key_num; i++) { if (bits_changed & BIT(i)) { int code = MAX_MATRIX_KEY_NUM + i; @@ -574,21 +476,11 @@ static void pxa27x_keypad_scan_direct(struct pxa27x_keypad *keypad) keypad->direct_key_state = new_state; } -static void clear_wakeup_event(struct pxa27x_keypad *keypad) -{ - const struct pxa27x_keypad_platform_data *pdata = keypad->pdata; - - if (pdata->clear_wakeup_event) - (pdata->clear_wakeup_event)(); -} - static irqreturn_t pxa27x_keypad_irq_handler(int irq, void *dev_id) { struct pxa27x_keypad *keypad = dev_id; unsigned long kpc = keypad_readl(KPC); - clear_wakeup_event(keypad); - if (kpc & KPC_DI) pxa27x_keypad_scan_direct(keypad); @@ -600,7 +492,6 @@ static irqreturn_t pxa27x_keypad_irq_handler(int irq, void *dev_id) static void pxa27x_keypad_config(struct pxa27x_keypad *keypad) { - const struct pxa27x_keypad_platform_data *pdata = keypad->pdata; unsigned int mask = 0, direct_key_num = 0; unsigned long kpc = 0; @@ -608,35 +499,33 @@ static void pxa27x_keypad_config(struct pxa27x_keypad *keypad) keypad_readl(KPC); /* enable matrix keys with automatic scan */ - if (pdata->matrix_key_rows && pdata->matrix_key_cols) { + if (keypad->matrix_key_rows && keypad->matrix_key_cols) { kpc |= KPC_ASACT | KPC_MIE | KPC_ME | KPC_MS_ALL; - kpc |= KPC_MKRN(pdata->matrix_key_rows) | - KPC_MKCN(pdata->matrix_key_cols); + kpc |= KPC_MKRN(keypad->matrix_key_rows) | + KPC_MKCN(keypad->matrix_key_cols); } /* enable rotary key, debounce interval same as direct keys */ - if (pdata->enable_rotary0) { + if (keypad->rotary[0].enabled) { mask |= 0x03; direct_key_num = 2; kpc |= KPC_REE0; } - if (pdata->enable_rotary1) { + if (keypad->rotary[1].enabled) { mask |= 0x0c; direct_key_num = 4; kpc |= KPC_REE1; } - if (pdata->direct_key_num > direct_key_num) - direct_key_num = pdata->direct_key_num; + if (keypad->direct_key_num > direct_key_num) + direct_key_num = keypad->direct_key_num; /* * Direct keys usage may not start from KP_DKIN0, check the platfrom * mask data to config the specific. */ - if (pdata->direct_key_mask) - keypad->direct_key_mask = pdata->direct_key_mask; - else + if (!keypad->direct_key_mask) keypad->direct_key_mask = GENMASK(direct_key_num - 1, 0) & ~mask; /* enable direct key */ @@ -645,7 +534,7 @@ static void pxa27x_keypad_config(struct pxa27x_keypad *keypad) keypad_writel(KPC, kpc | KPC_RE_ZERO_DEB); keypad_writel(KPREC, DEFAULT_KPREC); - keypad_writel(KPKDI, pdata->debounce_interval); + keypad_writel(KPKDI, keypad->debounce_interval); } static int pxa27x_keypad_open(struct input_dev *dev) @@ -719,19 +608,12 @@ static int pxa27x_keypad_resume(struct device *dev) static DEFINE_SIMPLE_DEV_PM_OPS(pxa27x_keypad_pm_ops, pxa27x_keypad_suspend, pxa27x_keypad_resume); - static int pxa27x_keypad_probe(struct platform_device *pdev) { - const struct pxa27x_keypad_platform_data *pdata = - dev_get_platdata(&pdev->dev); - struct device_node *np = pdev->dev.of_node; struct pxa27x_keypad *keypad; struct input_dev *input_dev; - int irq, error; - - /* Driver need build keycode from device tree or pdata */ - if (!np && !pdata) - return -EINVAL; + int irq; + int error; irq = platform_get_irq(pdev, 0); if (irq < 0) @@ -746,7 +628,6 @@ static int pxa27x_keypad_probe(struct platform_device *pdev) if (!input_dev) return -ENOMEM; - keypad->pdata = pdata; keypad->input_dev = input_dev; keypad->irq = irq; @@ -775,29 +656,12 @@ static int pxa27x_keypad_probe(struct platform_device *pdev) input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP); input_set_capability(input_dev, EV_MSC, MSC_SCAN); - if (pdata) { - error = pxa27x_keypad_build_keycode(keypad); - } else { - error = pxa27x_keypad_build_keycode_from_dt(keypad); - /* - * Data that we get from DT resides in dynamically - * allocated memory so we need to update our pdata - * pointer. - */ - pdata = keypad->pdata; - } + error = pxa27x_keypad_parse_properties(keypad); if (error) { - dev_err(&pdev->dev, "failed to build keycode\n"); + dev_err(&pdev->dev, "failed to parse keypad properties\n"); return error; } - keypad->row_shift = get_count_order(pdata->matrix_key_cols); - - if ((pdata->enable_rotary0 && keypad->rotary_rel_code[0] != -1) || - (pdata->enable_rotary1 && keypad->rotary_rel_code[1] != -1)) { - input_dev->evbit[0] |= BIT_MASK(EV_REL); - } - error = devm_request_irq(&pdev->dev, irq, pxa27x_keypad_irq_handler, 0, pdev->name, keypad); if (error) { diff --git a/include/linux/platform_data/keypad-pxa27x.h b/include/linux/platform_data/keypad-pxa27x.h deleted file mode 100644 index a376442b9935..000000000000 --- a/include/linux/platform_data/keypad-pxa27x.h +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARCH_PXA27x_KEYPAD_H -#define __ASM_ARCH_PXA27x_KEYPAD_H - -#include -#include - -#define MAX_MATRIX_KEY_ROWS (8) -#define MAX_MATRIX_KEY_COLS (8) -#define MATRIX_ROW_SHIFT (3) -#define MAX_DIRECT_KEY_NUM (8) - -/* pxa3xx keypad platform specific parameters - * - * NOTE: - * 1. direct_key_num indicates the number of keys in the direct keypad - * _plus_ the number of rotary-encoder sensor inputs, this can be - * left as 0 if only rotary encoders are enabled, the driver will - * automatically calculate this - * - * 2. direct_key_map is the key code map for the direct keys, if rotary - * encoder(s) are enabled, direct key 0/1(2/3) will be ignored - * - * 3. rotary can be either interpreted as a relative input event (e.g. - * REL_WHEEL/REL_HWHEEL) or specific keys (e.g. UP/DOWN/LEFT/RIGHT) - * - * 4. matrix key and direct key will use the same debounce_interval by - * default, which should be sufficient in most cases - * - * pxa168 keypad platform specific parameter - * - * NOTE: - * clear_wakeup_event callback is a workaround required to clear the - * keypad interrupt. The keypad wake must be cleared in addition to - * reading the MI/DI bits in the KPC register. - */ -struct pxa27x_keypad_platform_data { - - /* code map for the matrix keys */ - const struct matrix_keymap_data *matrix_keymap_data; - unsigned int matrix_key_rows; - unsigned int matrix_key_cols; - - /* direct keys */ - int direct_key_num; - unsigned int direct_key_map[MAX_DIRECT_KEY_NUM]; - /* the key output may be low active */ - int direct_key_low_active; - /* give board a chance to choose the start direct key */ - unsigned int direct_key_mask; - - /* rotary encoders 0 */ - int enable_rotary0; - int rotary0_rel_code; - int rotary0_up_key; - int rotary0_down_key; - - /* rotary encoders 1 */ - int enable_rotary1; - int rotary1_rel_code; - int rotary1_up_key; - int rotary1_down_key; - - /* key debounce interval */ - unsigned int debounce_interval; - - /* clear wakeup event requirement for pxa168 */ - void (*clear_wakeup_event)(void); -}; - -extern void pxa_set_keypad_info(struct pxa27x_keypad_platform_data *info); - -#endif /* __ASM_ARCH_PXA27x_KEYPAD_H */ -- cgit v1.2.3 From 890ba82a60cb7a6584968a4ac15546f434afa40b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 17 Aug 2025 14:39:06 -0700 Subject: Input: spear-keyboard - drop support for platform data There are no in-kernel users of spear kbd_platform_data in the kernel, and the driver supports configuration via device tree, so drop support of static platform data and move properties parsing from OF-specific methods to generic ones. Link: https://lore.kernel.org/r/vppjxui76im26uamznx7evm5lmbe3d6v3oxsa7mqyytykh4zm6@nhlf33v3hp6g Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/spear-keyboard.c | 71 +++--------- include/linux/platform_data/keyboard-spear.h | 164 --------------------------- 2 files changed, 15 insertions(+), 220 deletions(-) delete mode 100644 include/linux/platform_data/keyboard-spear.h (limited to 'include/linux') diff --git a/drivers/input/keyboard/spear-keyboard.c b/drivers/input/keyboard/spear-keyboard.c index 2fae337562a2..53f3ac64c980 100644 --- a/drivers/input/keyboard/spear-keyboard.c +++ b/drivers/input/keyboard/spear-keyboard.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -22,7 +23,6 @@ #include #include #include -#include /* Keyboard Registers */ #define MODE_CTL_REG 0x00 @@ -56,13 +56,12 @@ struct spear_kbd { void __iomem *io_base; struct clk *clk; unsigned int irq; - unsigned int mode; - unsigned int suspended_rate; + u32 mode; + u32 suspended_rate; + u32 mode_ctl_reg; unsigned short last_key; unsigned short keycodes[NUM_ROWS * NUM_COLS]; - bool rep; bool irq_wake_enabled; - u32 mode_ctl_reg; }; static irqreturn_t spear_kbd_interrupt(int irq, void *dev_id) @@ -143,46 +142,8 @@ static void spear_kbd_close(struct input_dev *dev) kbd->last_key = KEY_RESERVED; } -#ifdef CONFIG_OF -static int spear_kbd_parse_dt(struct platform_device *pdev, - struct spear_kbd *kbd) -{ - struct device_node *np = pdev->dev.of_node; - int error; - u32 val, suspended_rate; - - if (!np) { - dev_err(&pdev->dev, "Missing DT data\n"); - return -EINVAL; - } - - if (of_property_read_bool(np, "autorepeat")) - kbd->rep = true; - - if (of_property_read_u32(np, "suspended_rate", &suspended_rate)) - kbd->suspended_rate = suspended_rate; - - error = of_property_read_u32(np, "st,mode", &val); - if (error) { - dev_err(&pdev->dev, "DT: Invalid or missing mode\n"); - return error; - } - - kbd->mode = val; - return 0; -} -#else -static inline int spear_kbd_parse_dt(struct platform_device *pdev, - struct spear_kbd *kbd) -{ - return -ENOSYS; -} -#endif - static int spear_kbd_probe(struct platform_device *pdev) { - struct kbd_platform_data *pdata = dev_get_platdata(&pdev->dev); - const struct matrix_keymap_data *keymap = pdata ? pdata->keymap : NULL; struct spear_kbd *kbd; struct input_dev *input_dev; int irq; @@ -198,6 +159,14 @@ static int spear_kbd_probe(struct platform_device *pdev) return -ENOMEM; } + error = device_property_read_u32(&pdev->dev, "st,mode", &kbd->mode); + if (error) { + dev_err(&pdev->dev, "Invalid or missing mode\n"); + return error; + } + + device_property_read_u32(&pdev->dev, "suspended_rate", &kbd->suspended_rate); + input_dev = devm_input_allocate_device(&pdev->dev); if (!input_dev) { dev_err(&pdev->dev, "unable to allocate input device\n"); @@ -207,16 +176,6 @@ static int spear_kbd_probe(struct platform_device *pdev) kbd->input = input_dev; kbd->irq = irq; - if (!pdata) { - error = spear_kbd_parse_dt(pdev, kbd); - if (error) - return error; - } else { - kbd->mode = pdata->mode; - kbd->rep = pdata->rep; - kbd->suspended_rate = pdata->suspended_rate; - } - kbd->io_base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); if (IS_ERR(kbd->io_base)) return PTR_ERR(kbd->io_base); @@ -234,21 +193,21 @@ static int spear_kbd_probe(struct platform_device *pdev) input_dev->open = spear_kbd_open; input_dev->close = spear_kbd_close; - error = matrix_keypad_build_keymap(keymap, NULL, NUM_ROWS, NUM_COLS, + error = matrix_keypad_build_keymap(NULL, NULL, NUM_ROWS, NUM_COLS, kbd->keycodes, input_dev); if (error) { dev_err(&pdev->dev, "Failed to build keymap\n"); return error; } - if (kbd->rep) + if (device_property_read_bool(&pdev->dev, "autorepeat")) __set_bit(EV_REP, input_dev->evbit); input_set_capability(input_dev, EV_MSC, MSC_SCAN); input_set_drvdata(input_dev, kbd); error = devm_request_irq(&pdev->dev, irq, spear_kbd_interrupt, 0, - "keyboard", kbd); + "keyboard", kbd); if (error) { dev_err(&pdev->dev, "request_irq failed\n"); return error; diff --git a/include/linux/platform_data/keyboard-spear.h b/include/linux/platform_data/keyboard-spear.h deleted file mode 100644 index 5e3ff653900c..000000000000 --- a/include/linux/platform_data/keyboard-spear.h +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (C) 2010 ST Microelectronics - * Rajeev Kumar - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#ifndef __PLAT_KEYBOARD_H -#define __PLAT_KEYBOARD_H - -#include -#include -#include -#include - -#define DECLARE_9x9_KEYMAP(_name) \ -int _name[] = { \ - KEY(0, 0, KEY_ESC), \ - KEY(0, 1, KEY_1), \ - KEY(0, 2, KEY_2), \ - KEY(0, 3, KEY_3), \ - KEY(0, 4, KEY_4), \ - KEY(0, 5, KEY_5), \ - KEY(0, 6, KEY_6), \ - KEY(0, 7, KEY_7), \ - KEY(0, 8, KEY_8), \ - KEY(1, 0, KEY_9), \ - KEY(1, 1, KEY_MINUS), \ - KEY(1, 2, KEY_EQUAL), \ - KEY(1, 3, KEY_BACKSPACE), \ - KEY(1, 4, KEY_TAB), \ - KEY(1, 5, KEY_Q), \ - KEY(1, 6, KEY_W), \ - KEY(1, 7, KEY_E), \ - KEY(1, 8, KEY_R), \ - KEY(2, 0, KEY_T), \ - KEY(2, 1, KEY_Y), \ - KEY(2, 2, KEY_U), \ - KEY(2, 3, KEY_I), \ - KEY(2, 4, KEY_O), \ - KEY(2, 5, KEY_P), \ - KEY(2, 6, KEY_LEFTBRACE), \ - KEY(2, 7, KEY_RIGHTBRACE), \ - KEY(2, 8, KEY_ENTER), \ - KEY(3, 0, KEY_LEFTCTRL), \ - KEY(3, 1, KEY_A), \ - KEY(3, 2, KEY_S), \ - KEY(3, 3, KEY_D), \ - KEY(3, 4, KEY_F), \ - KEY(3, 5, KEY_G), \ - KEY(3, 6, KEY_H), \ - KEY(3, 7, KEY_J), \ - KEY(3, 8, KEY_K), \ - KEY(4, 0, KEY_L), \ - KEY(4, 1, KEY_SEMICOLON), \ - KEY(4, 2, KEY_APOSTROPHE), \ - KEY(4, 3, KEY_GRAVE), \ - KEY(4, 4, KEY_LEFTSHIFT), \ - KEY(4, 5, KEY_BACKSLASH), \ - KEY(4, 6, KEY_Z), \ - KEY(4, 7, KEY_X), \ - KEY(4, 8, KEY_C), \ - KEY(5, 0, KEY_V), \ - KEY(5, 1, KEY_B), \ - KEY(5, 2, KEY_N), \ - KEY(5, 3, KEY_M), \ - KEY(5, 4, KEY_COMMA), \ - KEY(5, 5, KEY_DOT), \ - KEY(5, 6, KEY_SLASH), \ - KEY(5, 7, KEY_RIGHTSHIFT), \ - KEY(5, 8, KEY_KPASTERISK), \ - KEY(6, 0, KEY_LEFTALT), \ - KEY(6, 1, KEY_SPACE), \ - KEY(6, 2, KEY_CAPSLOCK), \ - KEY(6, 3, KEY_F1), \ - KEY(6, 4, KEY_F2), \ - KEY(6, 5, KEY_F3), \ - KEY(6, 6, KEY_F4), \ - KEY(6, 7, KEY_F5), \ - KEY(6, 8, KEY_F6), \ - KEY(7, 0, KEY_F7), \ - KEY(7, 1, KEY_F8), \ - KEY(7, 2, KEY_F9), \ - KEY(7, 3, KEY_F10), \ - KEY(7, 4, KEY_NUMLOCK), \ - KEY(7, 5, KEY_SCROLLLOCK), \ - KEY(7, 6, KEY_KP7), \ - KEY(7, 7, KEY_KP8), \ - KEY(7, 8, KEY_KP9), \ - KEY(8, 0, KEY_KPMINUS), \ - KEY(8, 1, KEY_KP4), \ - KEY(8, 2, KEY_KP5), \ - KEY(8, 3, KEY_KP6), \ - KEY(8, 4, KEY_KPPLUS), \ - KEY(8, 5, KEY_KP1), \ - KEY(8, 6, KEY_KP2), \ - KEY(8, 7, KEY_KP3), \ - KEY(8, 8, KEY_KP0), \ -} - -#define DECLARE_6x6_KEYMAP(_name) \ -int _name[] = { \ - KEY(0, 0, KEY_RESERVED), \ - KEY(0, 1, KEY_1), \ - KEY(0, 2, KEY_2), \ - KEY(0, 3, KEY_3), \ - KEY(0, 4, KEY_4), \ - KEY(0, 5, KEY_5), \ - KEY(1, 0, KEY_Q), \ - KEY(1, 1, KEY_W), \ - KEY(1, 2, KEY_E), \ - KEY(1, 3, KEY_R), \ - KEY(1, 4, KEY_T), \ - KEY(1, 5, KEY_Y), \ - KEY(2, 0, KEY_D), \ - KEY(2, 1, KEY_F), \ - KEY(2, 2, KEY_G), \ - KEY(2, 3, KEY_H), \ - KEY(2, 4, KEY_J), \ - KEY(2, 5, KEY_K), \ - KEY(3, 0, KEY_B), \ - KEY(3, 1, KEY_N), \ - KEY(3, 2, KEY_M), \ - KEY(3, 3, KEY_COMMA), \ - KEY(3, 4, KEY_DOT), \ - KEY(3, 5, KEY_SLASH), \ - KEY(4, 0, KEY_F6), \ - KEY(4, 1, KEY_F7), \ - KEY(4, 2, KEY_F8), \ - KEY(4, 3, KEY_F9), \ - KEY(4, 4, KEY_F10), \ - KEY(4, 5, KEY_NUMLOCK), \ - KEY(5, 0, KEY_KP2), \ - KEY(5, 1, KEY_KP3), \ - KEY(5, 2, KEY_KP0), \ - KEY(5, 3, KEY_KPDOT), \ - KEY(5, 4, KEY_RO), \ - KEY(5, 5, KEY_ZENKAKUHANKAKU), \ -} - -#define KEYPAD_9x9 0 -#define KEYPAD_6x6 1 -#define KEYPAD_2x2 2 - -/** - * struct kbd_platform_data - spear keyboard platform data - * keymap: pointer to keymap data (table and size) - * rep: enables key autorepeat - * mode: choose keyboard support(9x9, 6x6, 2x2) - * suspended_rate: rate at which keyboard would operate in suspended mode - * - * This structure is supposed to be used by platform code to supply - * keymaps to drivers that implement keyboards. - */ -struct kbd_platform_data { - const struct matrix_keymap_data *keymap; - bool rep; - unsigned int mode; - unsigned int suspended_rate; -}; - -#endif /* __PLAT_KEYBOARD_H */ -- cgit v1.2.3 From e465ad7ef57aa1ec4122fd5b34c182d59629cb91 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Mon, 11 Aug 2025 08:48:08 -0400 Subject: clk: ti: dpll: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate(). Part of these changes were done using the Coccinelle semantic patch on the cover letter of this series, and the rest of the changes were manually done. omap4_dpll_regm4xen_round_rate() is now only called by omap4_dpll_regm4xen_determine_rate(), so let's merge that functionality into one function. This is needed for another cleanup to completely remove the round_rate() clk ops from the clk core. Tested-by: Anddreas Kemnade # OMAP3 GTA04, OMAP4 Panda Reviewed-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: Brian Masney --- drivers/clk/ti/clkt_dpll.c | 28 +++++++-------- drivers/clk/ti/clock.h | 6 +--- drivers/clk/ti/dpll.c | 4 +-- drivers/clk/ti/dpll3xxx.c | 7 ++-- drivers/clk/ti/dpll44xx.c | 89 ++++++++++++++++------------------------------ include/linux/clk/ti.h | 8 ++--- 6 files changed, 57 insertions(+), 85 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/ti/clkt_dpll.c b/drivers/clk/ti/clkt_dpll.c index a8c0fc20f791..2ecd66968af4 100644 --- a/drivers/clk/ti/clkt_dpll.c +++ b/drivers/clk/ti/clkt_dpll.c @@ -268,10 +268,9 @@ unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk) /* DPLL rate rounding code */ /** - * omap2_dpll_round_rate - round a target rate for an OMAP DPLL + * omap2_dpll_determine_rate - round a target rate for an OMAP DPLL * @hw: struct clk_hw containing the struct clk * for a DPLL - * @target_rate: desired DPLL clock rate - * @parent_rate: parent's DPLL clock rate + * @req: rate request * * Given a DPLL and a desired target rate, round the target rate to a * possible, programmable rate for this DPLL. Attempts to select the @@ -280,8 +279,7 @@ unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk) * (expensive) function again. Returns -EINVAL if the target rate * cannot be rounded, or the rounded rate upon success. */ -long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, - unsigned long *parent_rate) +int omap2_dpll_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { struct clk_hw_omap *clk = to_clk_hw_omap(hw); int m, n, r, scaled_max_m; @@ -299,15 +297,15 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, dd = clk->dpll_data; - if (dd->max_rate && target_rate > dd->max_rate) - target_rate = dd->max_rate; + if (dd->max_rate && req->rate > dd->max_rate) + req->rate = dd->max_rate; ref_rate = clk_hw_get_rate(dd->clk_ref); clk_name = clk_hw_get_name(hw); pr_debug("clock: %s: starting DPLL round_rate, target rate %lu\n", - clk_name, target_rate); + clk_name, req->rate); - scaled_rt_rp = target_rate / (ref_rate / DPLL_SCALE_FACTOR); + scaled_rt_rp = req->rate / (ref_rate / DPLL_SCALE_FACTOR); scaled_max_m = dd->max_multiplier * DPLL_SCALE_FACTOR; dd->last_rounded_rate = 0; @@ -332,7 +330,7 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, if (m > scaled_max_m) break; - r = _dpll_test_mult(&m, n, &new_rate, target_rate, + r = _dpll_test_mult(&m, n, &new_rate, req->rate, ref_rate); /* m can't be set low enough for this n - try with a larger n */ @@ -340,7 +338,7 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, continue; /* skip rates above our target rate */ - delta = target_rate - new_rate; + delta = req->rate - new_rate; if (delta < 0) continue; @@ -359,13 +357,15 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, if (prev_min_delta == LONG_MAX) { pr_debug("clock: %s: cannot round to rate %lu\n", - clk_name, target_rate); + clk_name, req->rate); return -EINVAL; } dd->last_rounded_m = min_delta_m; dd->last_rounded_n = min_delta_n; - dd->last_rounded_rate = target_rate - prev_min_delta; + dd->last_rounded_rate = req->rate - prev_min_delta; - return dd->last_rounded_rate; + req->rate = dd->last_rounded_rate; + + return 0; } diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 2de7acea1ea0..d5e24fe4ae3a 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -273,8 +273,7 @@ int omap3_noncore_dpll_set_rate_and_parent(struct clk_hw *hw, u8 index); int omap3_noncore_dpll_determine_rate(struct clk_hw *hw, struct clk_rate_request *req); -long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, - unsigned long *parent_rate); +int omap2_dpll_determine_rate(struct clk_hw *hw, struct clk_rate_request *req); unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, unsigned long parent_rate); @@ -296,9 +295,6 @@ void omap3_clk_lock_dpll5(void); unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw, unsigned long parent_rate); -long omap4_dpll_regm4xen_round_rate(struct clk_hw *hw, - unsigned long target_rate, - unsigned long *parent_rate); int omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, struct clk_rate_request *req); int omap2_clk_for_each(int (*fn)(struct clk_hw_omap *hw)); diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c index 1f55554e0d73..971adafd9a8b 100644 --- a/drivers/clk/ti/dpll.c +++ b/drivers/clk/ti/dpll.c @@ -77,7 +77,7 @@ const struct clk_hw_omap_ops clkhwops_omap3_dpll = {}; static const struct clk_ops omap2_dpll_core_ck_ops = { .get_parent = &omap2_init_dpll_parent, .recalc_rate = &omap2_dpllcore_recalc, - .round_rate = &omap2_dpll_round_rate, + .determine_rate = &omap2_dpll_determine_rate, .set_rate = &omap2_reprogram_dpllcore, }; #else @@ -88,7 +88,7 @@ static const struct clk_ops omap2_dpll_core_ck_ops = {}; static const struct clk_ops omap3_dpll_core_ck_ops = { .get_parent = &omap2_init_dpll_parent, .recalc_rate = &omap3_dpll_recalc, - .round_rate = &omap2_dpll_round_rate, + .determine_rate = &omap2_dpll_determine_rate, }; static const struct clk_ops omap3_dpll_ck_ops = { diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c index 00680486b1bd..8c51b988a04f 100644 --- a/drivers/clk/ti/dpll3xxx.c +++ b/drivers/clk/ti/dpll3xxx.c @@ -587,6 +587,7 @@ int omap3_noncore_dpll_determine_rate(struct clk_hw *hw, { struct clk_hw_omap *clk = to_clk_hw_omap(hw); struct dpll_data *dd; + int ret; if (!req->rate) return -EINVAL; @@ -599,8 +600,10 @@ int omap3_noncore_dpll_determine_rate(struct clk_hw *hw, (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) { req->best_parent_hw = dd->clk_bypass; } else { - req->rate = omap2_dpll_round_rate(hw, req->rate, - &req->best_parent_rate); + ret = omap2_dpll_determine_rate(hw, req); + if (ret != 0) + return ret; + req->best_parent_hw = dd->clk_ref; } diff --git a/drivers/clk/ti/dpll44xx.c b/drivers/clk/ti/dpll44xx.c index 3fc2cab69a3f..08ed57f181b4 100644 --- a/drivers/clk/ti/dpll44xx.c +++ b/drivers/clk/ti/dpll44xx.c @@ -133,61 +133,6 @@ unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw, return rate; } -/** - * omap4_dpll_regm4xen_round_rate - round DPLL rate, considering REGM4XEN bit - * @hw: struct hw_clk containing the struct clk * of the DPLL to round a rate for - * @target_rate: the desired rate of the DPLL - * @parent_rate: clock rate of the DPLL parent - * - * Compute the rate that would be programmed into the DPLL hardware - * for @clk if set_rate() were to be provided with the rate - * @target_rate. Takes the REGM4XEN bit into consideration, which is - * needed for the OMAP4 ABE DPLL. Returns the rounded rate (before - * M-dividers) upon success, -EINVAL if @clk is null or not a DPLL, or - * ~0 if an error occurred in omap2_dpll_round_rate(). - */ -long omap4_dpll_regm4xen_round_rate(struct clk_hw *hw, - unsigned long target_rate, - unsigned long *parent_rate) -{ - struct clk_hw_omap *clk = to_clk_hw_omap(hw); - struct dpll_data *dd; - long r; - - if (!clk || !clk->dpll_data) - return -EINVAL; - - dd = clk->dpll_data; - - dd->last_rounded_m4xen = 0; - - /* - * First try to compute the DPLL configuration for - * target rate without using the 4X multiplier. - */ - r = omap2_dpll_round_rate(hw, target_rate, NULL); - if (r != ~0) - goto out; - - /* - * If we did not find a valid DPLL configuration, try again, but - * this time see if using the 4X multiplier can help. Enabling the - * 4X multiplier is equivalent to dividing the target rate by 4. - */ - r = omap2_dpll_round_rate(hw, target_rate / OMAP4430_REGM4XEN_MULT, - NULL); - if (r == ~0) - return r; - - dd->last_rounded_rate *= OMAP4430_REGM4XEN_MULT; - dd->last_rounded_m4xen = 1; - -out: - omap4_dpll_lpmode_recalc(dd); - - return dd->last_rounded_rate; -} - /** * omap4_dpll_regm4xen_determine_rate - determine rate for a DPLL * @hw: pointer to the clock to determine rate for @@ -195,7 +140,7 @@ out: * * Determines which DPLL mode to use for reaching a desired rate. * Checks whether the DPLL shall be in bypass or locked mode, and if - * locked, calculates the M,N values for the DPLL via round-rate. + * locked, calculates the M,N values for the DPLL. * Returns 0 on success and a negative error value otherwise. */ int omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, @@ -215,8 +160,36 @@ int omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) { req->best_parent_hw = dd->clk_bypass; } else { - req->rate = omap4_dpll_regm4xen_round_rate(hw, req->rate, - &req->best_parent_rate); + struct clk_rate_request tmp_req; + long r; + + clk_hw_init_rate_request(hw, &tmp_req, req->rate); + dd->last_rounded_m4xen = 0; + + /* + * First try to compute the DPLL configuration for + * target rate without using the 4X multiplier. + */ + + r = omap2_dpll_determine_rate(hw, &tmp_req); + if (r < 0) { + /* + * If we did not find a valid DPLL configuration, try again, but + * this time see if using the 4X multiplier can help. Enabling the + * 4X multiplier is equivalent to dividing the target rate by 4. + */ + tmp_req.rate /= OMAP4430_REGM4XEN_MULT; + r = omap2_dpll_determine_rate(hw, &tmp_req); + if (r < 0) + return r; + + dd->last_rounded_rate *= OMAP4430_REGM4XEN_MULT; + dd->last_rounded_m4xen = 1; + } + + omap4_dpll_lpmode_recalc(dd); + + req->rate = dd->last_rounded_rate; req->best_parent_hw = dd->clk_ref; } diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index e656f63efdce..54a3fa370004 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -34,14 +34,14 @@ struct clk_omap_reg { * @clk_ref: struct clk_hw pointer to the clock's reference clock input * @control_reg: register containing the DPLL mode bitfield * @enable_mask: mask of the DPLL mode bitfield in @control_reg - * @last_rounded_rate: cache of the last rate result of omap2_dpll_round_rate() - * @last_rounded_m: cache of the last M result of omap2_dpll_round_rate() + * @last_rounded_rate: cache of the last rate result of omap2_dpll_determine_rate() + * @last_rounded_m: cache of the last M result of omap2_dpll_determine_rate() * @last_rounded_m4xen: cache of the last M4X result of - * omap4_dpll_regm4xen_round_rate() + * omap4_dpll_regm4xen_determine_rate() * @last_rounded_lpmode: cache of the last lpmode result of * omap4_dpll_lpmode_recalc() * @max_multiplier: maximum valid non-bypass multiplier value (actual) - * @last_rounded_n: cache of the last N result of omap2_dpll_round_rate() + * @last_rounded_n: cache of the last N result of omap2_dpll_determine_rate() * @min_divider: minimum valid non-bypass divider value (actual) * @max_divider: maximum valid non-bypass divider value (actual) * @max_rate: maximum clock rate for the DPLL -- cgit v1.2.3 From 3f5952917498e7bb9d227812d4349668f62c413b Mon Sep 17 00:00:00 2001 From: Per Larsen Date: Wed, 20 Aug 2025 01:10:09 +0000 Subject: KVM: arm64: Mask response to FFA_FEATURE call The minimum size and alignment boundary for FFA_RXTX_MAP is returned in bit[1:0]. Mask off any other bits in w2 when reading the minimum buffer size in hyp_ffa_post_init. Acked-by: Will Deacon Signed-off-by: Per Larsen Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/ffa.c | 2 +- include/linux/arm_ffa.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 96109aa99c48..a8ec1a94f3f8 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -739,7 +739,7 @@ static int hyp_ffa_post_init(void) if (res.a0 != FFA_SUCCESS) return -EOPNOTSUPP; - switch (res.a2) { + switch (res.a2 & FFA_FEAT_RXTX_MIN_SZ_MASK) { case FFA_FEAT_RXTX_MIN_SZ_4K: min_rxtx_sz = SZ_4K; break; diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index e1634897e159..cd7ee4df9045 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -128,6 +128,7 @@ #define FFA_FEAT_RXTX_MIN_SZ_4K 0 #define FFA_FEAT_RXTX_MIN_SZ_64K 1 #define FFA_FEAT_RXTX_MIN_SZ_16K 2 +#define FFA_FEAT_RXTX_MIN_SZ_MASK GENMASK(1, 0) /* FFA Bus/Device/Driver related */ struct ffa_device { -- cgit v1.2.3 From 2d0ddbb65cef99aab241378b0f4ff2d6ea8c3a5a Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 31 Aug 2025 09:04:06 -0700 Subject: Drivers: hv: Simplify data structures for VMBus channel close message struct vmbus_close_msg is used for sending the VMBus channel close message. It contains a struct vmbus_channel_msginfo, which has a flex array member at the end. The latter's presence in the middle of struct vmbus_close_msg causes warnings when built with -Wflex-array-member-not-at-end. But the struct vmbus_channel_msginfo is unused because the Hyper-V host does not send a response to the channel close message. So remove the struct vmbus_channel_msginfo. Then, since the only remaining field is struct vmbus_channel_close_channel, also remove the containing struct vmbus_close_msg and directly use struct vmbus_channel_close_channel. Besides eliminating unnecessary complexity, these changes resolve the -Wflex-array-member-not-at-end warnings. Signed-off-by: Michael Kelley Reviewed-by: Tianyu Lan Signed-off-by: Wei Liu --- drivers/hv/channel.c | 2 +- include/linux/hyperv.h | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 7c7c66e0dc3f..162d6aeece7b 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -925,7 +925,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel) /* Send a closing message */ - msg = &channel->close_msg.msg; + msg = &channel->close_msg; msg->header.msgtype = CHANNELMSG_CLOSECHANNEL; msg->child_relid = channel->offermsg.child_relid; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index a59c5c3e95fb..59826c89171c 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -707,11 +707,6 @@ struct vmbus_channel_msginfo { unsigned char msg[]; }; -struct vmbus_close_msg { - struct vmbus_channel_msginfo info; - struct vmbus_channel_close_channel msg; -}; - enum vmbus_device_type { HV_IDE = 0, HV_SCSI, @@ -800,7 +795,7 @@ struct vmbus_channel { struct hv_ring_buffer_info outbound; /* send to parent */ struct hv_ring_buffer_info inbound; /* receive from parent */ - struct vmbus_close_msg close_msg; + struct vmbus_channel_close_channel close_msg; /* Statistics */ u64 interrupts; /* Host to Guest interrupts */ -- cgit v1.2.3 From 1dfdf4527fd391f653c53b634af9122613c58904 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 30 Aug 2025 18:48:32 +0200 Subject: iio: adc: exynos_adc: Drop platform data support There are no Samsung Exynos SoC ADC driver users which bind via platform ID, thus platform data is never set and can be dropped. Reviewed-by: Andy Shevchenko Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250830-s3c-cleanup-adc-v2-3-4f8299343d32@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/exynos_adc.c | 11 +---------- include/linux/platform_data/touchscreen-s3c2410.h | 22 ---------------------- 2 files changed, 1 insertion(+), 32 deletions(-) delete mode 100644 include/linux/platform_data/touchscreen-s3c2410.h (limited to 'include/linux') diff --git a/drivers/iio/adc/exynos_adc.c b/drivers/iio/adc/exynos_adc.c index dd4b9921a8b5..1484adff00df 100644 --- a/drivers/iio/adc/exynos_adc.c +++ b/drivers/iio/adc/exynos_adc.c @@ -29,8 +29,6 @@ #include #include -#include - /* S3C/EXYNOS4412/5250 ADC_V1 registers definitions */ #define ADC_V1_CON(x) ((x) + 0x00) #define ADC_V1_DLY(x) ((x) + 0x08) @@ -106,7 +104,6 @@ struct exynos_adc { struct clk *clk; struct clk *sclk; unsigned int irq; - unsigned int delay; struct regulator *vdd; struct completion completion; @@ -213,7 +210,7 @@ static void exynos_adc_v1_init_hw(struct exynos_adc *info) writel(con1, ADC_V1_CON(info->regs)); /* set touchscreen delay */ - writel(info->delay, ADC_V1_DLY(info->regs)); + writel(10000, ADC_V1_DLY(info->regs)); } static void exynos_adc_v1_exit_hw(struct exynos_adc *info) @@ -556,7 +553,6 @@ static int exynos_adc_probe(struct platform_device *pdev) { struct exynos_adc *info = NULL; struct device_node *np = pdev->dev.of_node; - struct s3c2410_ts_mach_info *pdata = dev_get_platdata(&pdev->dev); struct iio_dev *indio_dev = NULL; int ret; int irq; @@ -655,11 +651,6 @@ static int exynos_adc_probe(struct platform_device *pdev) if (info->data->init_hw) info->data->init_hw(info); - if (pdata) - info->delay = pdata->delay; - else - info->delay = 10000; - ret = of_platform_populate(np, exynos_adc_match, NULL, &indio_dev->dev); if (ret < 0) { dev_err(&pdev->dev, "failed adding child nodes\n"); diff --git a/include/linux/platform_data/touchscreen-s3c2410.h b/include/linux/platform_data/touchscreen-s3c2410.h deleted file mode 100644 index bf8d3b9d7c6a..000000000000 --- a/include/linux/platform_data/touchscreen-s3c2410.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2005 Arnaud Patard -*/ - -#ifndef __TOUCHSCREEN_S3C2410_H -#define __TOUCHSCREEN_S3C2410_H - -struct s3c2410_ts_mach_info { - int delay; - int presc; - int oversampling_shift; - void (*cfg_gpio)(struct platform_device *dev); -}; - -extern void s3c24xx_ts_set_platdata(struct s3c2410_ts_mach_info *); -extern void s3c64xx_ts_set_platdata(struct s3c2410_ts_mach_info *); - -/* defined by architecture to configure gpio */ -extern void s3c24xx_ts_cfg_gpio(struct platform_device *dev); - -#endif /*__TOUCHSCREEN_S3C2410_H */ -- cgit v1.2.3 From cec1aec9c46305743a2d4a1bfb06f6b0374d5ed0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 31 Aug 2025 12:48:22 +0200 Subject: iio: consumers: Add an iio_multiply_value() helper function The channel-scale handling in iio_convert_raw_to_processed() in essence does the following: processed = raw * caller-provided-scale * channel-scale Which can also be written as: multiplier = raw * caller-provided-scale iio-value = channel-scale processed = multiplier * iio-value Where iio-value is a set of IIO_VAL_* type + val + val2 integers, being able to handle multiplication of iio-values like this is something which is useful to have in general and, as previous bugfixes to iio_convert_raw_to_processed() have shown, also tricky to implement. Split the iio-value multiplication code from iio_convert_raw_to_processed() out into a new iio_multiply_value() helper. This serves multiple purposes: 1. Having this split out allows writing a KUnit test for this. 2. Having this split out allows re-use to get better precision when scaling values in iio_read_channel_processed_scale(). Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Link: https://patch.msgid.link/20250831104825.15097-4-hansg@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/inkern.c | 72 ++++++++++++++++++++++++++------------------ include/linux/iio/consumer.h | 18 +++++++++++ 2 files changed, 60 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 642beb4b3360..158d54de14a7 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -599,13 +599,50 @@ int iio_read_channel_average_raw(struct iio_channel *chan, int *val) } EXPORT_SYMBOL_GPL(iio_read_channel_average_raw); +int iio_multiply_value(int *result, s64 multiplier, + unsigned int type, int val, int val2) +{ + s64 denominator; + + switch (type) { + case IIO_VAL_INT: + *result = multiplier * val; + return IIO_VAL_INT; + case IIO_VAL_INT_PLUS_MICRO: + case IIO_VAL_INT_PLUS_NANO: + switch (type) { + case IIO_VAL_INT_PLUS_MICRO: + denominator = MICRO; + break; + case IIO_VAL_INT_PLUS_NANO: + denominator = NANO; + break; + } + *result = multiplier * abs(val); + *result += div_s64(multiplier * abs(val2), denominator); + if (val < 0 || val2 < 0) + *result *= -1; + return IIO_VAL_INT; + case IIO_VAL_FRACTIONAL: + *result = div_s64(multiplier * val, val2); + return IIO_VAL_INT; + case IIO_VAL_FRACTIONAL_LOG2: + *result = (multiplier * val) >> val2; + return IIO_VAL_INT; + default: + return -EINVAL; + } +} +EXPORT_SYMBOL_NS_GPL(iio_multiply_value, "IIO_UNIT_TEST"); + static int iio_convert_raw_to_processed_unlocked(struct iio_channel *chan, int raw, int *processed, unsigned int scale) { int scale_type, scale_val, scale_val2; int offset_type, offset_val, offset_val2; - s64 denominator, raw64 = raw; + s64 raw64 = raw; + int ret; offset_type = iio_channel_read(chan, &offset_val, &offset_val2, IIO_CHAN_INFO_OFFSET); @@ -644,35 +681,10 @@ static int iio_convert_raw_to_processed_unlocked(struct iio_channel *chan, return 0; } - switch (scale_type) { - case IIO_VAL_INT: - *processed = raw64 * scale_val * scale; - break; - case IIO_VAL_INT_PLUS_MICRO: - case IIO_VAL_INT_PLUS_NANO: - switch (scale_type) { - case IIO_VAL_INT_PLUS_MICRO: - denominator = MICRO; - break; - case IIO_VAL_INT_PLUS_NANO: - denominator = NANO; - break; - } - *processed = raw64 * scale * abs(scale_val); - *processed += div_s64(raw64 * scale * abs(scale_val2), denominator); - if (scale_val < 0 || scale_val2 < 0) - *processed *= -1; - break; - case IIO_VAL_FRACTIONAL: - *processed = div_s64(raw64 * (s64)scale_val * scale, - scale_val2); - break; - case IIO_VAL_FRACTIONAL_LOG2: - *processed = (raw64 * (s64)scale_val * scale) >> scale_val2; - break; - default: - return -EINVAL; - } + ret = iio_multiply_value(processed, raw64 * scale, + scale_type, scale_val, scale_val2); + if (ret < 0) + return ret; return 0; } diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 6a4479616479..a38b277c2c02 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -381,6 +381,24 @@ int iio_read_channel_offset(struct iio_channel *chan, int *val, int iio_read_channel_scale(struct iio_channel *chan, int *val, int *val2); +/** + * iio_multiply_value() - Multiply an IIO value + * @result: Destination pointer for the multiplication result + * @multiplier: Multiplier. + * @type: One of the IIO_VAL_* constants. This decides how the @val and + * @val2 parameters are interpreted. + * @val: Value being multiplied. + * @val2: Value being multiplied. @val2 use depends on type. + * + * Multiply an IIO value with a s64 multiplier storing the result as + * IIO_VAL_INT. This is typically used for scaling. + * + * Returns: + * IIO_VAL_INT on success or a negative error-number on failure. + */ +int iio_multiply_value(int *result, s64 multiplier, + unsigned int type, int val, int val2); + /** * iio_convert_raw_to_processed() - Converts a raw value to a processed value * @chan: The channel being queried -- cgit v1.2.3 From 948cb194bcb4c01fb4cd029936f0c02b10780394 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Jul 2025 15:59:24 +0200 Subject: mtd: map: add back asm/barrier.h inclusion The mb() macro is used in this header: In file included from include/linux/mtd/qinfo.h:5, from include/linux/mtd/pfow.h:8, from drivers/mtd/lpddr/lpddr_cmds.c:14: include/linux/mtd/map.h: In function 'inline_map_write': include/linux/mtd/map.h:428:9: error: implicit declaration of function 'mb' [-Wimplicit-function-declaration] Fixes: 56eb7c13b97c ("mtd: map: Don't use "proxy" headers") Signed-off-by: Arnd Bergmann Acked-by: Andy Shevchenko Signed-off-by: Miquel Raynal --- include/linux/mtd/map.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 288ef765a44e..75b0b2abc880 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -14,6 +14,7 @@ #include #include #include +#include struct device_node; struct module; -- cgit v1.2.3 From 121a0f839dbb397af5fabb701cea3e9983223e50 Mon Sep 17 00:00:00 2001 From: Israel Cepeda Date: Thu, 11 Sep 2025 20:13:41 +0200 Subject: usb: misc: Add Intel USBIO bridge driver Add a driver for the Intel USBIO USB IO-expander used by the MIPI cameras on various new (Meteor Lake and later) Intel laptops. This is an USB bridge driver which adds auxbus child devices for the GPIO, I2C and SPI functions of the USBIO chip and which exports IO-functions for the drivers for the auxbus child devices to communicate with the USBIO device's firmware. Co-developed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Israel Cepeda Link: https://lore.kernel.org/r/20250911181343.77398-2-hansg@kernel.org Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 8 + drivers/usb/misc/Kconfig | 14 + drivers/usb/misc/Makefile | 1 + drivers/usb/misc/usbio.c | 749 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/usbio.h | 177 +++++++++++ 5 files changed, 949 insertions(+) create mode 100644 drivers/usb/misc/usbio.c create mode 100644 include/linux/usb/usbio.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index fed6cd812d79..434ac1593332 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12688,6 +12688,14 @@ S: Maintained F: Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst F: drivers/platform/x86/intel/uncore-frequency/ +INTEL USBIO USB I/O EXPANDER DRIVERS +M: Israel Cepeda +M: Hans de Goede +R: Sakari Ailus +S: Maintained +F: drivers/usb/misc/usbio.c +F: include/linux/usb/usbio.h + INTEL VENDOR SPECIFIC EXTENDED CAPABILITIES DRIVER M: David E. Box S: Supported diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig index 26eaae32f738..09ac6f1c985f 100644 --- a/drivers/usb/misc/Kconfig +++ b/drivers/usb/misc/Kconfig @@ -179,6 +179,20 @@ config USB_LJCA This driver can also be built as a module. If so, the module will be called usb-ljca. +config USB_USBIO + tristate "Intel USBIO Bridge support" + depends on USB && ACPI + select AUXILIARY_BUS + help + This adds support for Intel USBIO drivers. + This enables the USBIO bridge driver module in charge to talk + to the USB device. Additional drivers such as GPIO_USBIO and + I2C_USBIO must be enabled in order to use the device's full + functionality. + + This driver can also be built as a module. If so, the module + will be called usbio. + source "drivers/usb/misc/sisusbvga/Kconfig" config USB_LD diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile index 0cd5bc8f52fe..494ab0377f35 100644 --- a/drivers/usb/misc/Makefile +++ b/drivers/usb/misc/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_USB_EMI62) += emi62.o obj-$(CONFIG_USB_EZUSB_FX2) += ezusb.o obj-$(CONFIG_APPLE_MFI_FASTCHARGE) += apple-mfi-fastcharge.o obj-$(CONFIG_USB_LJCA) += usb-ljca.o +obj-$(CONFIG_USB_USBIO) += usbio.o obj-$(CONFIG_USB_IDMOUSE) += idmouse.o obj-$(CONFIG_USB_IOWARRIOR) += iowarrior.o obj-$(CONFIG_USB_ISIGHTFW) += isight_firmware.o diff --git a/drivers/usb/misc/usbio.c b/drivers/usb/misc/usbio.c new file mode 100644 index 000000000000..37644dddf157 --- /dev/null +++ b/drivers/usb/misc/usbio.c @@ -0,0 +1,749 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel USBIO Bridge driver + * + * Copyright (c) 2025 Intel Corporation. + * Copyright (c) 2025 Red Hat, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/************************************* + * USBIO Bridge Protocol Definitions * + *************************************/ + +/* USBIO Control Commands */ +#define USBIO_CTRLCMD_PROTVER 0 +#define USBIO_CTRLCMD_FWVER 1 +#define USBIO_CTRLCMD_HS 2 +#define USBIO_CTRLCMD_ENUMGPIO 16 +#define USBIO_CTRLCMD_ENUMI2C 17 + +/* USBIO Packet Flags */ +#define USBIO_PKTFLAG_ACK BIT(0) +#define USBIO_PKTFLAG_RSP BIT(1) +#define USBIO_PKTFLAG_CMP BIT(2) +#define USBIO_PKTFLAG_ERR BIT(3) + +#define USBIO_PKTFLAGS_REQRESP (USBIO_PKTFLAG_CMP | USBIO_PKTFLAG_ACK) + +#define USBIO_CTRLXFER_TIMEOUT 0 +#define USBIO_BULKXFER_TIMEOUT 100 + +struct usbio_protver { + u8 ver; +} __packed; + +struct usbio_fwver { + u8 major; + u8 minor; + __le16 patch; + __le16 build; +} __packed; + +/*********************************** + * USBIO Bridge Device Definitions * + ***********************************/ + +/** + * struct usbio_device - the usb device exposing IOs + * + * @dev: the device in the usb interface + * @udev: the detected usb device + * @intf: the usb interface + * @quirks: quirks + * @ctrl_mutex: protects ctrl_buf + * @ctrl_pipe: the control transfer pipe + * @ctrlbuf_len: the size of the control transfer pipe + * @ctrlbuf: the buffer used for control transfers + * @bulk_mutex: protects tx_buf, rx_buf and split bulk-transfers getting interrupted + * @tx_pipe: the bulk out pipe + * @txbuf_len: the size of the bulk out pipe + * @txbuf: the buffer used for bulk out transfers + * @rx_pipe: the bulk in pipe + * @rxbuf_len: the size of the bulk in pipe + * @rxdat_len: the data length at rx buffer + * @rxbuf: the buffer used for bulk in transfers + * @urb: the urb to read bulk pipe + * @done: completion object as request is done + * @cli_list: device's client list + * @nr_gpio_banks: Number of GPIO banks + * @gpios: GPIO bank descriptors + * @nr_gpio_banks: Number of I2C busses + * @gpios: I2C bank descriptors + */ +struct usbio_device { + struct device *dev; + struct usb_device *udev; + struct usb_interface *intf; + unsigned long quirks; + + struct mutex ctrl_mutex; + unsigned int ctrl_pipe; + u16 ctrlbuf_len; + void *ctrlbuf; + + struct mutex bulk_mutex; + unsigned int tx_pipe; + u16 txbuf_len; + void *txbuf; + + unsigned int rx_pipe; + u16 rxbuf_len; + u16 rxdat_len; + void *rxbuf; + struct urb *urb; + + struct completion done; + + struct list_head cli_list; + + unsigned int nr_gpio_banks; + struct usbio_gpio_bank_desc gpios[USBIO_MAX_GPIOBANKS]; + + unsigned int nr_i2c_buses; + struct usbio_i2c_bus_desc i2cs[USBIO_MAX_I2CBUSES]; +}; + +/** + * struct usbio_client - represents a usbio client + * + * @auxdev: auxiliary device object + * @mutex: protects @bridge + * @bridge: usbio bridge who service the client + * @link: usbio bridge clients list member + */ +struct usbio_client { + struct auxiliary_device auxdev; + struct mutex mutex; + struct usbio_device *bridge; + struct list_head link; +}; + +#define adev_to_client(adev) container_of_const(adev, struct usbio_client, auxdev) + +static int usbio_ctrl_msg(struct usbio_device *usbio, u8 type, u8 cmd, + const void *obuf, u16 obuf_len, void *ibuf, u16 ibuf_len) +{ + u8 request = USB_TYPE_VENDOR | USB_RECIP_DEVICE; + struct usbio_ctrl_packet *cpkt; + unsigned int pipe; + u16 cpkt_len; + int ret; + + lockdep_assert_held(&usbio->ctrl_mutex); + + if ((obuf_len > (usbio->ctrlbuf_len - sizeof(*cpkt))) || + (ibuf_len > (usbio->ctrlbuf_len - sizeof(*cpkt)))) + return -EMSGSIZE; + + /* Prepare Control Packet Header */ + cpkt = usbio->ctrlbuf; + cpkt->header.type = type; + cpkt->header.cmd = cmd; + if (type == USBIO_PKTTYPE_CTRL || ibuf_len) + cpkt->header.flags = USBIO_PKTFLAGS_REQRESP; + else + cpkt->header.flags = USBIO_PKTFLAG_CMP; + cpkt->len = obuf_len; + + /* Copy the data */ + memcpy(cpkt->data, obuf, obuf_len); + + pipe = usb_sndctrlpipe(usbio->udev, usbio->ctrl_pipe); + cpkt_len = sizeof(*cpkt) + obuf_len; + ret = usb_control_msg(usbio->udev, pipe, 0, request | USB_DIR_OUT, 0, 0, + cpkt, cpkt_len, USBIO_CTRLXFER_TIMEOUT); + dev_dbg(usbio->dev, "control out %d hdr %*phN data %*phN\n", ret, + (int)sizeof(*cpkt), cpkt, (int)cpkt->len, cpkt->data); + + if (ret != cpkt_len) { + dev_err(usbio->dev, "USB control out failed: %d\n", ret); + return (ret < 0) ? ret : -EPROTO; + } + + if (!(cpkt->header.flags & USBIO_PKTFLAG_ACK)) + return 0; + + pipe = usb_rcvctrlpipe(usbio->udev, usbio->ctrl_pipe); + cpkt_len = sizeof(*cpkt) + ibuf_len; + ret = usb_control_msg(usbio->udev, pipe, 0, request | USB_DIR_IN, 0, 0, + cpkt, cpkt_len, USBIO_CTRLXFER_TIMEOUT); + dev_dbg(usbio->dev, "control in %d hdr %*phN data %*phN\n", ret, + (int)sizeof(*cpkt), cpkt, (int)cpkt->len, cpkt->data); + + if (ret < sizeof(*cpkt)) { + dev_err(usbio->dev, "USB control in failed: %d\n", ret); + return (ret < 0) ? ret : -EPROTO; + } + + if (cpkt->header.type != type || cpkt->header.cmd != cmd || + !(cpkt->header.flags & USBIO_PKTFLAG_RSP)) { + dev_err(usbio->dev, "Unexpected reply type: %u, cmd: %u, flags: %u\n", + cpkt->header.type, cpkt->header.cmd, cpkt->header.flags); + return -EPROTO; + } + + if (cpkt->header.flags & USBIO_PKTFLAG_ERR) + return -EREMOTEIO; + + if (ibuf_len < cpkt->len) + return -ENOSPC; + + memcpy(ibuf, cpkt->data, cpkt->len); + + return cpkt->len; +} + +int usbio_control_msg(struct auxiliary_device *adev, u8 type, u8 cmd, + const void *obuf, u16 obuf_len, void *ibuf, u16 ibuf_len) +{ + struct usbio_client *client = adev_to_client(adev); + struct usbio_device *usbio; + int ret; + + guard(mutex)(&client->mutex); + + usbio = client->bridge; + if (!usbio) + return -ENODEV; /* Disconnected */ + + ret = usb_autopm_get_interface(usbio->intf); + if (ret) + return ret; + + mutex_lock(&usbio->ctrl_mutex); + + ret = usbio_ctrl_msg(client->bridge, type, cmd, obuf, obuf_len, ibuf, ibuf_len); + + mutex_unlock(&usbio->ctrl_mutex); + usb_autopm_put_interface(usbio->intf); + + return ret; +} +EXPORT_SYMBOL_NS_GPL(usbio_control_msg, "USBIO"); + +static void usbio_bulk_recv(struct urb *urb) +{ + struct usbio_bulk_packet *bpkt = urb->transfer_buffer; + struct usbio_device *usbio = urb->context; + + if (!urb->status) { + if (bpkt->header.flags & USBIO_PKTFLAG_RSP) { + usbio->rxdat_len = urb->actual_length; + complete(&usbio->done); + } + } else if (urb->status != -ENOENT) { + dev_err(usbio->dev, "Bulk in error %d\n", urb->status); + } + + usb_submit_urb(usbio->urb, GFP_ATOMIC); +} + +int usbio_bulk_msg(struct auxiliary_device *adev, u8 type, u8 cmd, bool last, + const void *obuf, u16 obuf_len, void *ibuf, u16 ibuf_len) +{ + struct usbio_client *client = adev_to_client(adev); + struct usbio_device *usbio = client->bridge; + struct usbio_bulk_packet *bpkt; + int ret, act = 0; + u16 bpkt_len; + + lockdep_assert_held(&client->mutex); + lockdep_assert_held(&usbio->bulk_mutex); + + if ((obuf_len > (usbio->txbuf_len - sizeof(*bpkt))) || + (ibuf_len > (usbio->txbuf_len - sizeof(*bpkt)))) + return -EMSGSIZE; + + if (ibuf_len) + reinit_completion(&usbio->done); + + /* If no data to send, skip to read */ + if (!obuf_len) + goto read; + + /* Prepare Bulk Packet Header */ + bpkt = usbio->txbuf; + bpkt->header.type = type; + bpkt->header.cmd = cmd; + if (!last) + bpkt->header.flags = 0; + else if (ibuf_len) + bpkt->header.flags = USBIO_PKTFLAGS_REQRESP; + else + bpkt->header.flags = USBIO_PKTFLAG_CMP; + bpkt->len = cpu_to_le16(obuf_len); + + /* Copy the data */ + memcpy(bpkt->data, obuf, obuf_len); + + bpkt_len = sizeof(*bpkt) + obuf_len; + ret = usb_bulk_msg(usbio->udev, usbio->tx_pipe, bpkt, bpkt_len, &act, + USBIO_BULKXFER_TIMEOUT); + dev_dbg(usbio->dev, "bulk out %d hdr %*phN data %*phN\n", act, + (int)sizeof(*bpkt), bpkt, obuf_len, bpkt->data); + + if (ret || act != bpkt_len) { + dev_err(usbio->dev, "Bulk out failed: %d\n", ret); + return ret ?: -EPROTO; + } + + if (!(bpkt->header.flags & USBIO_PKTFLAG_ACK)) + return obuf_len; + +read: + ret = wait_for_completion_timeout(&usbio->done, USBIO_BULKXFER_TIMEOUT); + if (ret <= 0) { + dev_err(usbio->dev, "Bulk in wait failed: %d\n", ret); + return ret ?: -ETIMEDOUT; + } + + act = usbio->rxdat_len; + bpkt = usbio->rxbuf; + bpkt_len = le16_to_cpu(bpkt->len); + dev_dbg(usbio->dev, "bulk in %d hdr %*phN data %*phN\n", act, + (int)sizeof(*bpkt), bpkt, bpkt_len, bpkt->data); + + /* + * Unsupported bulk commands get only an usbio_packet_header with + * the error flag set as reply. Return -EPIPE for this case. + */ + if (act == sizeof(struct usbio_packet_header) && + (bpkt->header.flags & USBIO_PKTFLAG_ERR)) + return -EPIPE; + + if (act < sizeof(*bpkt)) { + dev_err(usbio->dev, "Bulk in short read: %d\n", act); + return -EPROTO; + } + + if (bpkt->header.type != type || bpkt->header.cmd != cmd || + !(bpkt->header.flags & USBIO_PKTFLAG_RSP)) { + dev_err(usbio->dev, + "Unexpected bulk in type 0x%02x cmd 0x%02x flags 0x%02x\n", + bpkt->header.type, bpkt->header.cmd, bpkt->header.flags); + return -EPROTO; + } + + if (bpkt->header.flags & USBIO_PKTFLAG_ERR) + return -EREMOTEIO; + + if (ibuf_len < bpkt_len) + return -ENOSPC; + + memcpy(ibuf, bpkt->data, bpkt_len); + + return bpkt_len; +} +EXPORT_SYMBOL_NS_GPL(usbio_bulk_msg, "USBIO"); + +int usbio_acquire(struct auxiliary_device *adev) +{ + struct usbio_client *client = adev_to_client(adev); + struct usbio_device *usbio; + int ret; + + mutex_lock(&client->mutex); + + usbio = client->bridge; + if (!usbio) { + ret = -ENODEV; /* Disconnected */ + goto err_unlock; + } + + ret = usb_autopm_get_interface(usbio->intf); + if (ret) + goto err_unlock; + + mutex_lock(&usbio->bulk_mutex); + + /* Leave client locked until release to avoid abba deadlock issues */ + return 0; + +err_unlock: + mutex_unlock(&client->mutex); + + return ret; +} +EXPORT_SYMBOL_NS_GPL(usbio_acquire, "USBIO"); + +void usbio_release(struct auxiliary_device *adev) +{ + struct usbio_client *client = adev_to_client(adev); + struct usbio_device *usbio = client->bridge; + + lockdep_assert_held(&client->mutex); + + mutex_unlock(&usbio->bulk_mutex); + usb_autopm_put_interface(usbio->intf); + mutex_unlock(&client->mutex); +} +EXPORT_SYMBOL_NS_GPL(usbio_release, "USBIO"); + +void usbio_get_txrxbuf_len(struct auxiliary_device *adev, u16 *txbuf_len, u16 *rxbuf_len) +{ + struct usbio_client *client = adev_to_client(adev); + struct usbio_device *usbio; + + guard(mutex)(&client->mutex); + + usbio = client->bridge; + if (!usbio) + return; /* Disconnected */ + + *txbuf_len = usbio->txbuf_len; + *rxbuf_len = usbio->rxbuf_len; +} +EXPORT_SYMBOL_NS_GPL(usbio_get_txrxbuf_len, "USBIO"); + +unsigned long usbio_get_quirks(struct auxiliary_device *adev) +{ + struct usbio_client *client = adev_to_client(adev); + struct usbio_device *usbio; + + guard(mutex)(&client->mutex); + + usbio = client->bridge; + if (!usbio) + return 0; /* Disconnected */ + + return usbio->quirks; +} +EXPORT_SYMBOL_NS_GPL(usbio_get_quirks, "USBIO"); + +static void usbio_auxdev_release(struct device *dev) +{ + struct auxiliary_device *adev = to_auxiliary_dev(dev); + struct usbio_client *client = adev_to_client(adev); + + mutex_destroy(&client->mutex); + kfree(client); +} + +static int usbio_add_client(struct usbio_device *usbio, char *name, u8 id, void *data) +{ + struct usbio_client *client; + struct auxiliary_device *adev; + int ret; + + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (!client) + return -ENOMEM; + + mutex_init(&client->mutex); + client->bridge = usbio; + adev = &client->auxdev; + adev->name = name; + adev->id = id; + + adev->dev.parent = usbio->dev; + adev->dev.platform_data = data; + adev->dev.release = usbio_auxdev_release; + + ret = auxiliary_device_init(adev); + if (ret) { + usbio_auxdev_release(&adev->dev); + return ret; + } + + ret = auxiliary_device_add(adev); + if (ret) { + auxiliary_device_uninit(adev); + return ret; + } + + list_add_tail(&client->link, &usbio->cli_list); + + return 0; +} + +static int usbio_enum_gpios(struct usbio_device *usbio) +{ + struct usbio_gpio_bank_desc *gpio = usbio->gpios; + + dev_dbg(usbio->dev, "GPIO Banks: %d\n", usbio->nr_gpio_banks); + + for (unsigned int i = 0; i < usbio->nr_gpio_banks; i++) + dev_dbg(usbio->dev, "\tBank%d[%d] map: %#08x\n", + gpio[i].id, gpio[i].pins, gpio[i].bmap); + + usbio_add_client(usbio, USBIO_GPIO_CLIENT, 0, gpio); + + return 0; +} + +static int usbio_enum_i2cs(struct usbio_device *usbio) +{ + struct usbio_i2c_bus_desc *i2c = usbio->i2cs; + + dev_dbg(usbio->dev, "I2C Busses: %d\n", usbio->nr_i2c_buses); + + for (unsigned int i = 0; i < usbio->nr_i2c_buses; i++) { + dev_dbg(usbio->dev, "\tBus%d caps: %#02x\n", i2c[i].id, i2c[i].caps); + usbio_add_client(usbio, USBIO_I2C_CLIENT, i, &i2c[i]); + } + + return 0; +} + +static int usbio_suspend(struct usb_interface *intf, pm_message_t msg) +{ + struct usbio_device *usbio = usb_get_intfdata(intf); + + usb_kill_urb(usbio->urb); + + return 0; +} + +static int usbio_resume(struct usb_interface *intf) +{ + struct usbio_device *usbio = usb_get_intfdata(intf); + + return usb_submit_urb(usbio->urb, GFP_KERNEL); +} + +static void usbio_disconnect(struct usb_interface *intf) +{ + struct usbio_device *usbio = usb_get_intfdata(intf); + struct usbio_client *client; + + /* Wakeup any clients waiting for a reply */ + usbio->rxdat_len = 0; + complete(&usbio->done); + + /* Let clients know the bridge is gone */ + list_for_each_entry(client, &usbio->cli_list, link) { + mutex_lock(&client->mutex); + client->bridge = NULL; + mutex_unlock(&client->mutex); + } + + /* From here on clients will no longer touch struct usbio_device */ + usb_kill_urb(usbio->urb); + usb_free_urb(usbio->urb); + + list_for_each_entry_reverse(client, &usbio->cli_list, link) { + auxiliary_device_delete(&client->auxdev); + auxiliary_device_uninit(&client->auxdev); + } +} + +static int usbio_probe(struct usb_interface *intf, const struct usb_device_id *id) +{ + struct usb_device *udev = interface_to_usbdev(intf); + struct usb_endpoint_descriptor *ep_in, *ep_out; + struct device *dev = &intf->dev; + struct usbio_protver protver; + struct usbio_device *usbio; + struct usbio_fwver fwver; + int ret; + + usbio = devm_kzalloc(dev, sizeof(*usbio), GFP_KERNEL); + if (!usbio) + return -ENOMEM; + + ret = devm_mutex_init(dev, &usbio->ctrl_mutex); + if (ret) + return ret; + + ret = devm_mutex_init(dev, &usbio->bulk_mutex); + if (ret) + return ret; + + usbio->dev = dev; + usbio->udev = udev; + usbio->intf = intf; + usbio->quirks = id ? id->driver_info : 0; + init_completion(&usbio->done); + INIT_LIST_HEAD(&usbio->cli_list); + usb_set_intfdata(intf, usbio); + + usbio->ctrl_pipe = usb_endpoint_num(&udev->ep0.desc); + usbio->ctrlbuf_len = usb_maxpacket(udev, usbio->ctrl_pipe); + usbio->ctrlbuf = devm_kzalloc(dev, usbio->ctrlbuf_len, GFP_KERNEL); + if (!usbio->ctrlbuf) + return -ENOMEM; + + /* Find the first bulk-in and bulk-out endpoints */ + ret = usb_find_common_endpoints(intf->cur_altsetting, &ep_in, &ep_out, + NULL, NULL); + if (ret) { + dev_err(dev, "Cannot find bulk endpoints: %d\n", ret); + return ret; + } + + usbio->tx_pipe = usb_sndbulkpipe(udev, usb_endpoint_num(ep_out)); + + if (usbio->quirks & USBIO_QUIRK_BULK_MAXP_63) + usbio->txbuf_len = 63; + else + usbio->txbuf_len = usb_endpoint_maxp(ep_out); + + usbio->txbuf = devm_kzalloc(dev, usbio->txbuf_len, GFP_KERNEL); + if (!usbio->txbuf) + return -ENOMEM; + + usbio->rx_pipe = usb_rcvbulkpipe(udev, usb_endpoint_num(ep_in)); + + if (usbio->quirks & USBIO_QUIRK_BULK_MAXP_63) + usbio->rxbuf_len = 63; + else + usbio->rxbuf_len = usb_endpoint_maxp(ep_in); + + usbio->rxbuf = devm_kzalloc(dev, usbio->rxbuf_len, GFP_KERNEL); + if (!usbio->rxbuf) + return -ENOMEM; + + usbio->urb = usb_alloc_urb(0, GFP_KERNEL); + if (!usbio->urb) + return -ENOMEM; + + usb_fill_bulk_urb(usbio->urb, udev, usbio->rx_pipe, usbio->rxbuf, + usbio->rxbuf_len, usbio_bulk_recv, usbio); + ret = usb_submit_urb(usbio->urb, GFP_KERNEL); + if (ret) + return dev_err_probe(dev, ret, "Submitting usb urb\n"); + + mutex_lock(&usbio->ctrl_mutex); + + ret = usbio_ctrl_msg(usbio, USBIO_PKTTYPE_CTRL, USBIO_CTRLCMD_HS, NULL, 0, NULL, 0); + if (ret < 0) + goto err_unlock; + + ret = usbio_ctrl_msg(usbio, USBIO_PKTTYPE_CTRL, USBIO_CTRLCMD_PROTVER, NULL, 0, + &protver, sizeof(protver)); + if (ret < 0) + goto err_unlock; + + ret = usbio_ctrl_msg(usbio, USBIO_PKTTYPE_CTRL, USBIO_CTRLCMD_FWVER, NULL, 0, + &fwver, sizeof(fwver)); + if (ret < 0) + goto err_unlock; + + ret = usbio_ctrl_msg(usbio, USBIO_PKTTYPE_CTRL, USBIO_CTRLCMD_ENUMGPIO, NULL, 0, + usbio->gpios, sizeof(usbio->gpios)); + if (ret < 0 || ret % sizeof(struct usbio_gpio_bank_desc)) { + ret = (ret < 0) ? ret : -EPROTO; + goto err_unlock; + } + usbio->nr_gpio_banks = ret / sizeof(struct usbio_gpio_bank_desc); + + ret = usbio_ctrl_msg(usbio, USBIO_PKTTYPE_CTRL, USBIO_CTRLCMD_ENUMI2C, NULL, 0, + usbio->i2cs, sizeof(usbio->i2cs)); + if (ret < 0 || ret % sizeof(struct usbio_i2c_bus_desc)) { + ret = (ret < 0) ? ret : -EPROTO; + goto err_unlock; + } + usbio->nr_i2c_buses = ret / sizeof(struct usbio_i2c_bus_desc); + + mutex_unlock(&usbio->ctrl_mutex); + + dev_dbg(dev, "ProtVer(BCD): %02x FwVer: %d.%d.%d.%d\n", + protver.ver, fwver.major, fwver.minor, + le16_to_cpu(fwver.patch), le16_to_cpu(fwver.build)); + + usbio_enum_gpios(usbio); + usbio_enum_i2cs(usbio); + + return 0; + +err_unlock: + mutex_unlock(&usbio->ctrl_mutex); + usb_kill_urb(usbio->urb); + usb_free_urb(usbio->urb); + + return ret; +} + +static const struct usb_device_id usbio_table[] = { + { USB_DEVICE(0x2ac1, 0x20c1), /* Lattice NX40 */ + .driver_info = USBIO_QUIRK_I2C_MAX_RW_LEN_52 }, + { USB_DEVICE(0x2ac1, 0x20c9), /* Lattice NX33 */ + .driver_info = USBIO_QUIRK_I2C_NO_INIT_ACK | USBIO_QUIRK_I2C_MAX_RW_LEN_52 | + USBIO_QUIRK_I2C_ALLOW_400KHZ }, + { USB_DEVICE(0x2ac1, 0x20cb) }, /* Lattice NX33U */ + { USB_DEVICE(0x06cb, 0x0701), /* Synaptics Sabre */ + .driver_info = USBIO_QUIRK_BULK_MAXP_63 | USBIO_QUIRK_I2C_USE_CHUNK_LEN }, + { } +}; +MODULE_DEVICE_TABLE(usb, usbio_table); + +static struct usb_driver usbio_driver = { + .name = "usbio-bridge", + .probe = usbio_probe, + .disconnect = usbio_disconnect, + .suspend = usbio_suspend, + .resume = usbio_resume, + .id_table = usbio_table, + .supports_autosuspend = 1, +}; +module_usb_driver(usbio_driver); + +struct usbio_match_ids_walk_data { + struct acpi_device *adev; + const struct acpi_device_id *hids; + unsigned int id; +}; + +static int usbio_match_device_ids(struct acpi_device *adev, void *data) +{ + struct usbio_match_ids_walk_data *wd = data; + unsigned int id = 0; + char *uid; + + if (acpi_match_device_ids(adev, wd->hids)) + return 0; + + uid = acpi_device_uid(adev); + if (uid) { + for (int i = 0; i < strlen(uid); i++) { + if (!kstrtouint(&uid[i], 10, &id)) + break; + } + } + + if (!uid || wd->id == id) { + wd->adev = adev; + return 1; + } + + return 0; +} + +void usbio_acpi_bind(struct auxiliary_device *adev, const struct acpi_device_id *hids) +{ + struct device *dev = &adev->dev; + struct acpi_device *parent; + struct usbio_match_ids_walk_data wd = { + .adev = NULL, + .hids = hids, + .id = adev->id, + }; + + parent = ACPI_COMPANION(dev->parent); + if (!parent) + return; + + acpi_dev_for_each_child(parent, usbio_match_device_ids, &wd); + if (wd.adev) + ACPI_COMPANION_SET(dev, wd.adev); +} +EXPORT_SYMBOL_NS_GPL(usbio_acpi_bind, "USBIO"); + +MODULE_DESCRIPTION("Intel USBIO Bridge driver"); +MODULE_AUTHOR("Israel Cepeda "); +MODULE_AUTHOR("Hans de Goede "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/usb/usbio.h b/include/linux/usb/usbio.h new file mode 100644 index 000000000000..6c4e7c246d58 --- /dev/null +++ b/include/linux/usb/usbio.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2025 Intel Corporation. + * + */ + +#ifndef _LINUX_USBIO_H_ +#define _LINUX_USBIO_H_ + +#include +#include +#include +#include + +/*********************** + * USBIO Clients Names * + ***********************/ +#define USBIO_GPIO_CLIENT "usbio-gpio" +#define USBIO_I2C_CLIENT "usbio-i2c" + +/**************** + * USBIO quirks * + ****************/ +#define USBIO_QUIRK_BULK_MAXP_63 BIT(0) /* Force bulk endpoint maxp to 63 */ +#define USBIO_QUIRK_I2C_NO_INIT_ACK BIT(8) /* Do not ask for ack on I2C init */ +#define USBIO_QUIRK_I2C_MAX_RW_LEN_52 BIT(9) /* Set i2c-adapter max r/w len to 52 */ +#define USBIO_QUIRK_I2C_USE_CHUNK_LEN BIT(10) /* Send chunk-len for split xfers */ +#define USBIO_QUIRK_I2C_ALLOW_400KHZ BIT(11) /* Override desc, allowing 400 KHz */ + +/************************** + * USBIO Type Definitions * + **************************/ + +/* USBIO Packet Type */ +#define USBIO_PKTTYPE_CTRL 1 +#define USBIO_PKTTYPE_DBG 2 +#define USBIO_PKTTYPE_GPIO 3 +#define USBIO_PKTTYPE_I2C 4 + +/* USBIO Packet Header */ +struct usbio_packet_header { + u8 type; + u8 cmd; + u8 flags; +} __packed; + +/* USBIO Control Transfer Packet */ +struct usbio_ctrl_packet { + struct usbio_packet_header header; + u8 len; + u8 data[] __counted_by(len); +} __packed; + +/* USBIO Bulk Transfer Packet */ +struct usbio_bulk_packet { + struct usbio_packet_header header; + __le16 len; + u8 data[] __counted_by(len); +} __packed; + +/* USBIO GPIO commands */ +enum usbio_gpio_cmd { + USBIO_GPIOCMD_DEINIT, + USBIO_GPIOCMD_INIT, + USBIO_GPIOCMD_READ, + USBIO_GPIOCMD_WRITE, + USBIO_GPIOCMD_END +}; + +/* USBIO GPIO config */ +enum usbio_gpio_pincfg { + USBIO_GPIO_PINCFG_DEFAULT, + USBIO_GPIO_PINCFG_PULLUP, + USBIO_GPIO_PINCFG_PULLDOWN, + USBIO_GPIO_PINCFG_PUSHPULL +}; + +#define USBIO_GPIO_PINCFG_SHIFT 2 +#define USBIO_GPIO_PINCFG_MASK (0x3 << USBIO_GPIO_PINCFG_SHIFT) +#define USBIO_GPIO_SET_PINCFG(pincfg) \ + (((pincfg) << USBIO_GPIO_PINCFG_SHIFT) & USBIO_GPIO_PINCFG_MASK) + +enum usbio_gpio_pinmode { + USBIO_GPIO_PINMOD_INVAL, + USBIO_GPIO_PINMOD_INPUT, + USBIO_GPIO_PINMOD_OUTPUT, + USBIO_GPIO_PINMOD_MAXVAL +}; + +#define USBIO_GPIO_PINMOD_MASK 0x3 +#define USBIO_GPIO_SET_PINMOD(pin) (pin & USBIO_GPIO_PINMOD_MASK) + +/************************* + * USBIO GPIO Controller * + *************************/ + +#define USBIO_MAX_GPIOBANKS 5 +#define USBIO_GPIOSPERBANK 32 + +struct usbio_gpio_bank_desc { + u8 id; + u8 pins; + __le32 bmap; +} __packed; + +struct usbio_gpio_init { + u8 bankid; + u8 config; + u8 pincount; + u8 pin; +} __packed; + +struct usbio_gpio_rw { + u8 bankid; + u8 pincount; + u8 pin; + __le32 value; +} __packed; + +/* USBIO I2C commands */ +enum usbio_i2c_cmd { + USBIO_I2CCMD_UNINIT, + USBIO_I2CCMD_INIT, + USBIO_I2CCMD_READ, + USBIO_I2CCMD_WRITE, + USBIO_I2CCMD_END +}; + +/************************ + * USBIO I2C Controller * + ************************/ + +#define USBIO_MAX_I2CBUSES 5 + +#define USBIO_I2C_BUS_ADDR_CAP_10B BIT(3) /* 10bit address support */ +#define USBIO_I2C_BUS_MODE_CAP_MASK 0x3 +#define USBIO_I2C_BUS_MODE_CAP_SM 0 /* Standard Mode */ +#define USBIO_I2C_BUS_MODE_CAP_FM 1 /* Fast Mode */ +#define USBIO_I2C_BUS_MODE_CAP_FMP 2 /* Fast Mode+ */ +#define USBIO_I2C_BUS_MODE_CAP_HSM 3 /* High-Speed Mode */ + +struct usbio_i2c_bus_desc { + u8 id; + u8 caps; +} __packed; + +struct usbio_i2c_uninit { + u8 busid; + __le16 config; +} __packed; + +struct usbio_i2c_init { + u8 busid; + __le16 config; + __le32 speed; +} __packed; + +struct usbio_i2c_rw { + u8 busid; + __le16 config; + __le16 size; + u8 data[] __counted_by(size); +} __packed; + +int usbio_control_msg(struct auxiliary_device *adev, u8 type, u8 cmd, + const void *obuf, u16 obuf_len, void *ibuf, u16 ibuf_len); + +int usbio_bulk_msg(struct auxiliary_device *adev, u8 type, u8 cmd, bool last, + const void *obuf, u16 obuf_len, void *ibuf, u16 ibuf_len); + +int usbio_acquire(struct auxiliary_device *adev); +void usbio_release(struct auxiliary_device *adev); +void usbio_get_txrxbuf_len(struct auxiliary_device *adev, u16 *txbuf_len, u16 *rxbuf_len); +unsigned long usbio_get_quirks(struct auxiliary_device *adev); +void usbio_acpi_bind(struct auxiliary_device *adev, const struct acpi_device_id *hids); + +#endif -- cgit v1.2.3 From 7f70b89b2be66c03ddc76d3ad8aebeeec4a9c505 Mon Sep 17 00:00:00 2001 From: Guan-Yu Lin Date: Thu, 11 Sep 2025 14:20:14 +0000 Subject: usb: offload: add apis for offload usage tracking Introduce offload_usage and corresponding apis to track offload usage on each USB device. Offload denotes that there is another co-processor accessing the USB device via the same USB host controller. To optimize power usage, it's essential to monitor whether the USB device is actively used by other co-processor. This information is vital when determining if a USB device can be safely suspended during system power state transitions. Signed-off-by: Guan-Yu Lin Link: https://lore.kernel.org/r/20250911142051.90822-3-guanyulin@google.com Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250911142051.90822-3-guanyulin@google.com --- drivers/usb/core/Makefile | 1 + drivers/usb/core/offload.c | 136 +++++++++++++++++++++++++++++++++++++++++++++ drivers/usb/core/usb.c | 1 + include/linux/usb.h | 18 ++++++ 4 files changed, 156 insertions(+) create mode 100644 drivers/usb/core/offload.c (limited to 'include/linux') diff --git a/drivers/usb/core/Makefile b/drivers/usb/core/Makefile index ac006abd13b3..766000b4939e 100644 --- a/drivers/usb/core/Makefile +++ b/drivers/usb/core/Makefile @@ -9,6 +9,7 @@ usbcore-y += devio.o notify.o generic.o quirks.o devices.o usbcore-y += phy.o port.o usbcore-$(CONFIG_OF) += of.o +usbcore-$(CONFIG_USB_XHCI_SIDEBAND) += offload.o usbcore-$(CONFIG_USB_PCI) += hcd-pci.o usbcore-$(CONFIG_ACPI) += usb-acpi.o diff --git a/drivers/usb/core/offload.c b/drivers/usb/core/offload.c new file mode 100644 index 000000000000..7c699f1b8d2b --- /dev/null +++ b/drivers/usb/core/offload.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * offload.c - USB offload related functions + * + * Copyright (c) 2025, Google LLC. + * + * Author: Guan-Yu Lin + */ + +#include + +#include "usb.h" + +/** + * usb_offload_get - increment the offload_usage of a USB device + * @udev: the USB device to increment its offload_usage + * + * Incrementing the offload_usage of a usb_device indicates that offload is + * enabled on this usb_device; that is, another entity is actively handling USB + * transfers. This information allows the USB driver to adjust its power + * management policy based on offload activity. + * + * Return: 0 on success. A negative error code otherwise. + */ +int usb_offload_get(struct usb_device *udev) +{ + int ret; + + usb_lock_device(udev); + if (udev->state == USB_STATE_NOTATTACHED) { + usb_unlock_device(udev); + return -ENODEV; + } + + if (udev->state == USB_STATE_SUSPENDED || + udev->offload_at_suspend) { + usb_unlock_device(udev); + return -EBUSY; + } + + /* + * offload_usage could only be modified when the device is active, since + * it will alter the suspend flow of the device. + */ + ret = usb_autoresume_device(udev); + if (ret < 0) { + usb_unlock_device(udev); + return ret; + } + + udev->offload_usage++; + usb_autosuspend_device(udev); + usb_unlock_device(udev); + + return ret; +} +EXPORT_SYMBOL_GPL(usb_offload_get); + +/** + * usb_offload_put - drop the offload_usage of a USB device + * @udev: the USB device to drop its offload_usage + * + * The inverse operation of usb_offload_get, which drops the offload_usage of + * a USB device. This information allows the USB driver to adjust its power + * management policy based on offload activity. + * + * Return: 0 on success. A negative error code otherwise. + */ +int usb_offload_put(struct usb_device *udev) +{ + int ret; + + usb_lock_device(udev); + if (udev->state == USB_STATE_NOTATTACHED) { + usb_unlock_device(udev); + return -ENODEV; + } + + if (udev->state == USB_STATE_SUSPENDED || + udev->offload_at_suspend) { + usb_unlock_device(udev); + return -EBUSY; + } + + /* + * offload_usage could only be modified when the device is active, since + * it will alter the suspend flow of the device. + */ + ret = usb_autoresume_device(udev); + if (ret < 0) { + usb_unlock_device(udev); + return ret; + } + + /* Drop the count when it wasn't 0, ignore the operation otherwise. */ + if (udev->offload_usage) + udev->offload_usage--; + usb_autosuspend_device(udev); + usb_unlock_device(udev); + + return ret; +} +EXPORT_SYMBOL_GPL(usb_offload_put); + +/** + * usb_offload_check - check offload activities on a USB device + * @udev: the USB device to check its offload activity. + * + * Check if there are any offload activity on the USB device right now. This + * information could be used for power management or other forms of resource + * management. + * + * The caller must hold @udev's device lock. In addition, the caller should + * ensure downstream usb devices are all either suspended or marked as + * "offload_at_suspend" to ensure the correctness of the return value. + * + * Returns true on any offload activity, false otherwise. + */ +bool usb_offload_check(struct usb_device *udev) __must_hold(&udev->dev->mutex) +{ + struct usb_device *child; + bool active; + int port1; + + usb_hub_for_each_child(udev, port1, child) { + usb_lock_device(child); + active = usb_offload_check(child); + usb_unlock_device(child); + if (active) + return true; + } + + return !!udev->offload_usage; +} +EXPORT_SYMBOL_GPL(usb_offload_check); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index b88b6271cb30..b6b0b8489523 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -670,6 +670,7 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, set_dev_node(&dev->dev, dev_to_node(bus->sysdev)); dev->state = USB_STATE_ATTACHED; dev->lpm_disable_count = 1; + dev->offload_usage = 0; atomic_set(&dev->urbnum, 0); INIT_LIST_HEAD(&dev->ep0.urb_list); diff --git a/include/linux/usb.h b/include/linux/usb.h index 70ef00c42d22..e85105939af8 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -636,6 +636,8 @@ struct usb3_lpm_parameters { * @do_remote_wakeup: remote wakeup should be enabled * @reset_resume: needs reset instead of resume * @port_is_suspended: the upstream port is suspended (L2 or U3) + * @offload_at_suspend: offload activities during suspend is enabled. + * @offload_usage: number of offload activities happening on this usb device. * @slot_id: Slot ID assigned by xHCI * @l1_params: best effor service latency for USB2 L1 LPM state, and L1 timeout. * @u1_params: exit latencies for USB3 U1 LPM state, and hub-initiated timeout. @@ -724,6 +726,8 @@ struct usb_device { unsigned do_remote_wakeup:1; unsigned reset_resume:1; unsigned port_is_suspended:1; + unsigned offload_at_suspend:1; + int offload_usage; enum usb_link_tunnel_mode tunnel_mode; struct device_link *usb4_link; @@ -841,6 +845,20 @@ static inline void usb_mark_last_busy(struct usb_device *udev) { } #endif +#if IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) +int usb_offload_get(struct usb_device *udev); +int usb_offload_put(struct usb_device *udev); +bool usb_offload_check(struct usb_device *udev); +#else + +static inline int usb_offload_get(struct usb_device *udev) +{ return 0; } +static inline int usb_offload_put(struct usb_device *udev) +{ return 0; } +static inline bool usb_offload_check(struct usb_device *udev) +{ return false; } +#endif + extern int usb_disable_lpm(struct usb_device *udev); extern void usb_enable_lpm(struct usb_device *udev); /* Same as above, but these functions lock/unlock the bandwidth_mutex. */ -- cgit v1.2.3 From ef82a4803aabaf623bfcae07981406f1386eabf9 Mon Sep 17 00:00:00 2001 From: Guan-Yu Lin Date: Thu, 11 Sep 2025 14:20:15 +0000 Subject: xhci: sideband: add api to trace sideband usage The existing sideband driver only registers sidebands without tracking their active usage. To address this, sideband will now record its active usage when it creates/removes interrupters. In addition, a new api is introduced to provide a means for other dirvers to fetch sideband activity information on a USB host controller. Signed-off-by: Guan-Yu Lin Link: https://lore.kernel.org/r/20250911142051.90822-4-guanyulin@google.com Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250911142051.90822-4-guanyulin@google.com --- drivers/usb/host/xhci-sideband.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/usb/xhci-sideband.h | 9 +++++++++ 2 files changed, 45 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/host/xhci-sideband.c b/drivers/usb/host/xhci-sideband.c index d49f9886dd84..e771a476fef2 100644 --- a/drivers/usb/host/xhci-sideband.c +++ b/drivers/usb/host/xhci-sideband.c @@ -266,6 +266,31 @@ xhci_sideband_get_event_buffer(struct xhci_sideband *sb) } EXPORT_SYMBOL_GPL(xhci_sideband_get_event_buffer); +/** + * xhci_sideband_check - check the existence of active sidebands + * @hcd: the host controller driver associated with the target host controller + * + * Allow other drivers, such as usb controller driver, to check if there are + * any sideband activity on the host controller. This information could be used + * for power management or other forms of resource management. The caller should + * ensure downstream usb devices are all either suspended or marked as + * "offload_at_suspend" to ensure the correctness of the return value. + * + * Returns true on any active sideband existence, false otherwise. + */ +bool xhci_sideband_check(struct usb_hcd *hcd) +{ + struct usb_device *udev = hcd->self.root_hub; + bool active; + + usb_lock_device(udev); + active = usb_offload_check(udev); + usb_unlock_device(udev); + + return active; +} +EXPORT_SYMBOL_GPL(xhci_sideband_check); + /** * xhci_sideband_create_interrupter - creates a new interrupter for this sideband * @sb: sideband instance for this usb device @@ -286,6 +311,7 @@ xhci_sideband_create_interrupter(struct xhci_sideband *sb, int num_seg, bool ip_autoclear, u32 imod_interval, int intr_num) { int ret = 0; + struct usb_device *udev; if (!sb || !sb->xhci) return -ENODEV; @@ -304,6 +330,9 @@ xhci_sideband_create_interrupter(struct xhci_sideband *sb, int num_seg, goto out; } + udev = sb->vdev->udev; + ret = usb_offload_get(udev); + sb->ir->ip_autoclear = ip_autoclear; out: @@ -323,6 +352,8 @@ EXPORT_SYMBOL_GPL(xhci_sideband_create_interrupter); void xhci_sideband_remove_interrupter(struct xhci_sideband *sb) { + struct usb_device *udev; + if (!sb || !sb->ir) return; @@ -330,6 +361,11 @@ xhci_sideband_remove_interrupter(struct xhci_sideband *sb) xhci_remove_secondary_interrupter(xhci_to_hcd(sb->xhci), sb->ir); sb->ir = NULL; + udev = sb->vdev->udev; + + if (udev->state != USB_STATE_NOTATTACHED) + usb_offload_put(udev); + mutex_unlock(&sb->mutex); } EXPORT_SYMBOL_GPL(xhci_sideband_remove_interrupter); diff --git a/include/linux/usb/xhci-sideband.h b/include/linux/usb/xhci-sideband.h index 45288c392f6e..005257085dcb 100644 --- a/include/linux/usb/xhci-sideband.h +++ b/include/linux/usb/xhci-sideband.h @@ -11,6 +11,7 @@ #include #include +#include #define EP_CTX_PER_DEV 31 /* FIXME defined twice, from xhci.h */ @@ -83,6 +84,14 @@ xhci_sideband_get_endpoint_buffer(struct xhci_sideband *sb, struct usb_host_endpoint *host_ep); struct sg_table * xhci_sideband_get_event_buffer(struct xhci_sideband *sb); + +#if IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) +bool xhci_sideband_check(struct usb_hcd *hcd); +#else +static inline bool xhci_sideband_check(struct usb_hcd *hcd) +{ return false; } +#endif /* IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) */ + int xhci_sideband_create_interrupter(struct xhci_sideband *sb, int num_seg, bool ip_autoclear, u32 imod_interval, int intr_num); -- cgit v1.2.3 From 70da02061499ca89ab92f7c4310f815d5fe674ec Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Mon, 8 Sep 2025 07:35:22 +0000 Subject: iio: add power and energy measurement modifiers Add new IIO modifiers to support power and energy measurement devices: Power modifiers: - IIO_MOD_ACTIVE: Real power consumed by the load - IIO_MOD_REACTIVE: Power that oscillates between source and load - IIO_MOD_APPARENT: Magnitude of complex power Signal quality modifiers: - IIO_MOD_RMS: Root Mean Square value Additionally adds: - IIO_CHAN_INFO_POWERFACTOR: Power factor channel info type for representing the ratio of active power to apparent power These modifiers enable proper representation of power measurement devices like energy meters and power analyzers. Signed-off-by: Antoniu Miclaus Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio | 29 +++++++++++++++++++++++++++++ drivers/iio/industrialio-core.c | 5 +++++ include/linux/iio/types.h | 1 + include/uapi/linux/iio/types.h | 4 ++++ tools/iio/iio_event_monitor.c | 8 ++++++++ 5 files changed, 47 insertions(+) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 2fb2cea4b192..78da68826307 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -167,7 +167,18 @@ Description: is required is a consistent labeling. Units after application of scale and offset are millivolts. +What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_rms_raw +KernelVersion: 6.18 +Contact: linux-iio@vger.kernel.org +Description: + Raw (unscaled) Root Mean Square (RMS) voltage measurement from + channel Y. Units after application of scale and offset are + millivolts. + What: /sys/bus/iio/devices/iio:deviceX/in_powerY_raw +What: /sys/bus/iio/devices/iio:deviceX/in_powerY_active_raw +What: /sys/bus/iio/devices/iio:deviceX/in_powerY_reactive_raw +What: /sys/bus/iio/devices/iio:deviceX/in_powerY_apparent_raw KernelVersion: 4.5 Contact: linux-iio@vger.kernel.org Description: @@ -176,6 +187,13 @@ Description: unique to allow association with event codes. Units after application of scale and offset are milliwatts. +What: /sys/bus/iio/devices/iio:deviceX/in_powerY_powerfactor +KernelVersion: 6.18 +Contact: linux-iio@vger.kernel.org +Description: + Power factor measurement from channel Y. Power factor is the + ratio of active power to apparent power. The value is unitless. + What: /sys/bus/iio/devices/iio:deviceX/in_capacitanceY_raw KernelVersion: 3.2 Contact: linux-iio@vger.kernel.org @@ -1569,6 +1587,9 @@ Description: What: /sys/.../iio:deviceX/in_energy_input What: /sys/.../iio:deviceX/in_energy_raw +What: /sys/.../iio:deviceX/in_energyY_active_raw +What: /sys/.../iio:deviceX/in_energyY_reactive_raw +What: /sys/.../iio:deviceX/in_energyY_apparent_raw KernelVersion: 4.0 Contact: linux-iio@vger.kernel.org Description: @@ -1707,6 +1728,14 @@ Description: component of the signal while the 'q' channel contains the quadrature component. +What: /sys/bus/iio/devices/iio:deviceX/in_altcurrentY_rms_raw +KernelVersion: 6.18 +Contact: linux-iio@vger.kernel.org +Description: + Raw (unscaled no bias removal etc.) Root Mean Square (RMS) current + measurement from channel Y. Units after application of scale and + offset are milliamps. + What: /sys/.../iio:deviceX/in_energy_en What: /sys/.../iio:deviceX/in_distance_en What: /sys/.../iio:deviceX/in_velocity_sqrt(x^2+y^2+z^2)_en diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 4ff2c44f9324..88c3d585a1bd 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -153,6 +153,10 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_PITCH] = "pitch", [IIO_MOD_YAW] = "yaw", [IIO_MOD_ROLL] = "roll", + [IIO_MOD_RMS] = "rms", + [IIO_MOD_ACTIVE] = "active", + [IIO_MOD_REACTIVE] = "reactive", + [IIO_MOD_APPARENT] = "apparent", }; /* relies on pairs of these shared then separate */ @@ -190,6 +194,7 @@ static const char * const iio_chan_info_postfix[] = { [IIO_CHAN_INFO_ZEROPOINT] = "zeropoint", [IIO_CHAN_INFO_TROUGH] = "trough_raw", [IIO_CHAN_INFO_CONVDELAY] = "convdelay", + [IIO_CHAN_INFO_POWERFACTOR] = "powerfactor", }; /** * iio_device_id() - query the unique ID for the device diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index ad2761efcc83..34eebad12d2c 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -70,6 +70,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_ZEROPOINT, IIO_CHAN_INFO_TROUGH, IIO_CHAN_INFO_CONVDELAY, + IIO_CHAN_INFO_POWERFACTOR, }; #endif /* _IIO_TYPES_H_ */ diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 3c3cc1497a1e..6d269b844271 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -109,6 +109,10 @@ enum iio_modifier { IIO_MOD_ROLL, IIO_MOD_LIGHT_UVA, IIO_MOD_LIGHT_UVB, + IIO_MOD_RMS, + IIO_MOD_ACTIVE, + IIO_MOD_REACTIVE, + IIO_MOD_APPARENT, }; enum iio_event_type { diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index d26aff649f3f..03ca33869ce8 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -141,6 +141,10 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_PITCH] = "pitch", [IIO_MOD_YAW] = "yaw", [IIO_MOD_ROLL] = "roll", + [IIO_MOD_RMS] = "rms", + [IIO_MOD_ACTIVE] = "active", + [IIO_MOD_REACTIVE] = "reactive", + [IIO_MOD_APPARENT] = "apparent", }; static bool event_is_known(struct iio_event_data *event) @@ -240,6 +244,10 @@ static bool event_is_known(struct iio_event_data *event) case IIO_MOD_PM4: case IIO_MOD_PM10: case IIO_MOD_O2: + case IIO_MOD_RMS: + case IIO_MOD_ACTIVE: + case IIO_MOD_REACTIVE: + case IIO_MOD_APPARENT: break; default: return false; -- cgit v1.2.3 From 3422b4bc606eee2ba7758ea9347c83332eeec3e3 Mon Sep 17 00:00:00 2001 From: Duje Mihanović Date: Thu, 11 Sep 2025 14:43:45 +0200 Subject: iio: adc: Add driver for Marvell 88PM886 PMIC ADC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marvell's 88PM886 PMIC has a so-called General Purpose ADC used for monitoring various system voltages and temperatures. Add the relevant register definitions to the MFD header and a driver for the ADC. Acked-by: Karel Balej # for the PMIC Signed-off-by: Duje Mihanović Signed-off-by: Jonathan Cameron --- MAINTAINERS | 5 + drivers/iio/adc/88pm886-gpadc.c | 393 ++++++++++++++++++++++++++++++++++++++++ drivers/iio/adc/Kconfig | 13 ++ drivers/iio/adc/Makefile | 1 + include/linux/mfd/88pm886.h | 58 ++++++ 5 files changed, 470 insertions(+) create mode 100644 drivers/iio/adc/88pm886-gpadc.c (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 75615d82593e..5c572204933d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14714,6 +14714,11 @@ F: drivers/regulator/88pm886-regulator.c F: drivers/rtc/rtc-88pm886.c F: include/linux/mfd/88pm886.h +MARVELL 88PM886 PMIC GPADC DRIVER +M: Duje Mihanović +S: Maintained +F: drivers/iio/adc/88pm886-gpadc.c + MARVELL ARMADA 3700 PHY DRIVERS M: Miquel Raynal S: Maintained diff --git a/drivers/iio/adc/88pm886-gpadc.c b/drivers/iio/adc/88pm886-gpadc.c new file mode 100644 index 000000000000..cffe35136685 --- /dev/null +++ b/drivers/iio/adc/88pm886-gpadc.c @@ -0,0 +1,393 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2025, Duje Mihanović + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include + +struct pm886_gpadc { + struct regmap *map; +}; + +enum pm886_gpadc_channel { + VSC_CHAN, + VCHG_PWR_CHAN, + VCF_OUT_CHAN, + VBAT_CHAN, + VBAT_SLP_CHAN, + VBUS_CHAN, + + GPADC0_CHAN, + GPADC1_CHAN, + GPADC2_CHAN, + GPADC3_CHAN, + + GND_DET1_CHAN, + GND_DET2_CHAN, + MIC_DET_CHAN, + + TINT_CHAN, +}; + +static const int pm886_gpadc_regs[] = { + [VSC_CHAN] = PM886_REG_GPADC_VSC, + [VCHG_PWR_CHAN] = PM886_REG_GPADC_VCHG_PWR, + [VCF_OUT_CHAN] = PM886_REG_GPADC_VCF_OUT, + [VBAT_CHAN] = PM886_REG_GPADC_VBAT, + [VBAT_SLP_CHAN] = PM886_REG_GPADC_VBAT_SLP, + [VBUS_CHAN] = PM886_REG_GPADC_VBUS, + + [GPADC0_CHAN] = PM886_REG_GPADC_GPADC0, + [GPADC1_CHAN] = PM886_REG_GPADC_GPADC1, + [GPADC2_CHAN] = PM886_REG_GPADC_GPADC2, + [GPADC3_CHAN] = PM886_REG_GPADC_GPADC3, + + [GND_DET1_CHAN] = PM886_REG_GPADC_GND_DET1, + [GND_DET2_CHAN] = PM886_REG_GPADC_GND_DET2, + [MIC_DET_CHAN] = PM886_REG_GPADC_MIC_DET, + + [TINT_CHAN] = PM886_REG_GPADC_TINT, +}; + +#define ADC_CHANNEL_VOLTAGE(index, lsb, name) \ +{ \ + .type = IIO_VOLTAGE, \ + .indexed = 1, \ + .channel = index, \ + .address = lsb, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_SCALE), \ + .datasheet_name = name, \ +} + +#define ADC_CHANNEL_RESISTANCE(index, lsb, name) \ +{ \ + .type = IIO_RESISTANCE, \ + .indexed = 1, \ + .channel = index, \ + .address = lsb, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), \ + .datasheet_name = name, \ +} + +#define ADC_CHANNEL_TEMPERATURE(index, lsb, name) \ +{ \ + .type = IIO_TEMP, \ + .indexed = 1, \ + .channel = index, \ + .address = lsb, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_SCALE) | \ + BIT(IIO_CHAN_INFO_OFFSET), \ + .datasheet_name = name, \ +} + +static const struct iio_chan_spec pm886_gpadc_channels[] = { + ADC_CHANNEL_VOLTAGE(VSC_CHAN, 1367, "vsc"), + ADC_CHANNEL_VOLTAGE(VCHG_PWR_CHAN, 1709, "vchg_pwr"), + ADC_CHANNEL_VOLTAGE(VCF_OUT_CHAN, 1367, "vcf_out"), + ADC_CHANNEL_VOLTAGE(VBAT_CHAN, 1367, "vbat"), + ADC_CHANNEL_VOLTAGE(VBAT_SLP_CHAN, 1367, "vbat_slp"), + ADC_CHANNEL_VOLTAGE(VBUS_CHAN, 1709, "vbus"), + + ADC_CHANNEL_RESISTANCE(GPADC0_CHAN, 342, "gpadc0"), + ADC_CHANNEL_RESISTANCE(GPADC1_CHAN, 342, "gpadc1"), + ADC_CHANNEL_RESISTANCE(GPADC2_CHAN, 342, "gpadc2"), + ADC_CHANNEL_RESISTANCE(GPADC3_CHAN, 342, "gpadc3"), + + ADC_CHANNEL_VOLTAGE(GND_DET1_CHAN, 342, "gnddet1"), + ADC_CHANNEL_VOLTAGE(GND_DET2_CHAN, 342, "gnddet2"), + ADC_CHANNEL_VOLTAGE(MIC_DET_CHAN, 1367, "mic_det"), + + ADC_CHANNEL_TEMPERATURE(TINT_CHAN, 104, "tint"), +}; + +static const struct regmap_config pm886_gpadc_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = PM886_GPADC_MAX_REGISTER, +}; + +static int gpadc_get_raw(struct iio_dev *iio, enum pm886_gpadc_channel chan) +{ + struct pm886_gpadc *gpadc = iio_priv(iio); + __be16 buf; + int ret; + + ret = regmap_bulk_read(gpadc->map, pm886_gpadc_regs[chan], &buf, sizeof(buf)); + if (ret) + return ret; + + return be16_to_cpu(buf) >> 4; +} + +static int +gpadc_set_bias(struct pm886_gpadc *gpadc, enum pm886_gpadc_channel chan, bool on) +{ + unsigned int gpadc_num = chan - GPADC0_CHAN; + unsigned int bits = BIT(gpadc_num + 4) | BIT(gpadc_num); + + return regmap_assign_bits(gpadc->map, PM886_REG_GPADC_CONFIG(0x14), bits, on); +} + +static int +gpadc_find_bias_current(struct iio_dev *iio, struct iio_chan_spec const *chan, + unsigned int *raw_uV, unsigned int *raw_uA) +{ + struct pm886_gpadc *gpadc = iio_priv(iio); + unsigned int gpadc_num = chan->channel - GPADC0_CHAN; + unsigned int reg = PM886_REG_GPADC_CONFIG(0xb + gpadc_num); + unsigned long lsb = chan->address; + int ret; + + for (unsigned int i = 0; i < PM886_GPADC_BIAS_LEVELS; i++) { + ret = regmap_update_bits(gpadc->map, reg, GENMASK(3, 0), i); + if (ret) + return ret; + + /* Wait for the new bias level to apply. */ + fsleep(5 * USEC_PER_MSEC); + + *raw_uA = PM886_GPADC_INDEX_TO_BIAS_uA(i); + *raw_uV = gpadc_get_raw(iio, chan->channel) * lsb; + + /* + * Vendor kernel errors out above 1.25 V, but testing shows + * that the resistance of the battery detection channel (GPADC2 + * on coreprimevelte) reaches about 1.4 MΩ when the battery is + * removed, which can't be measured with such a low upper + * limit. Therefore, to be able to detect the battery without + * ugly externs as used in the vendor fuel gauge driver, + * increase this limit a bit. + */ + if (WARN_ON(*raw_uV > 1500 * (MICRO / MILLI))) + return -EIO; + + /* + * Vendor kernel errors out under 300 mV, but for the same + * reason as above (except the channel hovers around 3.5 kΩ + * with battery present) reduce this limit. + */ + if (*raw_uV < 200 * (MICRO / MILLI)) { + dev_dbg(&iio->dev, "bad bias for chan %d: %d uA @ %d uV\n", + chan->channel, *raw_uA, *raw_uV); + continue; + } + + dev_dbg(&iio->dev, "good bias for chan %d: %d uA @ %d uV\n", + chan->channel, *raw_uA, *raw_uV); + return 0; + } + + dev_err(&iio->dev, "failed to find good bias for chan %d\n", chan->channel); + return -EINVAL; +} + +static int +gpadc_get_resistance_ohm(struct iio_dev *iio, struct iio_chan_spec const *chan) +{ + struct pm886_gpadc *gpadc = iio_priv(iio); + unsigned int raw_uV, raw_uA; + int ret; + + ret = gpadc_set_bias(gpadc, chan->channel, true); + if (ret) + goto out; + + ret = gpadc_find_bias_current(iio, chan, &raw_uV, &raw_uA); + if (ret) + goto out; + + ret = DIV_ROUND_CLOSEST(raw_uV, raw_uA); +out: + gpadc_set_bias(gpadc, chan->channel, false); + return ret; +} + +static int +__pm886_gpadc_read_raw(struct iio_dev *iio, struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + unsigned long lsb = chan->address; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + *val = gpadc_get_raw(iio, chan->channel); + if (*val < 0) + return *val; + + return IIO_VAL_INT; + case IIO_CHAN_INFO_SCALE: + *val = lsb; + + if (chan->type == IIO_VOLTAGE) { + *val2 = MILLI; + return IIO_VAL_FRACTIONAL; + } else { + return IIO_VAL_INT; + } + case IIO_CHAN_INFO_OFFSET: + /* Raw value is 104 millikelvin/LSB, convert it to 104 millicelsius/LSB */ + *val = ABSOLUTE_ZERO_MILLICELSIUS; + *val2 = lsb; + return IIO_VAL_FRACTIONAL; + case IIO_CHAN_INFO_PROCESSED: + *val = gpadc_get_resistance_ohm(iio, chan); + if (*val < 0) + return *val; + + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int pm886_gpadc_read_raw(struct iio_dev *iio, struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + struct device *dev = iio->dev.parent; + int ret; + + ret = pm_runtime_resume_and_get(dev); + if (ret) + return ret; + + ret = __pm886_gpadc_read_raw(iio, chan, val, val2, mask); + + pm_runtime_put_autosuspend(dev); + return ret; +} + +static int pm886_gpadc_hw_enable(struct regmap *map) +{ + const u8 config[] = { + PM886_GPADC_CONFIG1_EN_ALL, + PM886_GPADC_CONFIG2_EN_ALL, + PM886_GPADC_GND_DET2_EN, + }; + int ret; + + /* Enable the ADC block. */ + ret = regmap_set_bits(map, PM886_REG_GPADC_CONFIG(0x6), BIT(0)); + if (ret) + return ret; + + /* Enable all channels. */ + return regmap_bulk_write(map, PM886_REG_GPADC_CONFIG(0x1), config, ARRAY_SIZE(config)); +} + +static int pm886_gpadc_hw_disable(struct regmap *map) +{ + return regmap_clear_bits(map, PM886_REG_GPADC_CONFIG(0x6), BIT(0)); +} + +static const struct iio_info pm886_gpadc_iio_info = { + .read_raw = pm886_gpadc_read_raw, +}; + +static int pm886_gpadc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct pm886_chip *chip = dev_get_drvdata(dev->parent); + struct i2c_client *client = chip->client; + struct pm886_gpadc *gpadc; + struct i2c_client *page; + struct iio_dev *iio; + int ret; + + iio = devm_iio_device_alloc(dev, sizeof(*gpadc)); + if (!iio) + return -ENOMEM; + + gpadc = iio_priv(iio); + dev_set_drvdata(dev, iio); + + page = devm_i2c_new_dummy_device(dev, client->adapter, + client->addr + PM886_PAGE_OFFSET_GPADC); + if (IS_ERR(page)) + return dev_err_probe(dev, PTR_ERR(page), "Failed to initialize GPADC page\n"); + + gpadc->map = devm_regmap_init_i2c(page, &pm886_gpadc_regmap_config); + if (IS_ERR(gpadc->map)) + return dev_err_probe(dev, PTR_ERR(gpadc->map), + "Failed to initialize GPADC regmap\n"); + + iio->name = "88pm886-gpadc"; + iio->modes = INDIO_DIRECT_MODE; + iio->info = &pm886_gpadc_iio_info; + iio->channels = pm886_gpadc_channels; + iio->num_channels = ARRAY_SIZE(pm886_gpadc_channels); + device_set_node(&iio->dev, dev_fwnode(dev->parent)); + + ret = devm_pm_runtime_enable(dev); + if (ret) + return dev_err_probe(dev, ret, "Failed to enable runtime PM\n"); + + pm_runtime_set_autosuspend_delay(dev, 50); + pm_runtime_use_autosuspend(dev); + ret = devm_iio_device_register(dev, iio); + if (ret) + return dev_err_probe(dev, ret, "Failed to register ADC\n"); + + return 0; +} + +static int pm886_gpadc_runtime_resume(struct device *dev) +{ + struct iio_dev *iio = dev_get_drvdata(dev); + struct pm886_gpadc *gpadc = iio_priv(iio); + + return pm886_gpadc_hw_enable(gpadc->map); +} + +static int pm886_gpadc_runtime_suspend(struct device *dev) +{ + struct iio_dev *iio = dev_get_drvdata(dev); + struct pm886_gpadc *gpadc = iio_priv(iio); + + return pm886_gpadc_hw_disable(gpadc->map); +} + +static DEFINE_RUNTIME_DEV_PM_OPS(pm886_gpadc_pm_ops, + pm886_gpadc_runtime_suspend, + pm886_gpadc_runtime_resume, NULL); + +static const struct platform_device_id pm886_gpadc_id[] = { + { "88pm886-gpadc" }, + { } +}; +MODULE_DEVICE_TABLE(platform, pm886_gpadc_id); + +static struct platform_driver pm886_gpadc_driver = { + .driver = { + .name = "88pm886-gpadc", + .pm = pm_ptr(&pm886_gpadc_pm_ops), + }, + .probe = pm886_gpadc_probe, + .id_table = pm886_gpadc_id, +}; +module_platform_driver(pm886_gpadc_driver); + +MODULE_AUTHOR("Duje Mihanović "); +MODULE_DESCRIPTION("Marvell 88PM886 GPADC driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 09ab25c8cfdb..8c84b2c1d223 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -9,6 +9,19 @@ menu "Analog to digital converters" config IIO_ADC_HELPER tristate +config 88PM886_GPADC + tristate "Marvell 88PM886 GPADC driver" + depends on MFD_88PM886_PMIC + default MFD_88PM886_PMIC + help + Say Y here to enable support for the GPADC (General Purpose ADC) + found on the Marvell 88PM886 PMIC. The GPADC measures various + internal voltages and temperatures, including (but not limited to) + system, battery and USB Vbus. + + To compile this driver as a module, choose M here: the module will be + called 88pm886-gpadc. + config AB8500_GPADC bool "ST-Ericsson AB8500 GPADC driver" depends on AB8500_CORE && REGULATOR_AB8500 diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index 4d071f290911..611c16430621 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_IIO_ADC_HELPER) += industrialio-adc.o # When adding new entries keep the list in alphabetical order +obj-$(CONFIG_88PM886_GPADC) += 88pm886-gpadc.o obj-$(CONFIG_AB8500_GPADC) += ab8500-gpadc.o obj-$(CONFIG_AD_SIGMA_DELTA) += ad_sigma_delta.o obj-$(CONFIG_AD4000) += ad4000.o diff --git a/include/linux/mfd/88pm886.h b/include/linux/mfd/88pm886.h index 85eca44f39ab..38892ba7b8a4 100644 --- a/include/linux/mfd/88pm886.h +++ b/include/linux/mfd/88pm886.h @@ -10,6 +10,7 @@ #define PM886_IRQ_ONKEY 0 #define PM886_PAGE_OFFSET_REGULATORS 1 +#define PM886_PAGE_OFFSET_GPADC 2 #define PM886_REG_ID 0x00 @@ -70,6 +71,63 @@ #define PM886_LDO_VSEL_MASK 0x0f #define PM886_BUCK_VSEL_MASK 0x7f +/* GPADC enable/disable registers */ +#define PM886_REG_GPADC_CONFIG(n) (n) + +#define PM886_GPADC_VSC_EN BIT(0) +#define PM886_GPADC_VBAT_EN BIT(1) +#define PM886_GPADC_GNDDET1_EN BIT(3) +#define PM886_GPADC_VBUS_EN BIT(4) +#define PM886_GPADC_VCHG_PWR_EN BIT(5) +#define PM886_GPADC_VCF_OUT_EN BIT(6) +#define PM886_GPADC_CONFIG1_EN_ALL \ + (PM886_GPADC_VSC_EN | \ + PM886_GPADC_VBAT_EN | \ + PM886_GPADC_GNDDET1_EN | \ + PM886_GPADC_VBUS_EN | \ + PM886_GPADC_VCHG_PWR_EN | \ + PM886_GPADC_VCF_OUT_EN) + +#define PM886_GPADC_TINT_EN BIT(0) +#define PM886_GPADC_PMODE_EN BIT(1) +#define PM886_GPADC_GPADC0_EN BIT(2) +#define PM886_GPADC_GPADC1_EN BIT(3) +#define PM886_GPADC_GPADC2_EN BIT(4) +#define PM886_GPADC_GPADC3_EN BIT(5) +#define PM886_GPADC_MIC_DET_EN BIT(6) +#define PM886_GPADC_CONFIG2_EN_ALL \ + (PM886_GPADC_TINT_EN | \ + PM886_GPADC_GPADC0_EN | \ + PM886_GPADC_GPADC1_EN | \ + PM886_GPADC_GPADC2_EN | \ + PM886_GPADC_GPADC3_EN | \ + PM886_GPADC_MIC_DET_EN) + +/* No CONFIG3_EN_ALL because this is the only bit there. */ +#define PM886_GPADC_GND_DET2_EN BIT(0) + +/* GPADC channel registers */ +#define PM886_REG_GPADC_VSC 0x40 +#define PM886_REG_GPADC_VCHG_PWR 0x4c +#define PM886_REG_GPADC_VCF_OUT 0x4e +#define PM886_REG_GPADC_TINT 0x50 +#define PM886_REG_GPADC_GPADC0 0x54 +#define PM886_REG_GPADC_GPADC1 0x56 +#define PM886_REG_GPADC_GPADC2 0x58 +#define PM886_REG_GPADC_VBAT 0xa0 +#define PM886_REG_GPADC_GND_DET1 0xa4 +#define PM886_REG_GPADC_GND_DET2 0xa6 +#define PM886_REG_GPADC_VBUS 0xa8 +#define PM886_REG_GPADC_GPADC3 0xaa +#define PM886_REG_GPADC_MIC_DET 0xac +#define PM886_REG_GPADC_VBAT_SLP 0xb0 + +/* VBAT_SLP is the last register and is 2 bytes wide like other channels. */ +#define PM886_GPADC_MAX_REGISTER (PM886_REG_GPADC_VBAT_SLP + 1) + +#define PM886_GPADC_BIAS_LEVELS 16 +#define PM886_GPADC_INDEX_TO_BIAS_uA(i) (1 + (i) * 5) + struct pm886_chip { struct i2c_client *client; unsigned int chip_id; -- cgit v1.2.3 From e68f150bc11d0a05cbe984a4e5c0f72a95cae07d Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Mon, 18 Aug 2025 09:46:15 +0300 Subject: memblock: drop for_each_free_mem_pfn_range_in_zone_from() for_each_free_mem_pfn_range_in_zone_from() and its "backend" implementation __next_mem_pfn_range_in_zone() were only used by deferred initialization of the memory map. Remove them as they are not used anymore. Reviewed-by: Wei Yang Reviewed-by: David Hildenbrand Signed-off-by: Mike Rapoport (Microsoft) --- .clang-format | 1 - include/linux/memblock.h | 22 ----------------- mm/memblock.c | 64 ------------------------------------------------ 3 files changed, 87 deletions(-) (limited to 'include/linux') diff --git a/.clang-format b/.clang-format index 48405c54ef27..f371a13b4d19 100644 --- a/.clang-format +++ b/.clang-format @@ -294,7 +294,6 @@ ForEachMacros: - 'for_each_fib6_node_rt_rcu' - 'for_each_fib6_walker_rt' - 'for_each_file_lock' - - 'for_each_free_mem_pfn_range_in_zone_from' - 'for_each_free_mem_range' - 'for_each_free_mem_range_reverse' - 'for_each_func_rsrc' diff --git a/include/linux/memblock.h b/include/linux/memblock.h index fcda8481de9a..221118b5a16e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -324,28 +324,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) -#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, - unsigned long *out_spfn, - unsigned long *out_epfn); - -/** - * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific - * free memblock areas from a given point - * @i: u64 used as loop variable - * @zone: zone in which all of the memory blocks reside - * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL - * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL - * - * Walks over free (memory && !reserved) areas of memblock in a specific - * zone, continuing from current position. Available as soon as memblock is - * initialized. - */ -#define for_each_free_mem_pfn_range_in_zone_from(i, zone, p_start, p_end) \ - for (; i != U64_MAX; \ - __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end)) - -#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ /** * for_each_free_mem_range - iterate through free memblock areas diff --git a/mm/memblock.c b/mm/memblock.c index 117d963e677c..120a501a887a 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1445,70 +1445,6 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, return 0; } -#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -/** - * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone() - * - * @idx: pointer to u64 loop variable - * @zone: zone in which all of the memory blocks reside - * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL - * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL - * - * This function is meant to be a zone/pfn specific wrapper for the - * for_each_mem_range type iterators. Specifically they are used in the - * deferred memory init routines and as such we were duplicating much of - * this logic throughout the code. So instead of having it in multiple - * locations it seemed like it would make more sense to centralize this to - * one new iterator that does everything they need. - */ -void __init_memblock -__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, - unsigned long *out_spfn, unsigned long *out_epfn) -{ - int zone_nid = zone_to_nid(zone); - phys_addr_t spa, epa; - - __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, - &memblock.memory, &memblock.reserved, - &spa, &epa, NULL); - - while (*idx != U64_MAX) { - unsigned long epfn = PFN_DOWN(epa); - unsigned long spfn = PFN_UP(spa); - - /* - * Verify the end is at least past the start of the zone and - * that we have at least one PFN to initialize. - */ - if (zone->zone_start_pfn < epfn && spfn < epfn) { - /* if we went too far just stop searching */ - if (zone_end_pfn(zone) <= spfn) { - *idx = U64_MAX; - break; - } - - if (out_spfn) - *out_spfn = max(zone->zone_start_pfn, spfn); - if (out_epfn) - *out_epfn = min(zone_end_pfn(zone), epfn); - - return; - } - - __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, - &memblock.memory, &memblock.reserved, - &spa, &epa, NULL); - } - - /* signal end of iteration */ - if (out_spfn) - *out_spfn = ULONG_MAX; - if (out_epfn) - *out_epfn = 0; -} - -#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ - /** * memblock_alloc_range_nid - allocate boot memory block * @size: size of memory block to be allocated in bytes -- cgit v1.2.3 From 7788255aba6545a27b8d143c5256536f8dfb2c0a Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Tue, 9 Sep 2025 10:00:06 +0000 Subject: KVM: Implement barriers before accessing kvm->buses[] on SRCU read paths This ensures that, if a VCPU has "observed" that an IO registration has occurred, the instruction currently being trapped or emulated will also observe the IO registration. At the same time, enforce that kvm_get_bus() is used only on the update side, ensuring that a long-term reference cannot be obtained by an SRCU reader. Signed-off-by: Keir Fraser Signed-off-by: Marc Zyngier --- arch/x86/kvm/vmx/vmx.c | 7 +++++++ include/linux/kvm_host.h | 10 +++++++--- virt/kvm/kvm_main.c | 32 ++++++++++++++++++++++++++------ 3 files changed, 40 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index aa157fe5b7b3..0bdf9405969a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5785,6 +5785,13 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; + /* + * Ensure that any updates to kvm->buses[] observed by the + * previous instruction (emulated or otherwise) are also + * visible to the instruction KVM is about to emulate. + */ + smp_rmb(); + if (!kvm_emulate_instruction(vcpu, 0)) return 0; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15656b7fba6c..e7d6111cf254 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -966,11 +966,15 @@ static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm) return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET); } +/* + * Get a bus reference under the update-side lock. No long-term SRCU reader + * references are permitted, to avoid stale reads vs concurrent IO + * registrations. + */ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) { - return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, - lockdep_is_held(&kvm->slots_lock) || - !refcount_read(&kvm->users_count)); + return rcu_dereference_protected(kvm->buses[idx], + lockdep_is_held(&kvm->slots_lock)); } static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6c07dd423458..870ad8ea93a7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1103,6 +1103,14 @@ void __weak kvm_arch_create_vm_debugfs(struct kvm *kvm) { } +/* Called only on cleanup and destruction paths when there are no users. */ +static inline struct kvm_io_bus *kvm_get_bus_for_destruction(struct kvm *kvm, + enum kvm_bus idx) +{ + return rcu_dereference_protected(kvm->buses[idx], + !refcount_read(&kvm->users_count)); +} + static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) { struct kvm *kvm = kvm_arch_alloc_vm(); @@ -1228,7 +1236,7 @@ out_err_no_disable: out_err_no_arch_destroy_vm: WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); for (i = 0; i < KVM_NR_BUSES; i++) - kfree(kvm_get_bus(kvm, i)); + kfree(kvm_get_bus_for_destruction(kvm, i)); kvm_free_irq_routing(kvm); out_err_no_irq_routing: cleanup_srcu_struct(&kvm->irq_srcu); @@ -1276,7 +1284,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { - struct kvm_io_bus *bus = kvm_get_bus(kvm, i); + struct kvm_io_bus *bus = kvm_get_bus_for_destruction(kvm, i); if (bus) kvm_io_bus_destroy(bus); @@ -5843,6 +5851,18 @@ static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, return -EOPNOTSUPP; } +static struct kvm_io_bus *kvm_get_bus_srcu(struct kvm *kvm, enum kvm_bus idx) +{ + /* + * Ensure that any updates to kvm_buses[] observed by the previous vCPU + * machine instruction are also visible to the vCPU machine instruction + * that triggered this call. + */ + smp_mb__after_srcu_read_lock(); + + return srcu_dereference(kvm->buses[idx], &kvm->srcu); +} + int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val) { @@ -5855,7 +5875,7 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx); if (!bus) return -ENOMEM; r = __kvm_io_bus_write(vcpu, bus, &range, val); @@ -5874,7 +5894,7 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, .len = len, }; - bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx); if (!bus) return -ENOMEM; @@ -5924,7 +5944,7 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx); if (!bus) return -ENOMEM; r = __kvm_io_bus_read(vcpu, bus, &range, val); @@ -6033,7 +6053,7 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, srcu_idx = srcu_read_lock(&kvm->srcu); - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + bus = kvm_get_bus_srcu(kvm, bus_idx); if (!bus) goto out_unlock; -- cgit v1.2.3 From 7d9a0273c45962e9a6bc06f3b87eef7c431c1853 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Tue, 9 Sep 2025 10:00:07 +0000 Subject: KVM: Avoid synchronize_srcu() in kvm_io_bus_register_dev() Device MMIO registration may happen quite frequently during VM boot, and the SRCU synchronization each time has a measurable effect on VM startup time. In our experiments it can account for around 25% of a VM's startup time. Replace the synchronization with a deferred free of the old kvm_io_bus structure. Tested-by: Li RongQing Signed-off-by: Keir Fraser Signed-off-by: Marc Zyngier --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e7d6111cf254..103be35caf0d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -206,6 +206,7 @@ struct kvm_io_range { struct kvm_io_bus { int dev_count; int ioeventfd_count; + struct rcu_head rcu; struct kvm_io_range range[]; }; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 870ad8ea93a7..bcef324ccbf2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1320,6 +1320,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_free_memslots(kvm, &kvm->__memslots[i][1]); } cleanup_srcu_struct(&kvm->irq_srcu); + srcu_barrier(&kvm->srcu); cleanup_srcu_struct(&kvm->srcu); #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES xa_destroy(&kvm->mem_attr_array); @@ -5952,6 +5953,13 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, } EXPORT_SYMBOL_GPL(kvm_io_bus_read); +static void __free_bus(struct rcu_head *rcu) +{ + struct kvm_io_bus *bus = container_of(rcu, struct kvm_io_bus, rcu); + + kfree(bus); +} + int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev) { @@ -5990,8 +5998,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, memcpy(new_bus->range + i + 1, bus->range + i, (bus->dev_count - i) * sizeof(struct kvm_io_range)); rcu_assign_pointer(kvm->buses[bus_idx], new_bus); - synchronize_srcu_expedited(&kvm->srcu); - kfree(bus); + call_srcu(&kvm->srcu, &bus->rcu, __free_bus); return 0; } -- cgit v1.2.3 From be975448a45cd024e2b98598eefc0e164ad93f09 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 Jul 2025 14:25:35 -0700 Subject: srcu: Document __srcu_read_{,un}lock_fast() implicit RCU readers This commit documents the implicit RCU readers that are implied by the this_cpu_inc() and atomic_long_inc() operations in __srcu_read_lock_fast() and __srcu_read_unlock_fast(). While in the area, fix the documentation of the memory pairing of atomic_long_inc() in __srcu_read_lock_fast(). [ paulmck: Apply Joel Fernandes feedback. ] Signed-off-by: Paul E. McKenney Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcutree.h | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 4d2fee4d3828..42098e0fa0b7 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -232,9 +232,27 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss * srcu_read_unlock_fast(). * * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side - * critical sections either because they disables interrupts, because they - * are a single instruction, or because they are a read-modify-write atomic - * operation, depending on the whims of the architecture. + * critical sections either because they disables interrupts, because + * they are a single instruction, or because they are read-modify-write + * atomic operations, depending on the whims of the architecture. + * This matters because the SRCU-fast grace-period mechanism uses either + * synchronize_rcu() or synchronize_rcu_expedited(), that is, RCU, + * *not* SRCU, in order to eliminate the need for the read-side smp_mb() + * invocations that are used by srcu_read_lock() and srcu_read_unlock(). + * The __srcu_read_unlock_fast() function also relies on this same RCU + * (again, *not* SRCU) trick to eliminate the need for smp_mb(). + * + * The key point behind this RCU trick is that if any part of a given + * RCU reader precedes the beginning of a given RCU grace period, then + * the entirety of that RCU reader and everything preceding it happens + * before the end of that same RCU grace period. Similarly, if any part + * of a given RCU reader follows the end of a given RCU grace period, + * then the entirety of that RCU reader and everything following it + * happens after the beginning of that same RCU grace period. Therefore, + * the operations labeled Y in __srcu_read_lock_fast() and those labeled Z + * in __srcu_read_unlock_fast() are ordered against the corresponding SRCU + * read-side critical section from the viewpoint of the SRCU grace period. + * This is all the ordering that is required, hence no calls to smp_mb(). * * This means that __srcu_read_lock_fast() is not all that fast * on architectures that support NMIs but do not supply NMI-safe @@ -245,9 +263,9 @@ static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct src struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) - this_cpu_inc(scp->srcu_locks.counter); /* Y */ + this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader. else - atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); /* Z */ + atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); // Y, and implicit RCU reader. barrier(); /* Avoid leaking the critical section. */ return scp; } @@ -258,23 +276,17 @@ static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct src * different CPU than that which was incremented by the corresponding * srcu_read_lock_fast(), but it must be within the same task. * - * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side - * critical sections either because they disables interrupts, because they - * are a single instruction, or because they are a read-modify-write atomic - * operation, depending on the whims of the architecture. - * - * This means that __srcu_read_unlock_fast() is not all that fast - * on architectures that support NMIs but do not supply NMI-safe - * implementations of this_cpu_inc(). + * Please see the __srcu_read_lock_fast() function's header comment for + * information on implicit RCU readers and NMI safety. */ static inline void notrace __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) { barrier(); /* Avoid leaking the critical section. */ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) - this_cpu_inc(scp->srcu_unlocks.counter); /* Z */ + this_cpu_inc(scp->srcu_unlocks.counter); // Z, and implicit RCU reader. else - atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */ + atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); // Z, and implicit RCU reader. } void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); -- cgit v1.2.3 From 5e0ae59159e3a07391a35865bb79ff335473fa79 Mon Sep 17 00:00:00 2001 From: Angela Czubak Date: Mon, 18 Aug 2025 23:08:42 +0000 Subject: HID: add haptics page defines Introduce haptic usages as defined in HID Usage Tables specification. Add HID units for newton and gram. Signed-off-by: Angela Czubak Co-developed-by: Jonathan Denose Signed-off-by: Jonathan Denose Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 2cc4f1e4ea96..10f113c758fe 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -156,6 +156,7 @@ struct hid_item { #define HID_UP_TELEPHONY 0x000b0000 #define HID_UP_CONSUMER 0x000c0000 #define HID_UP_DIGITIZER 0x000d0000 +#define HID_UP_HAPTIC 0x000e0000 #define HID_UP_PID 0x000f0000 #define HID_UP_BATTERY 0x00850000 #define HID_UP_CAMERA 0x00900000 @@ -316,6 +317,28 @@ struct hid_item { #define HID_DG_TOOLSERIALNUMBER 0x000d005b #define HID_DG_LATENCYMODE 0x000d0060 +#define HID_HP_SIMPLECONTROLLER 0x000e0001 +#define HID_HP_WAVEFORMLIST 0x000e0010 +#define HID_HP_DURATIONLIST 0x000e0011 +#define HID_HP_AUTOTRIGGER 0x000e0020 +#define HID_HP_MANUALTRIGGER 0x000e0021 +#define HID_HP_AUTOTRIGGERASSOCIATEDCONTROL 0x000e0022 +#define HID_HP_INTENSITY 0x000e0023 +#define HID_HP_REPEATCOUNT 0x000e0024 +#define HID_HP_RETRIGGERPERIOD 0x000e0025 +#define HID_HP_WAVEFORMVENDORPAGE 0x000e0026 +#define HID_HP_WAVEFORMVENDORID 0x000e0027 +#define HID_HP_WAVEFORMCUTOFFTIME 0x000e0028 +#define HID_HP_WAVEFORMNONE 0x000e1001 +#define HID_HP_WAVEFORMSTOP 0x000e1002 +#define HID_HP_WAVEFORMCLICK 0x000e1003 +#define HID_HP_WAVEFORMBUZZCONTINUOUS 0x000e1004 +#define HID_HP_WAVEFORMRUMBLECONTINUOUS 0x000e1005 +#define HID_HP_WAVEFORMPRESS 0x000e1006 +#define HID_HP_WAVEFORMRELEASE 0x000e1007 +#define HID_HP_VENDORWAVEFORMMIN 0x000e2001 +#define HID_HP_VENDORWAVEFORMMAX 0x000e2fff + #define HID_BAT_ABSOLUTESTATEOFCHARGE 0x00850065 #define HID_BAT_CHARGING 0x00850044 @@ -423,6 +446,12 @@ struct hid_item { #define HID_REPORT_PROTOCOL 1 #define HID_BOOT_PROTOCOL 0 +/* + * HID units + */ +#define HID_UNIT_GRAM 0x0101 +#define HID_UNIT_NEWTON 0xe111 + /* * This is the global environment of the parser. This information is * persistent for main-items. The global environment can be saved and -- cgit v1.2.3 From 4e584ac737884ef0fd80f6836b917972fad86b17 Mon Sep 17 00:00:00 2001 From: Angela Czubak Date: Mon, 18 Aug 2025 23:08:50 +0000 Subject: Input: MT - add INPUT_MT_TOTAL_FORCE flags Add a flag to generate ABS_PRESSURE as sum of ABS_MT_PRESSURE across all slots. This flag should be set if one knows a device reports true force and would like to report total force to the userspace. Signed-off-by: Angela Czubak Co-developed-by: Jonathan Denose Signed-off-by: Jonathan Denose Acked-by: Dmitry Torokhov Signed-off-by: Benjamin Tissoires --- drivers/input/input-mt.c | 14 ++++++++++---- include/linux/input/mt.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c index 337006dd9dcf..09f518897d4a 100644 --- a/drivers/input/input-mt.c +++ b/drivers/input/input-mt.c @@ -198,6 +198,7 @@ void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count) struct input_mt *mt = dev->mt; struct input_mt_slot *oldest; int oldid, count, i; + int p, reported_p = 0; if (!mt) return; @@ -216,6 +217,13 @@ void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count) oldest = ps; oldid = id; } + if (test_bit(ABS_MT_PRESSURE, dev->absbit)) { + p = input_mt_get_value(ps, ABS_MT_PRESSURE); + if (mt->flags & INPUT_MT_TOTAL_FORCE) + reported_p += p; + else if (oldid == id) + reported_p = p; + } count++; } @@ -245,10 +253,8 @@ void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count) input_event(dev, EV_ABS, ABS_X, x); input_event(dev, EV_ABS, ABS_Y, y); - if (test_bit(ABS_MT_PRESSURE, dev->absbit)) { - int p = input_mt_get_value(oldest, ABS_MT_PRESSURE); - input_event(dev, EV_ABS, ABS_PRESSURE, p); - } + if (test_bit(ABS_MT_PRESSURE, dev->absbit)) + input_event(dev, EV_ABS, ABS_PRESSURE, reported_p); } else { if (test_bit(ABS_MT_PRESSURE, dev->absbit)) input_event(dev, EV_ABS, ABS_PRESSURE, 0); diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index 2cf89a538b18..d30286298a00 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -17,6 +17,7 @@ #define INPUT_MT_DROP_UNUSED 0x0004 /* drop contacts not seen in frame */ #define INPUT_MT_TRACK 0x0008 /* use in-kernel tracking */ #define INPUT_MT_SEMI_MT 0x0010 /* semi-mt device, finger count handled manually */ +#define INPUT_MT_TOTAL_FORCE 0x0020 /* calculate total force from slots pressure */ /** * struct input_mt_slot - represents the state of an input MT slot -- cgit v1.2.3 From 880fcc329e2473ba02ffbc446fcd403972ab1fca Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Tue, 9 Sep 2025 00:03:24 -0700 Subject: drivers/perf: riscv: Export PMU event info function The event mapping function can be used in event info function to find out the corresponding SBI PMU event encoding during the get_event_info function as well. Refactor and export it so that it can be invoked from kvm and internal driver. Signed-off-by: Atish Patra Reviewed-by: Anup Patel Acked-by: Paul Walmsley Link: https://lore.kernel.org/r/20250909-pmu_event_info-v6-5-d8f80cacb884@rivosinc.com Signed-off-by: Anup Patel --- drivers/perf/riscv_pmu_sbi.c | 122 ++++++++++++++++++++++------------------- include/linux/perf/riscv_pmu.h | 1 + 2 files changed, 68 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index a6c479f853e1..0392900d828e 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -100,6 +100,7 @@ static unsigned int riscv_pmu_irq; /* Cache the available counters in a bitmask */ static unsigned long cmask; +static int pmu_event_find_cache(u64 config); struct sbi_pmu_event_data { union { union { @@ -412,6 +413,71 @@ static bool pmu_sbi_ctr_is_fw(int cidx) return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false; } +int riscv_pmu_get_event_info(u32 type, u64 config, u64 *econfig) +{ + int ret = -ENOENT; + + switch (type) { + case PERF_TYPE_HARDWARE: + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + ret = pmu_hw_event_map[config].event_idx; + break; + case PERF_TYPE_HW_CACHE: + ret = pmu_event_find_cache(config); + break; + case PERF_TYPE_RAW: + /* + * As per SBI v0.3 specification, + * -- the upper 16 bits must be unused for a hardware raw event. + * As per SBI v2.0 specification, + * -- the upper 8 bits must be unused for a hardware raw event. + * Bits 63:62 are used to distinguish between raw events + * 00 - Hardware raw event + * 10 - SBI firmware events + * 11 - Risc-V platform specific firmware event + */ + switch (config >> 62) { + case 0: + if (sbi_v3_available) { + /* Return error any bits [56-63] is set as it is not allowed by the spec */ + if (!(config & ~RISCV_PMU_RAW_EVENT_V2_MASK)) { + if (econfig) + *econfig = config & RISCV_PMU_RAW_EVENT_V2_MASK; + ret = RISCV_PMU_RAW_EVENT_V2_IDX; + } + /* Return error any bits [48-63] is set as it is not allowed by the spec */ + } else if (!(config & ~RISCV_PMU_RAW_EVENT_MASK)) { + if (econfig) + *econfig = config & RISCV_PMU_RAW_EVENT_MASK; + ret = RISCV_PMU_RAW_EVENT_IDX; + } + break; + case 2: + ret = (config & 0xFFFF) | (SBI_PMU_EVENT_TYPE_FW << 16); + break; + case 3: + /* + * For Risc-V platform specific firmware events + * Event code - 0xFFFF + * Event data - raw event encoding + */ + ret = SBI_PMU_EVENT_TYPE_FW << 16 | RISCV_PLAT_FW_EVENT; + if (econfig) + *econfig = config & RISCV_PMU_PLAT_FW_EVENT_MASK; + break; + default: + break; + } + break; + default: + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(riscv_pmu_get_event_info); + /* * Returns the counter width of a programmable counter and number of hardware * counters. As we don't support heterogeneous CPUs yet, it is okay to just @@ -577,7 +643,6 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) { u32 type = event->attr.type; u64 config = event->attr.config; - int ret = -ENOENT; /* * Ensure we are finished checking standard hardware events for @@ -585,60 +650,7 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) */ flush_work(&check_std_events_work); - switch (type) { - case PERF_TYPE_HARDWARE: - if (config >= PERF_COUNT_HW_MAX) - return -EINVAL; - ret = pmu_hw_event_map[event->attr.config].event_idx; - break; - case PERF_TYPE_HW_CACHE: - ret = pmu_event_find_cache(config); - break; - case PERF_TYPE_RAW: - /* - * As per SBI v0.3 specification, - * -- the upper 16 bits must be unused for a hardware raw event. - * As per SBI v2.0 specification, - * -- the upper 8 bits must be unused for a hardware raw event. - * Bits 63:62 are used to distinguish between raw events - * 00 - Hardware raw event - * 10 - SBI firmware events - * 11 - Risc-V platform specific firmware event - */ - - switch (config >> 62) { - case 0: - if (sbi_v3_available) { - if (!(config & ~RISCV_PMU_RAW_EVENT_V2_MASK)) { - *econfig = config & RISCV_PMU_RAW_EVENT_V2_MASK; - ret = RISCV_PMU_RAW_EVENT_V2_IDX; - } - } else if (!(config & ~RISCV_PMU_RAW_EVENT_MASK)) { - *econfig = config & RISCV_PMU_RAW_EVENT_MASK; - ret = RISCV_PMU_RAW_EVENT_IDX; - } - break; - case 2: - ret = (config & 0xFFFF) | (SBI_PMU_EVENT_TYPE_FW << 16); - break; - case 3: - /* - * For Risc-V platform specific firmware events - * Event code - 0xFFFF - * Event data - raw event encoding - */ - ret = SBI_PMU_EVENT_TYPE_FW << 16 | RISCV_PLAT_FW_EVENT; - *econfig = config & RISCV_PMU_PLAT_FW_EVENT_MASK; - break; - default: - break; - } - break; - default: - break; - } - - return ret; + return riscv_pmu_get_event_info(type, config, econfig); } static void pmu_sbi_snapshot_free(struct riscv_pmu *pmu) diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 701974639ff2..f82a28040594 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -89,6 +89,7 @@ static inline void riscv_pmu_legacy_skip_init(void) {}; struct riscv_pmu *riscv_pmu_alloc(void); #ifdef CONFIG_RISCV_PMU_SBI int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr); +int riscv_pmu_get_event_info(u32 type, u64 config, u64 *econfig); #endif #endif /* CONFIG_RISCV_PMU */ -- cgit v1.2.3 From 3baeae36039afc233d4a42d6ff4aa7019892619f Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 29 Aug 2025 16:10:57 +0300 Subject: PCI: Use pci_release_resource() instead of release_resource() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few places in setup-bus.c call release_resource() directly and end up duplicating functionality from pci_release_resource() such as parent check, logging, and clearing the resource. Worse yet, the way the resource is cleared is inconsistent between different sites. Convert release_resource() calls into pci_release_resource() to remove code duplication. This will also make the resource start, end, and flags behavior consistent, i.e., start address is cleared, and only IORESOURCE_UNSET is asserted for the resource. While at it, eliminate the unnecessary initialization of idx variable in pci_bridge_release_resources(). Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250829131113.36754-9-ilpo.jarvinen@linux.intel.com --- drivers/pci/setup-bus.c | 46 ++++++++++++++-------------------------------- drivers/pci/setup-res.c | 11 ++++++++--- include/linux/pci.h | 2 +- 3 files changed, 23 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 6bdc1af887da..b62465665abc 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -473,8 +473,6 @@ static void __assign_resources_sorted(struct list_head *head, struct pci_dev_resource *dev_res, *tmp_res, *dev_res2; struct resource *res; struct pci_dev *dev; - const char *res_name; - int idx; unsigned long fail_type; resource_size_t add_align, align; @@ -582,14 +580,7 @@ assign: res = dev_res->res; dev = dev_res->dev; - if (!res->parent) - continue; - - idx = pci_resource_num(dev, res); - res_name = pci_resource_name(dev, idx); - pci_dbg(dev, "%s %pR: releasing\n", res_name, res); - - release_resource(res); + pci_release_resource(dev, pci_resource_num(dev, res)); restore_dev_resource(dev_res); } /* Restore start/end/flags from saved list */ @@ -1732,7 +1723,7 @@ static void pci_bridge_release_resources(struct pci_bus *bus, struct resource *r; unsigned int old_flags; struct resource *b_res; - int idx = 1; + int idx, ret; b_res = &dev->resource[PCI_BRIDGE_RESOURCES]; @@ -1766,21 +1757,18 @@ static void pci_bridge_release_resources(struct pci_bus *bus, /* If there are children, release them all */ release_child_resources(r); - if (!release_resource(r)) { - type = old_flags = r->flags & PCI_RES_TYPE_MASK; - pci_info(dev, "resource %d %pR released\n", - PCI_BRIDGE_RESOURCES + idx, r); - /* Keep the old size */ - resource_set_range(r, 0, resource_size(r)); - r->flags = 0; - /* Avoiding touch the one without PREF */ - if (type & IORESOURCE_PREFETCH) - type = IORESOURCE_PREFETCH; - __pci_setup_bridge(bus, type); - /* For next child res under same bridge */ - r->flags = old_flags; - } + type = old_flags = r->flags & PCI_RES_TYPE_MASK; + ret = pci_release_resource(dev, PCI_BRIDGE_RESOURCES + idx); + if (ret) + return; + + /* Avoiding touch the one without PREF */ + if (type & IORESOURCE_PREFETCH) + type = IORESOURCE_PREFETCH; + __pci_setup_bridge(bus, type); + /* For next child res under same bridge */ + r->flags = old_flags; } enum release_type { @@ -2425,7 +2413,6 @@ int pci_reassign_bridge_resources(struct pci_dev *bridge, unsigned long type) for (i = PCI_BRIDGE_RESOURCES; i < PCI_BRIDGE_RESOURCE_END; i++) { struct resource *res = &bridge->resource[i]; - const char *res_name = pci_resource_name(bridge, i); if ((res->flags ^ type) & PCI_RES_TYPE_MASK) continue; @@ -2438,12 +2425,7 @@ int pci_reassign_bridge_resources(struct pci_dev *bridge, unsigned long type) if (ret) goto cleanup; - pci_info(bridge, "%s %pR: releasing\n", res_name, res); - - if (res->parent) - release_resource(res); - res->start = 0; - res->end = 0; + pci_release_resource(bridge, i); break; } if (i == PCI_BRIDGE_RESOURCE_END) diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index d2b3ed51e880..0468c058b598 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -406,20 +406,25 @@ int pci_reassign_resource(struct pci_dev *dev, int resno, return 0; } -void pci_release_resource(struct pci_dev *dev, int resno) +int pci_release_resource(struct pci_dev *dev, int resno) { struct resource *res = pci_resource_n(dev, resno); const char *res_name = pci_resource_name(dev, resno); + int ret; if (!res->parent) - return; + return 0; pci_info(dev, "%s %pR: releasing\n", res_name, res); - release_resource(res); + ret = release_resource(res); + if (ret) + return ret; res->end = resource_size(res) - 1; res->start = 0; res->flags |= IORESOURCE_UNSET; + + return 0; } EXPORT_SYMBOL(pci_release_resource); diff --git a/include/linux/pci.h b/include/linux/pci.h index 59876de13860..275df4058767 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1417,7 +1417,7 @@ void pci_reset_secondary_bus(struct pci_dev *dev); void pcibios_reset_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); -void pci_release_resource(struct pci_dev *dev, int resno); +int pci_release_resource(struct pci_dev *dev, int resno); static inline int pci_rebar_bytes_to_size(u64 bytes) { bytes = roundup_pow_of_two(bytes); -- cgit v1.2.3 From 4292a1e45fd464551efac7b2b52fd3606e956c28 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 29 Aug 2025 16:11:09 +0300 Subject: PCI: Refactor distributing available memory to use loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pci_bus_distribute_available_resources() and pci_bridge_distribute_available_resources() retain bridge window resources and related data needed for distributing the available window in independent variables for io, memory, and prefetchable memory windows. The code is essentially the same for all of them and therefore repeated three times with different variable names. Refactor pci_bus_distribute_available_resources() to take an array. This is complicated slightly by the function taking advantage of passing the struct as value, which cannot be done for arrays in C. Therefore, copy the data into a local array in the stack in the first loop. Variable names are (hopefully) improved slightly as well. Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250829131113.36754-21-ilpo.jarvinen@linux.intel.com --- drivers/pci/setup-bus.c | 162 +++++++++++++++++++++--------------------------- include/linux/pci.h | 3 +- 2 files changed, 74 insertions(+), 91 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 720159bca54d..3bc329b1b923 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -2059,15 +2059,16 @@ static void remove_dev_resource(struct resource *avail, struct pci_dev *dev, avail->start = min(avail->start + tmp, avail->end + 1); } -static void remove_dev_resources(struct pci_dev *dev, struct resource *io, - struct resource *mmio, - struct resource *mmio_pref) +static void remove_dev_resources(struct pci_dev *dev, + struct resource available[PCI_P2P_BRIDGE_RESOURCE_NUM]) { + struct resource *mmio_pref = &available[PCI_BUS_BRIDGE_PREF_MEM_WINDOW]; struct resource *res; pci_dev_for_each_resource(dev, res) { if (resource_type(res) == IORESOURCE_IO) { - remove_dev_resource(io, dev, res); + remove_dev_resource(&available[PCI_BUS_BRIDGE_IO_WINDOW], + dev, res); } else if (resource_type(res) == IORESOURCE_MEM) { /* @@ -2081,10 +2082,13 @@ static void remove_dev_resources(struct pci_dev *dev, struct resource *io, */ if ((res->flags & IORESOURCE_PREFETCH) && ((res->flags & IORESOURCE_MEM_64) == - (mmio_pref->flags & IORESOURCE_MEM_64))) - remove_dev_resource(mmio_pref, dev, res); - else - remove_dev_resource(mmio, dev, res); + (mmio_pref->flags & IORESOURCE_MEM_64))) { + remove_dev_resource(&available[PCI_BUS_BRIDGE_PREF_MEM_WINDOW], + dev, res); + } else { + remove_dev_resource(&available[PCI_BUS_BRIDGE_MEM_WINDOW], + dev, res); + } } } } @@ -2099,45 +2103,39 @@ static void remove_dev_resources(struct pci_dev *dev, struct resource *io, * shared with the bridges. */ static void pci_bus_distribute_available_resources(struct pci_bus *bus, - struct list_head *add_list, - struct resource io, - struct resource mmio, - struct resource mmio_pref) + struct list_head *add_list, + struct resource available_in[PCI_P2P_BRIDGE_RESOURCE_NUM]) { + struct resource available[PCI_P2P_BRIDGE_RESOURCE_NUM]; unsigned int normal_bridges = 0, hotplug_bridges = 0; - struct resource *io_res, *mmio_res, *mmio_pref_res; struct pci_dev *dev, *bridge = bus->self; - resource_size_t io_per_b, mmio_per_b, mmio_pref_per_b, align; + resource_size_t per_bridge[PCI_P2P_BRIDGE_RESOURCE_NUM]; + resource_size_t align; + int i; - io_res = &bridge->resource[PCI_BRIDGE_IO_WINDOW]; - mmio_res = &bridge->resource[PCI_BRIDGE_MEM_WINDOW]; - mmio_pref_res = &bridge->resource[PCI_BRIDGE_PREF_MEM_WINDOW]; + for (i = 0; i < PCI_P2P_BRIDGE_RESOURCE_NUM; i++) { + struct resource *res = pci_bus_resource_n(bus, i); - /* - * The alignment of this bridge is yet to be considered, hence it must - * be done now before extending its bridge window. - */ - align = pci_resource_alignment(bridge, io_res); - if (!io_res->parent && align) - io.start = min(ALIGN(io.start, align), io.end + 1); - - align = pci_resource_alignment(bridge, mmio_res); - if (!mmio_res->parent && align) - mmio.start = min(ALIGN(mmio.start, align), mmio.end + 1); + available[i] = available_in[i]; - align = pci_resource_alignment(bridge, mmio_pref_res); - if (!mmio_pref_res->parent && align) - mmio_pref.start = min(ALIGN(mmio_pref.start, align), - mmio_pref.end + 1); + /* + * The alignment of this bridge is yet to be considered, + * hence it must be done now before extending its bridge + * window. + */ + align = pci_resource_alignment(bridge, res); + if (!res->parent && align) + available[i].start = min(ALIGN(available[i].start, align), + available[i].end + 1); - /* - * Now that we have adjusted for alignment, update the bridge window - * resources to fill as much remaining resource space as possible. - */ - adjust_bridge_window(bridge, io_res, add_list, resource_size(&io)); - adjust_bridge_window(bridge, mmio_res, add_list, resource_size(&mmio)); - adjust_bridge_window(bridge, mmio_pref_res, add_list, - resource_size(&mmio_pref)); + /* + * Now that we have adjusted for alignment, update the + * bridge window resources to fill as much remaining + * resource space as possible. + */ + adjust_bridge_window(bridge, res, add_list, + resource_size(&available[i])); + } /* * Calculate how many hotplug bridges and normal bridges there @@ -2161,7 +2159,7 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus, */ list_for_each_entry(dev, &bus->devices, bus_list) { if (!dev->is_virtfn) - remove_dev_resources(dev, &io, &mmio, &mmio_pref); + remove_dev_resources(dev, available); } /* @@ -2173,16 +2171,9 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus, * split between non-hotplug bridges. This is to allow possible * hotplug bridges below them to get the extra space as well. */ - if (hotplug_bridges) { - io_per_b = div64_ul(resource_size(&io), hotplug_bridges); - mmio_per_b = div64_ul(resource_size(&mmio), hotplug_bridges); - mmio_pref_per_b = div64_ul(resource_size(&mmio_pref), - hotplug_bridges); - } else { - io_per_b = div64_ul(resource_size(&io), normal_bridges); - mmio_per_b = div64_ul(resource_size(&mmio), normal_bridges); - mmio_pref_per_b = div64_ul(resource_size(&mmio_pref), - normal_bridges); + for (i = 0; i < PCI_P2P_BRIDGE_RESOURCE_NUM; i++) { + per_bridge[i] = div64_ul(resource_size(&available[i]), + hotplug_bridges ?: normal_bridges); } for_each_pci_bridge(dev, bus) { @@ -2195,49 +2186,41 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus, if (hotplug_bridges && !dev->is_hotplug_bridge) continue; - res = &dev->resource[PCI_BRIDGE_IO_WINDOW]; + for (i = 0; i < PCI_P2P_BRIDGE_RESOURCE_NUM; i++) { + res = pci_bus_resource_n(bus, i); - /* - * Make sure the split resource space is properly aligned - * for bridge windows (align it down to avoid going above - * what is available). - */ - align = pci_resource_alignment(dev, res); - resource_set_size(&io, ALIGN_DOWN_IF_NONZERO(io_per_b, align)); - - /* - * The x_per_b holds the extra resource space that can be - * added for each bridge but there is the minimal already - * reserved as well so adjust x.start down accordingly to - * cover the whole space. - */ - io.start -= resource_size(res); - - res = &dev->resource[PCI_BRIDGE_MEM_WINDOW]; - align = pci_resource_alignment(dev, res); - resource_set_size(&mmio, - ALIGN_DOWN_IF_NONZERO(mmio_per_b,align)); - mmio.start -= resource_size(res); + /* + * Make sure the split resource space is properly + * aligned for bridge windows (align it down to + * avoid going above what is available). + */ + align = pci_resource_alignment(dev, res); + resource_set_size(&available[i], + ALIGN_DOWN_IF_NONZERO(per_bridge[i], + align)); - res = &dev->resource[PCI_BRIDGE_PREF_MEM_WINDOW]; - align = pci_resource_alignment(dev, res); - resource_set_size(&mmio_pref, - ALIGN_DOWN_IF_NONZERO(mmio_pref_per_b, align)); - mmio_pref.start -= resource_size(res); + /* + * The per_bridge holds the extra resource space + * that can be added for each bridge but there is + * the minimal already reserved as well so adjust + * x.start down accordingly to cover the whole + * space. + */ + available[i].start -= resource_size(res); + } - pci_bus_distribute_available_resources(b, add_list, io, mmio, - mmio_pref); + pci_bus_distribute_available_resources(b, add_list, available); - io.start += io.end + 1; - mmio.start += mmio.end + 1; - mmio_pref.start += mmio_pref.end + 1; + for (i = 0; i < PCI_P2P_BRIDGE_RESOURCE_NUM; i++) + available[i].start += available[i].end + 1; } } static void pci_bridge_distribute_available_resources(struct pci_dev *bridge, struct list_head *add_list) { - struct resource available_io, available_mmio, available_mmio_pref; + struct resource *res, available[PCI_P2P_BRIDGE_RESOURCE_NUM]; + unsigned int i; if (!bridge->is_hotplug_bridge) return; @@ -2245,14 +2228,13 @@ static void pci_bridge_distribute_available_resources(struct pci_dev *bridge, pci_dbg(bridge, "distributing available resources\n"); /* Take the initial extra resources from the hotplug port */ - available_io = bridge->resource[PCI_BRIDGE_IO_WINDOW]; - available_mmio = bridge->resource[PCI_BRIDGE_MEM_WINDOW]; - available_mmio_pref = bridge->resource[PCI_BRIDGE_PREF_MEM_WINDOW]; + for (i = 0; i < PCI_P2P_BRIDGE_RESOURCE_NUM; i++) { + res = pci_resource_n(bridge, PCI_BRIDGE_RESOURCES + i); + available[i] = *res; + } pci_bus_distribute_available_resources(bridge->subordinate, - add_list, available_io, - available_mmio, - available_mmio_pref); + add_list, available); } static bool pci_bridge_resources_not_assigned(struct pci_dev *dev) diff --git a/include/linux/pci.h b/include/linux/pci.h index 275df4058767..723e9cede69d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -119,7 +119,8 @@ enum { #define PCI_CB_BRIDGE_MEM_1_WINDOW (PCI_BRIDGE_RESOURCES + 3) /* Total number of bridge resources for P2P and CardBus */ -#define PCI_BRIDGE_RESOURCE_NUM 4 +#define PCI_P2P_BRIDGE_RESOURCE_NUM 3 +#define PCI_BRIDGE_RESOURCE_NUM 4 /* Resources assigned to buses behind the bridge */ PCI_BRIDGE_RESOURCES, -- cgit v1.2.3 From ea6bb47fd6a4c5a332f9349c39bf7462e3e7a35b Mon Sep 17 00:00:00 2001 From: Alan Borzeszkowski Date: Wed, 27 Aug 2025 13:56:46 +0200 Subject: thunderbolt: Update thunderbolt.h header file Make Thunderbolt header file compliant with current kernel-doc standards. No functional changes. Signed-off-by: Alan Borzeszkowski Signed-off-by: Mika Westerberg --- include/linux/thunderbolt.h | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 75247486616b..0ba112175bb3 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -213,7 +213,7 @@ enum tb_link_width { * queried first * @service_ids: Used to generate IDs for the services * @in_hopids: Input HopIDs for DMA tunneling - * @out_hopids; Output HopIDs for DMA tunneling + * @out_hopids: Output HopIDs for DMA tunneling * @local_property_block: Local block of properties * @local_property_block_gen: Generation of @local_property_block * @local_property_block_len: Length of the @local_property_block in dwords @@ -356,7 +356,7 @@ int tb_xdomain_request(struct tb_xdomain *xd, const void *request, unsigned int timeout_msec); /** - * tb_protocol_handler - Protocol specific handler + * struct tb_protocol_handler - Protocol specific handler * @uuid: XDomain messages with this UUID are dispatched to this handler * @callback: Callback called with the XDomain message. Returning %1 * here tells the XDomain core that the message was handled @@ -437,7 +437,7 @@ static inline struct tb_service *tb_to_service(struct device *dev) } /** - * tb_service_driver - Thunderbolt service driver + * struct tb_service_driver - Thunderbolt service driver * @driver: Driver structure * @probe: Called when the driver is probed * @remove: Called when the driver is removed (optional) @@ -519,6 +519,7 @@ struct tb_nhi { * @head: Head of the ring (write next descriptor here) * @tail: Tail of the ring (complete next descriptor here) * @descriptors: Allocated descriptors for this ring + * @descriptors_dma: DMA address of descriptors for this ring * @queue: Queue holding frames to be transferred over this ring * @in_flight: Queue holding frames that are currently in flight * @work: Interrupt work structure @@ -571,12 +572,12 @@ typedef void (*ring_cb)(struct tb_ring *, struct ring_frame *, bool canceled); /** * enum ring_desc_flags - Flags for DMA ring descriptor - * %RING_DESC_ISOCH: Enable isonchronous DMA (Tx only) - * %RING_DESC_CRC_ERROR: In frame mode CRC check failed for the frame (Rx only) - * %RING_DESC_COMPLETED: Descriptor completed (set by NHI) - * %RING_DESC_POSTED: Always set this - * %RING_DESC_BUFFER_OVERRUN: RX buffer overrun - * %RING_DESC_INTERRUPT: Request an interrupt on completion + * @RING_DESC_ISOCH: Enable isonchronous DMA (Tx only) + * @RING_DESC_CRC_ERROR: In frame mode CRC check failed for the frame (Rx only) + * @RING_DESC_COMPLETED: Descriptor completed (set by NHI) + * @RING_DESC_POSTED: Always set this + * @RING_DESC_BUFFER_OVERRUN: RX buffer overrun + * @RING_DESC_INTERRUPT: Request an interrupt on completion */ enum ring_desc_flags { RING_DESC_ISOCH = 0x1, @@ -636,7 +637,7 @@ int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame); * If ring_stop() is called after the packet has been enqueued * @frame->callback will be called with canceled set to true. * - * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. + * Return: %-ESHUTDOWN if ring_stop() has been called, %0 otherwise. */ static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) { @@ -657,7 +658,7 @@ static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) * If ring_stop() is called after the packet has been enqueued @frame->callback * will be called with canceled set to true. * - * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. + * Return: %-ESHUTDOWN if ring_stop has been called, %0 otherwise. */ static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame) { @@ -675,6 +676,8 @@ void tb_ring_poll_complete(struct tb_ring *ring); * * Use this function when you are mapping DMA for buffers that are * passed to the ring for sending/receiving. + * + * Return: Pointer to device used for DMA mapping. */ static inline struct device *tb_ring_dma_device(struct tb_ring *ring) { -- cgit v1.2.3 From d1dd75c6500c74b91c5286fd3277710371d3e3ca Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sat, 13 Sep 2025 16:12:54 +0000 Subject: HID: core: Change hid_driver to use a const char* for name name is never mutated by the core HID stack. Making name a const char* simplifies passing the string from Rust to C. Otherwise, it becomes difficult to pass a 'static lifetime CStr from Rust to a char*, rather than a const char*, due to lack of guarantee that the underlying data of the CStr will not be mutated by the C code. Signed-off-by: Rahul Rameshbabu Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 2cc4f1e4ea96..426b22ed42b4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -816,7 +816,7 @@ struct hid_usage_id { * zero from them. */ struct hid_driver { - char *name; + const char *name; const struct hid_device_id *id_table; struct list_head dyn_list; -- cgit v1.2.3 From 45fe729be9a6be326a1ca25af82d34de32ba2ce8 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 16 Sep 2025 10:16:20 +0800 Subject: usb: typec: Stub out typec_switch APIs when CONFIG_TYPEC=n Ease driver development by adding stubs for the typec_switch APIs when CONFIG_TYPEC=n. Copy the same method used for the typec_mux APIs to be consistent. Acked-by: Greg Kroah-Hartman Reviewed-by: Heikki Krogerus Signed-off-by: Stephen Boyd Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20250916021620.1303995-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_mux.h | 46 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/typec_mux.h b/include/linux/usb/typec_mux.h index 2489a7857d8e..aa9ebb7e2fe0 100644 --- a/include/linux/usb/typec_mux.h +++ b/include/linux/usb/typec_mux.h @@ -3,6 +3,7 @@ #ifndef __USB_TYPEC_MUX #define __USB_TYPEC_MUX +#include #include #include @@ -24,16 +25,13 @@ struct typec_switch_desc { void *drvdata; }; +#if IS_ENABLED(CONFIG_TYPEC) + struct typec_switch *fwnode_typec_switch_get(struct fwnode_handle *fwnode); void typec_switch_put(struct typec_switch *sw); int typec_switch_set(struct typec_switch *sw, enum typec_orientation orientation); -static inline struct typec_switch *typec_switch_get(struct device *dev) -{ - return fwnode_typec_switch_get(dev_fwnode(dev)); -} - struct typec_switch_dev * typec_switch_register(struct device *parent, const struct typec_switch_desc *desc); @@ -42,6 +40,44 @@ void typec_switch_unregister(struct typec_switch_dev *sw); void typec_switch_set_drvdata(struct typec_switch_dev *sw, void *data); void *typec_switch_get_drvdata(struct typec_switch_dev *sw); +#else + +static inline struct typec_switch * +fwnode_typec_switch_get(struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline void typec_switch_put(struct typec_switch *sw) {} + +static inline int typec_switch_set(struct typec_switch *sw, + enum typec_orientation orientation) +{ + return 0; +} + +static inline struct typec_switch_dev * +typec_switch_register(struct device *parent, + const struct typec_switch_desc *desc) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void typec_switch_unregister(struct typec_switch_dev *sw) {} + +static inline void typec_switch_set_drvdata(struct typec_switch_dev *sw, void *data) {} +static inline void *typec_switch_get_drvdata(struct typec_switch_dev *sw) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +#endif /* CONFIG_TYPEC */ + +static inline struct typec_switch *typec_switch_get(struct device *dev) +{ + return fwnode_typec_switch_get(dev_fwnode(dev)); +} + struct typec_mux_state { struct typec_altmode *alt; unsigned long mode; -- cgit v1.2.3 From bfb1d99d969fe3b892db30848aeebfa19d21f57f Mon Sep 17 00:00:00 2001 From: Kuen-Han Tsai Date: Tue, 16 Sep 2025 16:21:32 +0800 Subject: usb: gadget: Store endpoint pointer in usb_request Gadget function drivers often have goto-based error handling in their bind paths, which can be bug-prone. Refactoring these paths to use __free() scope-based cleanup is desirable, but currently blocked. The blocker is that usb_ep_free_request(ep, req) requires two parameters, while the __free() mechanism can only pass a pointer to the request itself. Store an endpoint pointer in the struct usb_request. The pointer is populated centrally in usb_ep_alloc_request() on every successful allocation, making the request object self-contained. Signed-off-by: Kuen-Han Tsai Link: https://lore.kernel.org/r/20250916-ready-v1-1-4997bf277548@google.com Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250916-ready-v1-1-4997bf277548@google.com --- drivers/usb/gadget/udc/core.c | 3 +++ include/linux/usb/gadget.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index d709e24c1fd4..e3d63b8fa0f4 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -194,6 +194,9 @@ struct usb_request *usb_ep_alloc_request(struct usb_ep *ep, req = ep->ops->alloc_request(ep, gfp_flags); + if (req) + req->ep = ep; + trace_usb_ep_alloc_request(ep, req, req ? 0 : -ENOMEM); return req; diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 0f28c5512fcb..0f2079476088 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -32,6 +32,7 @@ struct usb_ep; /** * struct usb_request - describes one i/o request + * @ep: The associated endpoint set by usb_ep_alloc_request(). * @buf: Buffer used for data. Always provide this; some controllers * only use PIO, or don't use DMA for some endpoints. * @dma: DMA address corresponding to 'buf'. If you don't set this @@ -98,6 +99,7 @@ struct usb_ep; */ struct usb_request { + struct usb_ep *ep; void *buf; unsigned length; dma_addr_t dma; -- cgit v1.2.3 From 201c53c687f2b55a7cc6d9f4000af4797860174b Mon Sep 17 00:00:00 2001 From: Kuen-Han Tsai Date: Tue, 16 Sep 2025 16:21:33 +0800 Subject: usb: gadget: Introduce free_usb_request helper Introduce the free_usb_request() function that frees both the request's buffer and the request itself. This function serves as the cleanup callback for DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request pointers. Signed-off-by: Kuen-Han Tsai Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com --- include/linux/usb/gadget.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 0f2079476088..3aaf19e77558 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -15,6 +15,7 @@ #ifndef __LINUX_USB_GADGET_H #define __LINUX_USB_GADGET_H +#include #include #include #include @@ -293,6 +294,28 @@ static inline void usb_ep_fifo_flush(struct usb_ep *ep) /*-------------------------------------------------------------------------*/ +/** + * free_usb_request - frees a usb_request object and its buffer + * @req: the request being freed + * + * This helper function frees both the request's buffer and the request object + * itself by calling usb_ep_free_request(). Its signature is designed to be used + * with DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request + * pointers. + */ +static inline void free_usb_request(struct usb_request *req) +{ + if (!req) + return; + + kfree(req->buf); + usb_ep_free_request(req->ep, req); +} + +DEFINE_FREE(free_usb_request, struct usb_request *, free_usb_request(_T)) + +/*-------------------------------------------------------------------------*/ + struct usb_dcd_config_params { __u8 bU1devExitLat; /* U1 Device exit Latency */ #define USB_DEFAULT_U1_DEV_EXIT_LAT 0x01 /* Less then 1 microsec */ -- cgit v1.2.3 From 5c5db9efe323dd0b0d7917dbe5b9c0999c95e79e Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 28 Aug 2025 10:59:43 +0000 Subject: irqchip/gic-v5: Drop has_gcie_v3_compat from gic_kvm_info The presence of FEAT_GCIE_LEGACY is now handled as a CPU feature. Therefore, drop the check and flag from the GIC driver and gic_kvm_info as it is no longer required or used by KVM. Signed-off-by: Sascha Bischoff Acked-by: Thomas Gleixner Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v5.c | 7 ------- include/linux/irqchip/arm-vgic-info.h | 2 -- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic-v5.c b/drivers/irqchip/irq-gic-v5.c index 4bd224f359a7..41ef286c4d78 100644 --- a/drivers/irqchip/irq-gic-v5.c +++ b/drivers/irqchip/irq-gic-v5.c @@ -1062,16 +1062,9 @@ static void gicv5_set_cpuif_idbits(void) #ifdef CONFIG_KVM static struct gic_kvm_info gic_v5_kvm_info __initdata; -static bool __init gicv5_cpuif_has_gcie_legacy(void) -{ - u64 idr0 = read_sysreg_s(SYS_ICC_IDR0_EL1); - return !!FIELD_GET(ICC_IDR0_EL1_GCIE_LEGACY, idr0); -} - static void __init gic_of_setup_kvm_info(struct device_node *node) { gic_v5_kvm_info.type = GIC_V5; - gic_v5_kvm_info.has_gcie_v3_compat = gicv5_cpuif_has_gcie_legacy(); /* GIC Virtual CPU interface maintenance interrupt */ gic_v5_kvm_info.no_maint_irq_mask = false; diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h index ca1713fac6e3..a470a73a805a 100644 --- a/include/linux/irqchip/arm-vgic-info.h +++ b/include/linux/irqchip/arm-vgic-info.h @@ -36,8 +36,6 @@ struct gic_kvm_info { bool has_v4_1; /* Deactivation impared, subpar stuff */ bool no_hw_deactivation; - /* v3 compat support (GICv5 hosts, only) */ - bool has_gcie_v3_compat; }; #ifdef CONFIG_KVM -- cgit v1.2.3 From 652b08afba69d5d26fe91098eb832b1bcc0f91c2 Mon Sep 17 00:00:00 2001 From: Cristian Birsan Date: Thu, 21 Nov 2024 20:16:38 +0200 Subject: ARM: at91: remove default values for PMC_PLL_ACR Remove default values for PMC PLL Analog Control Register(ACR) as the values are specific for each SoC and PLL and load them from PLL characteristics structure Co-developed-by: Andrei Simion Signed-off-by: Andrei Simion Signed-off-by: Cristian Birsan [nicolas.ferre@microchip.com: fix pll acr write sequence, preserve val] Signed-off-by: Nicolas Ferre --- drivers/clk/at91/clk-sam9x60-pll.c | 7 ++----- include/linux/clk/at91_pmc.h | 2 -- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/at91/clk-sam9x60-pll.c b/drivers/clk/at91/clk-sam9x60-pll.c index a035dc15454b..3dc75a394ce1 100644 --- a/drivers/clk/at91/clk-sam9x60-pll.c +++ b/drivers/clk/at91/clk-sam9x60-pll.c @@ -103,11 +103,8 @@ static int sam9x60_frac_pll_set(struct sam9x60_pll_core *core) (cmul == frac->mul && cfrac == frac->frac)) goto unlock; - /* Recommended value for PMC_PLL_ACR */ - if (core->characteristics->upll) - val = AT91_PMC_PLL_ACR_DEFAULT_UPLL; - else - val = AT91_PMC_PLL_ACR_DEFAULT_PLLA; + /* Load recommended value for PMC_PLL_ACR */ + val = core->characteristics->acr; regmap_write(regmap, AT91_PMC_PLL_ACR, val); regmap_write(regmap, AT91_PMC_PLL_CTRL1, diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 7af499bdbecb..d60ce9708ea2 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -47,8 +47,6 @@ #define AT91_PMC_PCSR 0x18 /* Peripheral Clock Status Register */ #define AT91_PMC_PLL_ACR 0x18 /* PLL Analog Control Register [for SAM9X60] */ -#define AT91_PMC_PLL_ACR_DEFAULT_UPLL UL(0x12020010) /* Default PLL ACR value for UPLL */ -#define AT91_PMC_PLL_ACR_DEFAULT_PLLA UL(0x00020010) /* Default PLL ACR value for PLLA */ #define AT91_PMC_PLL_ACR_UTMIVR (1 << 12) /* UPLL Voltage regulator Control */ #define AT91_PMC_PLL_ACR_UTMIBG (1 << 13) /* UPLL Bandgap Control */ -- cgit v1.2.3 From 6b88293aae7fb78872e5cc1ec36e2f750ae12e38 Mon Sep 17 00:00:00 2001 From: Markus Stockhausen Date: Wed, 10 Sep 2025 14:32:58 -0400 Subject: mtd: nand: move nand_check_erased_ecc_chunk() to nand/core The check function for bitflips in erased blocks will be needed by the Realtek ECC engine driver (which is currently under development). Right now it is located in raw/nand_base.c. While this is sufficient for the current usecases, there is no real dependency for an ECC engine on the raw nand library. Move the function over to a more generic place in core library. Suggested-by: Miquel Raynal Signed-off-by: Markus Stockhausen Signed-off-by: Miquel Raynal --- drivers/mtd/nand/core.c | 131 +++++++++++++++++++++++++++++++++++++++ drivers/mtd/nand/raw/nand_base.c | 131 --------------------------------------- include/linux/mtd/nand.h | 5 ++ include/linux/mtd/rawnand.h | 5 -- 4 files changed, 136 insertions(+), 136 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/core.c b/drivers/mtd/nand/core.c index 7737b1a4a177..3e76d127715f 100644 --- a/drivers/mtd/nand/core.c +++ b/drivers/mtd/nand/core.c @@ -12,6 +12,137 @@ #include #include +/** + * nand_check_erased_buf - check if a buffer contains (almost) only 0xff data + * @buf: buffer to test + * @len: buffer length + * @bitflips_threshold: maximum number of bitflips + * + * Check if a buffer contains only 0xff, which means the underlying region + * has been erased and is ready to be programmed. + * The bitflips_threshold specify the maximum number of bitflips before + * considering the region is not erased. + * Note: The logic of this function has been extracted from the memweight + * implementation, except that nand_check_erased_buf function exit before + * testing the whole buffer if the number of bitflips exceed the + * bitflips_threshold value. + * + * Returns a positive number of bitflips less than or equal to + * bitflips_threshold, or -ERROR_CODE for bitflips in excess of the + * threshold. + */ +static int nand_check_erased_buf(void *buf, int len, int bitflips_threshold) +{ + const unsigned char *bitmap = buf; + int bitflips = 0; + int weight; + + for (; len && ((uintptr_t)bitmap) % sizeof(long); + len--, bitmap++) { + weight = hweight8(*bitmap); + bitflips += BITS_PER_BYTE - weight; + if (unlikely(bitflips > bitflips_threshold)) + return -EBADMSG; + } + + for (; len >= sizeof(long); + len -= sizeof(long), bitmap += sizeof(long)) { + unsigned long d = *((unsigned long *)bitmap); + if (d == ~0UL) + continue; + weight = hweight_long(d); + bitflips += BITS_PER_LONG - weight; + if (unlikely(bitflips > bitflips_threshold)) + return -EBADMSG; + } + + for (; len > 0; len--, bitmap++) { + weight = hweight8(*bitmap); + bitflips += BITS_PER_BYTE - weight; + if (unlikely(bitflips > bitflips_threshold)) + return -EBADMSG; + } + + return bitflips; +} + +/** + * nand_check_erased_ecc_chunk - check if an ECC chunk contains (almost) only + * 0xff data + * @data: data buffer to test + * @datalen: data length + * @ecc: ECC buffer + * @ecclen: ECC length + * @extraoob: extra OOB buffer + * @extraooblen: extra OOB length + * @bitflips_threshold: maximum number of bitflips + * + * Check if a data buffer and its associated ECC and OOB data contains only + * 0xff pattern, which means the underlying region has been erased and is + * ready to be programmed. + * The bitflips_threshold specify the maximum number of bitflips before + * considering the region as not erased. + * + * Note: + * 1/ ECC algorithms are working on pre-defined block sizes which are usually + * different from the NAND page size. When fixing bitflips, ECC engines will + * report the number of errors per chunk, and the NAND core infrastructure + * expect you to return the maximum number of bitflips for the whole page. + * This is why you should always use this function on a single chunk and + * not on the whole page. After checking each chunk you should update your + * max_bitflips value accordingly. + * 2/ When checking for bitflips in erased pages you should not only check + * the payload data but also their associated ECC data, because a user might + * have programmed almost all bits to 1 but a few. In this case, we + * shouldn't consider the chunk as erased, and checking ECC bytes prevent + * this case. + * 3/ The extraoob argument is optional, and should be used if some of your OOB + * data are protected by the ECC engine. + * It could also be used if you support subpages and want to attach some + * extra OOB data to an ECC chunk. + * + * Returns a positive number of bitflips less than or equal to + * bitflips_threshold, or -ERROR_CODE for bitflips in excess of the + * threshold. In case of success, the passed buffers are filled with 0xff. + */ +int nand_check_erased_ecc_chunk(void *data, int datalen, + void *ecc, int ecclen, + void *extraoob, int extraooblen, + int bitflips_threshold) +{ + int data_bitflips = 0, ecc_bitflips = 0, extraoob_bitflips = 0; + + data_bitflips = nand_check_erased_buf(data, datalen, + bitflips_threshold); + if (data_bitflips < 0) + return data_bitflips; + + bitflips_threshold -= data_bitflips; + + ecc_bitflips = nand_check_erased_buf(ecc, ecclen, bitflips_threshold); + if (ecc_bitflips < 0) + return ecc_bitflips; + + bitflips_threshold -= ecc_bitflips; + + extraoob_bitflips = nand_check_erased_buf(extraoob, extraooblen, + bitflips_threshold); + if (extraoob_bitflips < 0) + return extraoob_bitflips; + + if (data_bitflips) + memset(data, 0xff, datalen); + + if (ecc_bitflips) + memset(ecc, 0xff, ecclen); + + if (extraoob_bitflips) + memset(extraoob, 0xff, extraooblen); + + return data_bitflips + ecc_bitflips + extraoob_bitflips; +} +EXPORT_SYMBOL(nand_check_erased_ecc_chunk); + /** * nanddev_isbad() - Check if a block is bad * @nand: NAND device diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 13e4060bd1b6..c7d9501f646b 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -2783,137 +2783,6 @@ int nand_set_features(struct nand_chip *chip, int addr, return nand_set_features_op(chip, addr, subfeature_param); } -/** - * nand_check_erased_buf - check if a buffer contains (almost) only 0xff data - * @buf: buffer to test - * @len: buffer length - * @bitflips_threshold: maximum number of bitflips - * - * Check if a buffer contains only 0xff, which means the underlying region - * has been erased and is ready to be programmed. - * The bitflips_threshold specify the maximum number of bitflips before - * considering the region is not erased. - * Note: The logic of this function has been extracted from the memweight - * implementation, except that nand_check_erased_buf function exit before - * testing the whole buffer if the number of bitflips exceed the - * bitflips_threshold value. - * - * Returns a positive number of bitflips less than or equal to - * bitflips_threshold, or -ERROR_CODE for bitflips in excess of the - * threshold. - */ -static int nand_check_erased_buf(void *buf, int len, int bitflips_threshold) -{ - const unsigned char *bitmap = buf; - int bitflips = 0; - int weight; - - for (; len && ((uintptr_t)bitmap) % sizeof(long); - len--, bitmap++) { - weight = hweight8(*bitmap); - bitflips += BITS_PER_BYTE - weight; - if (unlikely(bitflips > bitflips_threshold)) - return -EBADMSG; - } - - for (; len >= sizeof(long); - len -= sizeof(long), bitmap += sizeof(long)) { - unsigned long d = *((unsigned long *)bitmap); - if (d == ~0UL) - continue; - weight = hweight_long(d); - bitflips += BITS_PER_LONG - weight; - if (unlikely(bitflips > bitflips_threshold)) - return -EBADMSG; - } - - for (; len > 0; len--, bitmap++) { - weight = hweight8(*bitmap); - bitflips += BITS_PER_BYTE - weight; - if (unlikely(bitflips > bitflips_threshold)) - return -EBADMSG; - } - - return bitflips; -} - -/** - * nand_check_erased_ecc_chunk - check if an ECC chunk contains (almost) only - * 0xff data - * @data: data buffer to test - * @datalen: data length - * @ecc: ECC buffer - * @ecclen: ECC length - * @extraoob: extra OOB buffer - * @extraooblen: extra OOB length - * @bitflips_threshold: maximum number of bitflips - * - * Check if a data buffer and its associated ECC and OOB data contains only - * 0xff pattern, which means the underlying region has been erased and is - * ready to be programmed. - * The bitflips_threshold specify the maximum number of bitflips before - * considering the region as not erased. - * - * Note: - * 1/ ECC algorithms are working on pre-defined block sizes which are usually - * different from the NAND page size. When fixing bitflips, ECC engines will - * report the number of errors per chunk, and the NAND core infrastructure - * expect you to return the maximum number of bitflips for the whole page. - * This is why you should always use this function on a single chunk and - * not on the whole page. After checking each chunk you should update your - * max_bitflips value accordingly. - * 2/ When checking for bitflips in erased pages you should not only check - * the payload data but also their associated ECC data, because a user might - * have programmed almost all bits to 1 but a few. In this case, we - * shouldn't consider the chunk as erased, and checking ECC bytes prevent - * this case. - * 3/ The extraoob argument is optional, and should be used if some of your OOB - * data are protected by the ECC engine. - * It could also be used if you support subpages and want to attach some - * extra OOB data to an ECC chunk. - * - * Returns a positive number of bitflips less than or equal to - * bitflips_threshold, or -ERROR_CODE for bitflips in excess of the - * threshold. In case of success, the passed buffers are filled with 0xff. - */ -int nand_check_erased_ecc_chunk(void *data, int datalen, - void *ecc, int ecclen, - void *extraoob, int extraooblen, - int bitflips_threshold) -{ - int data_bitflips = 0, ecc_bitflips = 0, extraoob_bitflips = 0; - - data_bitflips = nand_check_erased_buf(data, datalen, - bitflips_threshold); - if (data_bitflips < 0) - return data_bitflips; - - bitflips_threshold -= data_bitflips; - - ecc_bitflips = nand_check_erased_buf(ecc, ecclen, bitflips_threshold); - if (ecc_bitflips < 0) - return ecc_bitflips; - - bitflips_threshold -= ecc_bitflips; - - extraoob_bitflips = nand_check_erased_buf(extraoob, extraooblen, - bitflips_threshold); - if (extraoob_bitflips < 0) - return extraoob_bitflips; - - if (data_bitflips) - memset(data, 0xff, datalen); - - if (ecc_bitflips) - memset(ecc, 0xff, ecclen); - - if (extraoob_bitflips) - memset(extraoob, 0xff, extraooblen); - - return data_bitflips + ecc_bitflips + extraoob_bitflips; -} -EXPORT_SYMBOL(nand_check_erased_ecc_chunk); - /** * nand_read_page_raw_notsupp - dummy read raw page function * @chip: nand chip info structure diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 07486168d104..09c8c93e4dba 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1136,4 +1136,9 @@ static inline bool nanddev_bbt_is_initialized(struct nand_device *nand) int nanddev_mtd_erase(struct mtd_info *mtd, struct erase_info *einfo); int nanddev_mtd_max_bad_blocks(struct mtd_info *mtd, loff_t offs, size_t len); +int nand_check_erased_ecc_chunk(void *data, int datalen, + void *ecc, int ecclen, + void *extraoob, int extraooblen, + int threshold); + #endif /* __LINUX_MTD_NAND_H */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index e84522e31301..d30bdc3fcfd7 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1519,11 +1519,6 @@ int rawnand_sw_bch_correct(struct nand_chip *chip, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); void rawnand_sw_bch_cleanup(struct nand_chip *chip); -int nand_check_erased_ecc_chunk(void *data, int datalen, - void *ecc, int ecclen, - void *extraoob, int extraooblen, - int threshold); - int nand_ecc_choose_conf(struct nand_chip *chip, const struct nand_ecc_caps *caps, int oobavail); -- cgit v1.2.3 From bee8a520eb84950193d0566ea2c2e46406a4b6ce Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 9 Sep 2025 17:50:56 +0800 Subject: rhashtable: Use rcu_dereference_all and rcu_dereference_all_check Add rcu_dereference_all and rcu_dereference_all_check so that library code such as rhashtable can be used with any RCU variant. As it stands rcu_dereference is used within rashtable, which creates false-positive warnings if the user calls it from another RCU context, such as preempt_disable(). Use the rcu_dereference_all and rcu_dereference_all_check calls in rhashtable to suppress these warnings. Also replace the rcu_dereference_raw calls in the list iterators with rcu_dereference_all to uncover buggy calls. Reported-by: Menglong Dong Signed-off-by: Herbert Xu Reviewed-by: Paul E. McKenney Signed-off-by: Herbert Xu --- include/linux/rcupdate.h | 26 ++++++++++++++++++++++++++ include/linux/rhashtable.h | 14 +++++++------- 2 files changed, 33 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 120536f4c6eb..448eb1f0cb48 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -713,6 +713,24 @@ do { \ (c) || rcu_read_lock_sched_held(), \ __rcu) +/** + * rcu_dereference_all_check() - rcu_dereference_all with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is similar to rcu_dereference_check(), but allows protection + * by all forms of vanilla RCU readers, including preemption disabled, + * bh-disabled, and interrupt-disabled regions of code. Note that "vanilla + * RCU" excludes SRCU and the various Tasks RCU flavors. Please note + * that this macro should not be backported to any Linux-kernel version + * preceding v5.0 due to changes in synchronize_rcu() semantics prior + * to that version. + */ +#define rcu_dereference_all_check(p, c) \ + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_any_held(), \ + __rcu) + /* * The tracing infrastructure traces RCU (we want that), but unfortunately * some of the RCU checks causes tracing to lock up the system. @@ -767,6 +785,14 @@ do { \ */ #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) +/** + * rcu_dereference_all() - fetch RCU-all-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_all(p) rcu_dereference_all_check(p, 0) + /** * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism * @p: The pointer to hand off diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e740157f3cd7..05a221ce79a6 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -272,13 +272,13 @@ struct rhash_lock_head __rcu **rht_bucket_nested_insert( rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_rcu(p, ht) \ - rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) + rcu_dereference_all_check(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_bucket(p, tbl, hash) \ rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_dereference_bucket_rcu(p, tbl, hash) \ - rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash)) + rcu_dereference_all_check(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) @@ -373,7 +373,7 @@ static inline struct rhash_head *__rht_ptr( static inline struct rhash_head *rht_ptr_rcu( struct rhash_lock_head __rcu *const *bkt) { - return __rht_ptr(rcu_dereference(*bkt), bkt); + return __rht_ptr(rcu_dereference_all(*bkt), bkt); } static inline struct rhash_head *rht_ptr( @@ -497,7 +497,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, for (({barrier(); }), \ pos = head; \ !rht_is_a_nulls(pos); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_all(pos->next)) /** * rht_for_each_rcu - iterate over rcu hash chain @@ -513,7 +513,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, for (({barrier(); }), \ pos = rht_ptr_rcu(rht_bucket(tbl, hash)); \ !rht_is_a_nulls(pos); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_all(pos->next)) /** * rht_for_each_entry_rcu_from - iterated over rcu hash chain from given head @@ -560,7 +560,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, * list returned by rhltable_lookup. */ #define rhl_for_each_rcu(pos, list) \ - for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) + for (pos = list; pos; pos = rcu_dereference_all(pos->next)) /** * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type @@ -574,7 +574,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, */ #define rhl_for_each_entry_rcu(tpos, pos, list, member) \ for (pos = list; pos && rht_entry(tpos, pos, member); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_all(pos->next)) static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, const void *obj) -- cgit v1.2.3 From 3d716c51e0e8791f8dd72479a3e6d5e7650ac35e Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 13 Sep 2025 18:57:51 +0800 Subject: crypto: hisilicon/qm - mask axi error before memory init After the device memory is cleared, if the software sends the doorbell operation, the hardware may trigger a axi error when processing the doorbell. This error is caused by memory clearing and hardware access to address 0. Therefore, the axi error is masked during this period. Signed-off-by: Weili Qian Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 100 +++++++++++++++++++---------- drivers/crypto/hisilicon/qm.c | 66 +++++++++++++------ drivers/crypto/hisilicon/sec2/sec_main.c | 90 ++++++++++++++++++-------- drivers/crypto/hisilicon/zip/zip_main.c | 102 ++++++++++++++++++++---------- include/linux/hisi_acc_qm.h | 21 ++++-- 5 files changed, 257 insertions(+), 122 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index f437f361a2c9..718abe3fa5fe 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -39,6 +39,7 @@ #define HPRE_HAC_RAS_NFE_ENB 0x301414 #define HPRE_HAC_RAS_FE_ENB 0x301418 #define HPRE_HAC_INT_SET 0x301500 +#define HPRE_AXI_ERROR_MASK GENMASK(21, 10) #define HPRE_RNG_TIMEOUT_NUM 0x301A34 #define HPRE_CORE_INT_ENABLE 0 #define HPRE_RDCHN_INI_ST 0x301a00 @@ -798,8 +799,7 @@ static void hpre_master_ooo_ctrl(struct hisi_qm *qm, bool enable) val1 = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB); if (enable) { val1 |= HPRE_AM_OOO_SHUTDOWN_ENABLE; - val2 = hisi_qm_get_hw_info(qm, hpre_basic_info, - HPRE_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); + val2 = qm->err_info.dev_err.shutdown_mask; } else { val1 &= ~HPRE_AM_OOO_SHUTDOWN_ENABLE; val2 = 0x0; @@ -813,38 +813,33 @@ static void hpre_master_ooo_ctrl(struct hisi_qm *qm, bool enable) static void hpre_hw_error_disable(struct hisi_qm *qm) { - u32 ce, nfe; - - ce = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_CE_MASK_CAP, qm->cap_ver); - nfe = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_NFE_MASK_CAP, qm->cap_ver); + struct hisi_qm_err_mask *dev_err = &qm->err_info.dev_err; + u32 err_mask = dev_err->ce | dev_err->nfe | dev_err->fe; /* disable hpre hw error interrupts */ - writel(ce | nfe | HPRE_HAC_RAS_FE_ENABLE, qm->io_base + HPRE_INT_MASK); + writel(err_mask, qm->io_base + HPRE_INT_MASK); /* disable HPRE block master OOO when nfe occurs on Kunpeng930 */ hpre_master_ooo_ctrl(qm, false); } static void hpre_hw_error_enable(struct hisi_qm *qm) { - u32 ce, nfe, err_en; - - ce = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_CE_MASK_CAP, qm->cap_ver); - nfe = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_NFE_MASK_CAP, qm->cap_ver); + struct hisi_qm_err_mask *dev_err = &qm->err_info.dev_err; + u32 err_mask = dev_err->ce | dev_err->nfe | dev_err->fe; /* clear HPRE hw error source if having */ - writel(ce | nfe | HPRE_HAC_RAS_FE_ENABLE, qm->io_base + HPRE_HAC_SOURCE_INT); + writel(err_mask, qm->io_base + HPRE_HAC_SOURCE_INT); /* configure error type */ - writel(ce, qm->io_base + HPRE_RAS_CE_ENB); - writel(nfe, qm->io_base + HPRE_RAS_NFE_ENB); - writel(HPRE_HAC_RAS_FE_ENABLE, qm->io_base + HPRE_RAS_FE_ENB); + writel(dev_err->ce, qm->io_base + HPRE_RAS_CE_ENB); + writel(dev_err->nfe, qm->io_base + HPRE_RAS_NFE_ENB); + writel(dev_err->fe, qm->io_base + HPRE_RAS_FE_ENB); /* enable HPRE block master OOO when nfe occurs on Kunpeng930 */ hpre_master_ooo_ctrl(qm, true); /* enable hpre hw error interrupts */ - err_en = ce | nfe | HPRE_HAC_RAS_FE_ENABLE; - writel(~err_en, qm->io_base + HPRE_INT_MASK); + writel(~err_mask, qm->io_base + HPRE_INT_MASK); } static inline struct hisi_qm *hpre_file_to_qm(struct hpre_debugfs_file *file) @@ -1399,9 +1394,8 @@ static void hpre_clear_hw_err_status(struct hisi_qm *qm, u32 err_sts) static void hpre_disable_error_report(struct hisi_qm *qm, u32 err_type) { - u32 nfe_mask; + u32 nfe_mask = qm->err_info.dev_err.nfe; - nfe_mask = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_NFE_MASK_CAP, qm->cap_ver); writel(nfe_mask & (~err_type), qm->io_base + HPRE_RAS_NFE_ENB); } @@ -1422,11 +1416,11 @@ static enum acc_err_result hpre_get_err_result(struct hisi_qm *qm) err_status = hpre_get_hw_err_status(qm); if (err_status) { - if (err_status & qm->err_info.ecc_2bits_mask) + if (err_status & qm->err_info.dev_err.ecc_2bits_mask) qm->err_status.is_dev_ecc_mbit = true; hpre_log_hw_error(qm, err_status); - if (err_status & qm->err_info.dev_reset_mask) { + if (err_status & qm->err_info.dev_err.reset_mask) { /* Disable the same error reporting until device is recovered. */ hpre_disable_error_report(qm, err_status); return ACC_ERR_NEED_RESET; @@ -1442,28 +1436,64 @@ static bool hpre_dev_is_abnormal(struct hisi_qm *qm) u32 err_status; err_status = hpre_get_hw_err_status(qm); - if (err_status & qm->err_info.dev_shutdown_mask) + if (err_status & qm->err_info.dev_err.shutdown_mask) return true; return false; } +static void hpre_disable_axi_error(struct hisi_qm *qm) +{ + struct hisi_qm_err_mask *dev_err = &qm->err_info.dev_err; + u32 err_mask = dev_err->ce | dev_err->nfe | dev_err->fe; + u32 val; + + val = ~(err_mask & (~HPRE_AXI_ERROR_MASK)); + writel(val, qm->io_base + HPRE_INT_MASK); + + if (qm->ver > QM_HW_V2) + writel(dev_err->shutdown_mask & (~HPRE_AXI_ERROR_MASK), + qm->io_base + HPRE_OOO_SHUTDOWN_SEL); +} + +static void hpre_enable_axi_error(struct hisi_qm *qm) +{ + struct hisi_qm_err_mask *dev_err = &qm->err_info.dev_err; + u32 err_mask = dev_err->ce | dev_err->nfe | dev_err->fe; + + /* clear axi error source */ + writel(HPRE_AXI_ERROR_MASK, qm->io_base + HPRE_HAC_SOURCE_INT); + + writel(~err_mask, qm->io_base + HPRE_INT_MASK); + + if (qm->ver > QM_HW_V2) + writel(dev_err->shutdown_mask, qm->io_base + HPRE_OOO_SHUTDOWN_SEL); +} + static void hpre_err_info_init(struct hisi_qm *qm) { struct hisi_qm_err_info *err_info = &qm->err_info; + struct hisi_qm_err_mask *qm_err = &err_info->qm_err; + struct hisi_qm_err_mask *dev_err = &err_info->dev_err; + + qm_err->fe = HPRE_HAC_RAS_FE_ENABLE; + qm_err->ce = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_QM_CE_MASK_CAP, qm->cap_ver); + qm_err->nfe = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_QM_NFE_MASK_CAP, qm->cap_ver); + qm_err->shutdown_mask = hisi_qm_get_hw_info(qm, hpre_basic_info, + HPRE_QM_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); + qm_err->reset_mask = hisi_qm_get_hw_info(qm, hpre_basic_info, + HPRE_QM_RESET_MASK_CAP, qm->cap_ver); + qm_err->ecc_2bits_mask = QM_ECC_MBIT; + + dev_err->fe = HPRE_HAC_RAS_FE_ENABLE; + dev_err->ce = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_CE_MASK_CAP, qm->cap_ver); + dev_err->nfe = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_NFE_MASK_CAP, qm->cap_ver); + dev_err->shutdown_mask = hisi_qm_get_hw_info(qm, hpre_basic_info, + HPRE_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); + dev_err->reset_mask = hisi_qm_get_hw_info(qm, hpre_basic_info, + HPRE_RESET_MASK_CAP, qm->cap_ver); + dev_err->ecc_2bits_mask = HPRE_CORE_ECC_2BIT_ERR | HPRE_OOO_ECC_2BIT_ERR; - err_info->fe = HPRE_HAC_RAS_FE_ENABLE; - err_info->ce = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_QM_CE_MASK_CAP, qm->cap_ver); - err_info->nfe = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_QM_NFE_MASK_CAP, qm->cap_ver); - err_info->ecc_2bits_mask = HPRE_CORE_ECC_2BIT_ERR | HPRE_OOO_ECC_2BIT_ERR; - err_info->dev_shutdown_mask = hisi_qm_get_hw_info(qm, hpre_basic_info, - HPRE_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); - err_info->qm_shutdown_mask = hisi_qm_get_hw_info(qm, hpre_basic_info, - HPRE_QM_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); - err_info->qm_reset_mask = hisi_qm_get_hw_info(qm, hpre_basic_info, - HPRE_QM_RESET_MASK_CAP, qm->cap_ver); - err_info->dev_reset_mask = hisi_qm_get_hw_info(qm, hpre_basic_info, - HPRE_RESET_MASK_CAP, qm->cap_ver); err_info->msi_wr_port = HPRE_WR_MSI_PORT; err_info->acpi_rst = "HRST"; } @@ -1481,6 +1511,8 @@ static const struct hisi_qm_err_ini hpre_err_ini = { .err_info_init = hpre_err_info_init, .get_err_result = hpre_get_err_result, .dev_is_abnormal = hpre_dev_is_abnormal, + .disable_axi_error = hpre_disable_axi_error, + .enable_axi_error = hpre_enable_axi_error, }; static int hpre_pf_probe_init(struct hpre *hpre) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index c1ffaae78532..d0d1fc45b8fe 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -147,9 +147,9 @@ #define QM_RAS_CE_TIMES_PER_IRQ 1 #define QM_OOO_SHUTDOWN_SEL 0x1040f8 #define QM_AXI_RRESP_ERR BIT(0) -#define QM_ECC_MBIT BIT(2) #define QM_DB_TIMEOUT BIT(10) #define QM_OF_FIFO_OF BIT(11) +#define QM_RAS_AXI_ERROR (BIT(0) | BIT(1) | BIT(12)) #define QM_RESET_WAIT_TIMEOUT 400 #define QM_PEH_VENDOR_ID 0x1000d8 @@ -165,7 +165,6 @@ #define ACC_MASTER_TRANS_RETURN 0x300150 #define ACC_MASTER_GLOBAL_CTRL 0x300000 #define ACC_AM_CFG_PORT_WR_EN 0x30001c -#define QM_RAS_NFE_MBIT_DISABLE ~QM_ECC_MBIT #define ACC_AM_ROB_ECC_INT_STS 0x300104 #define ACC_ROB_ECC_ERR_MULTPL BIT(1) #define QM_MSI_CAP_ENABLE BIT(16) @@ -522,7 +521,7 @@ static bool qm_check_dev_error(struct hisi_qm *qm) return false; err_status = qm_get_hw_error_status(pf_qm); - if (err_status & pf_qm->err_info.qm_shutdown_mask) + if (err_status & pf_qm->err_info.qm_err.shutdown_mask) return true; if (pf_qm->err_ini->dev_is_abnormal) @@ -1397,17 +1396,17 @@ static void qm_hw_error_init_v1(struct hisi_qm *qm) static void qm_hw_error_cfg(struct hisi_qm *qm) { - struct hisi_qm_err_info *err_info = &qm->err_info; + struct hisi_qm_err_mask *qm_err = &qm->err_info.qm_err; - qm->error_mask = err_info->nfe | err_info->ce | err_info->fe; + qm->error_mask = qm_err->nfe | qm_err->ce | qm_err->fe; /* clear QM hw residual error source */ writel(qm->error_mask, qm->io_base + QM_ABNORMAL_INT_SOURCE); /* configure error type */ - writel(err_info->ce, qm->io_base + QM_RAS_CE_ENABLE); + writel(qm_err->ce, qm->io_base + QM_RAS_CE_ENABLE); writel(QM_RAS_CE_TIMES_PER_IRQ, qm->io_base + QM_RAS_CE_THRESHOLD); - writel(err_info->nfe, qm->io_base + QM_RAS_NFE_ENABLE); - writel(err_info->fe, qm->io_base + QM_RAS_FE_ENABLE); + writel(qm_err->nfe, qm->io_base + QM_RAS_NFE_ENABLE); + writel(qm_err->fe, qm->io_base + QM_RAS_FE_ENABLE); } static void qm_hw_error_init_v2(struct hisi_qm *qm) @@ -1436,7 +1435,7 @@ static void qm_hw_error_init_v3(struct hisi_qm *qm) qm_hw_error_cfg(qm); /* enable close master ooo when hardware error happened */ - writel(qm->err_info.qm_shutdown_mask, qm->io_base + QM_OOO_SHUTDOWN_SEL); + writel(qm->err_info.qm_err.shutdown_mask, qm->io_base + QM_OOO_SHUTDOWN_SEL); irq_unmask = ~qm->error_mask; irq_unmask &= readl(qm->io_base + QM_ABNORMAL_INT_MASK); @@ -1498,6 +1497,7 @@ static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status) static enum acc_err_result qm_hw_error_handle_v2(struct hisi_qm *qm) { + struct hisi_qm_err_mask *qm_err = &qm->err_info.qm_err; u32 error_status; error_status = qm_get_hw_error_status(qm); @@ -1506,17 +1506,16 @@ static enum acc_err_result qm_hw_error_handle_v2(struct hisi_qm *qm) qm->err_status.is_qm_ecc_mbit = true; qm_log_hw_error(qm, error_status); - if (error_status & qm->err_info.qm_reset_mask) { + if (error_status & qm_err->reset_mask) { /* Disable the same error reporting until device is recovered. */ - writel(qm->err_info.nfe & (~error_status), - qm->io_base + QM_RAS_NFE_ENABLE); + writel(qm_err->nfe & (~error_status), qm->io_base + QM_RAS_NFE_ENABLE); return ACC_ERR_NEED_RESET; } /* Clear error source if not need reset. */ writel(error_status, qm->io_base + QM_ABNORMAL_INT_SOURCE); - writel(qm->err_info.nfe, qm->io_base + QM_RAS_NFE_ENABLE); - writel(qm->err_info.ce, qm->io_base + QM_RAS_CE_ENABLE); + writel(qm_err->nfe, qm->io_base + QM_RAS_NFE_ENABLE); + writel(qm_err->ce, qm->io_base + QM_RAS_CE_ENABLE); } return ACC_ERR_RECOVERED; @@ -4227,9 +4226,9 @@ static void qm_dev_ecc_mbit_handle(struct hisi_qm *qm) !qm->err_status.is_qm_ecc_mbit && !qm->err_ini->close_axi_master_ooo) { nfe_enb = readl(qm->io_base + QM_RAS_NFE_ENABLE); - writel(nfe_enb & QM_RAS_NFE_MBIT_DISABLE, + writel(nfe_enb & ~qm->err_info.qm_err.ecc_2bits_mask, qm->io_base + QM_RAS_NFE_ENABLE); - writel(QM_ECC_MBIT, qm->io_base + QM_ABNORMAL_INT_SET); + writel(qm->err_info.qm_err.ecc_2bits_mask, qm->io_base + QM_ABNORMAL_INT_SET); } } @@ -4508,12 +4507,12 @@ static void qm_restart_prepare(struct hisi_qm *qm) qm->io_base + ACC_AM_CFG_PORT_WR_EN); /* clear dev ecc 2bit error source if having */ - value = qm_get_dev_err_status(qm) & qm->err_info.ecc_2bits_mask; + value = qm_get_dev_err_status(qm) & qm->err_info.dev_err.ecc_2bits_mask; if (value && qm->err_ini->clear_dev_hw_err_status) qm->err_ini->clear_dev_hw_err_status(qm, value); /* clear QM ecc mbit error source */ - writel(QM_ECC_MBIT, qm->io_base + QM_ABNORMAL_INT_SOURCE); + writel(qm->err_info.qm_err.ecc_2bits_mask, qm->io_base + QM_ABNORMAL_INT_SOURCE); /* clear AM Reorder Buffer ecc mbit source */ writel(ACC_ROB_ECC_ERR_MULTPL, qm->io_base + ACC_AM_ROB_ECC_INT_STS); @@ -4540,6 +4539,34 @@ clear_flags: qm->err_status.is_dev_ecc_mbit = false; } +static void qm_disable_axi_error(struct hisi_qm *qm) +{ + struct hisi_qm_err_mask *qm_err = &qm->err_info.qm_err; + u32 val; + + val = ~(qm->error_mask & (~QM_RAS_AXI_ERROR)); + writel(val, qm->io_base + QM_ABNORMAL_INT_MASK); + if (qm->ver > QM_HW_V2) + writel(qm_err->shutdown_mask & (~QM_RAS_AXI_ERROR), + qm->io_base + QM_OOO_SHUTDOWN_SEL); + + if (qm->err_ini->disable_axi_error) + qm->err_ini->disable_axi_error(qm); +} + +static void qm_enable_axi_error(struct hisi_qm *qm) +{ + /* clear axi error source */ + writel(QM_RAS_AXI_ERROR, qm->io_base + QM_ABNORMAL_INT_SOURCE); + + writel(~qm->error_mask, qm->io_base + QM_ABNORMAL_INT_MASK); + if (qm->ver > QM_HW_V2) + writel(qm->err_info.qm_err.shutdown_mask, qm->io_base + QM_OOO_SHUTDOWN_SEL); + + if (qm->err_ini->enable_axi_error) + qm->err_ini->enable_axi_error(qm); +} + static int qm_controller_reset_done(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -4573,6 +4600,7 @@ static int qm_controller_reset_done(struct hisi_qm *qm) qm_restart_prepare(qm); hisi_qm_dev_err_init(qm); + qm_disable_axi_error(qm); if (qm->err_ini->open_axi_master_ooo) qm->err_ini->open_axi_master_ooo(qm); @@ -4595,7 +4623,7 @@ static int qm_controller_reset_done(struct hisi_qm *qm) ret = qm_wait_vf_prepare_finish(qm); if (ret) pci_err(pdev, "failed to start by vfs in soft reset!\n"); - + qm_enable_axi_error(qm); qm_cmd_init(qm); qm_restart_done(qm); diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index bdb2d52ee1b6..19fda486fefb 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -47,6 +47,8 @@ #define SEC_RAS_FE_ENB_MSK 0x0 #define SEC_OOO_SHUTDOWN_SEL 0x301014 #define SEC_RAS_DISABLE 0x0 +#define SEC_AXI_ERROR_MASK (BIT(0) | BIT(1)) + #define SEC_MEM_START_INIT_REG 0x301100 #define SEC_MEM_INIT_DONE_REG 0x301104 @@ -713,8 +715,7 @@ static void sec_master_ooo_ctrl(struct hisi_qm *qm, bool enable) val1 = readl(qm->io_base + SEC_CONTROL_REG); if (enable) { val1 |= SEC_AXI_SHUTDOWN_ENABLE; - val2 = hisi_qm_get_hw_info(qm, sec_basic_info, - SEC_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); + val2 = qm->err_info.dev_err.shutdown_mask; } else { val1 &= SEC_AXI_SHUTDOWN_DISABLE; val2 = 0x0; @@ -728,7 +729,8 @@ static void sec_master_ooo_ctrl(struct hisi_qm *qm, bool enable) static void sec_hw_error_enable(struct hisi_qm *qm) { - u32 ce, nfe; + struct hisi_qm_err_mask *dev_err = &qm->err_info.dev_err; + u32 err_mask = dev_err->ce | dev_err->nfe | dev_err->fe; if (qm->ver == QM_HW_V1) { writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_MASK); @@ -736,22 +738,19 @@ static void sec_hw_error_enable(struct hisi_qm *qm) return; } - ce = hisi_qm_get_hw_info(qm, sec_basic_info, SEC_CE_MASK_CAP, qm->cap_ver); - nfe = hisi_qm_get_hw_info(qm, sec_basic_info, SEC_NFE_MASK_CAP, qm->cap_ver); - /* clear SEC hw error source if having */ - writel(ce | nfe | SEC_RAS_FE_ENB_MSK, qm->io_base + SEC_CORE_INT_SOURCE); + writel(err_mask, qm->io_base + SEC_CORE_INT_SOURCE); /* enable RAS int */ - writel(ce, qm->io_base + SEC_RAS_CE_REG); - writel(SEC_RAS_FE_ENB_MSK, qm->io_base + SEC_RAS_FE_REG); - writel(nfe, qm->io_base + SEC_RAS_NFE_REG); + writel(dev_err->ce, qm->io_base + SEC_RAS_CE_REG); + writel(dev_err->fe, qm->io_base + SEC_RAS_FE_REG); + writel(dev_err->nfe, qm->io_base + SEC_RAS_NFE_REG); /* enable SEC block master OOO when nfe occurs on Kunpeng930 */ sec_master_ooo_ctrl(qm, true); /* enable SEC hw error interrupts */ - writel(ce | nfe | SEC_RAS_FE_ENB_MSK, qm->io_base + SEC_CORE_INT_MASK); + writel(err_mask, qm->io_base + SEC_CORE_INT_MASK); } static void sec_hw_error_disable(struct hisi_qm *qm) @@ -1108,9 +1107,8 @@ static void sec_clear_hw_err_status(struct hisi_qm *qm, u32 err_sts) static void sec_disable_error_report(struct hisi_qm *qm, u32 err_type) { - u32 nfe_mask; + u32 nfe_mask = qm->err_info.dev_err.nfe; - nfe_mask = hisi_qm_get_hw_info(qm, sec_basic_info, SEC_NFE_MASK_CAP, qm->cap_ver); writel(nfe_mask & (~err_type), qm->io_base + SEC_RAS_NFE_REG); } @@ -1129,11 +1127,11 @@ static enum acc_err_result sec_get_err_result(struct hisi_qm *qm) err_status = sec_get_hw_err_status(qm); if (err_status) { - if (err_status & qm->err_info.ecc_2bits_mask) + if (err_status & qm->err_info.dev_err.ecc_2bits_mask) qm->err_status.is_dev_ecc_mbit = true; sec_log_hw_error(qm, err_status); - if (err_status & qm->err_info.dev_reset_mask) { + if (err_status & qm->err_info.dev_err.reset_mask) { /* Disable the same error reporting until device is recovered. */ sec_disable_error_report(qm, err_status); return ACC_ERR_NEED_RESET; @@ -1149,28 +1147,62 @@ static bool sec_dev_is_abnormal(struct hisi_qm *qm) u32 err_status; err_status = sec_get_hw_err_status(qm); - if (err_status & qm->err_info.dev_shutdown_mask) + if (err_status & qm->err_info.dev_err.shutdown_mask) return true; return false; } +static void sec_disable_axi_error(struct hisi_qm *qm) +{ + struct hisi_qm_err_mask *dev_err = &qm->err_info.dev_err; + u32 err_mask = dev_err->ce | dev_err->nfe | dev_err->fe; + + writel(err_mask & ~SEC_AXI_ERROR_MASK, qm->io_base + SEC_CORE_INT_MASK); + + if (qm->ver > QM_HW_V2) + writel(dev_err->shutdown_mask & (~SEC_AXI_ERROR_MASK), + qm->io_base + SEC_OOO_SHUTDOWN_SEL); +} + +static void sec_enable_axi_error(struct hisi_qm *qm) +{ + struct hisi_qm_err_mask *dev_err = &qm->err_info.dev_err; + u32 err_mask = dev_err->ce | dev_err->nfe | dev_err->fe; + + /* clear axi error source */ + writel(SEC_AXI_ERROR_MASK, qm->io_base + SEC_CORE_INT_SOURCE); + + writel(err_mask, qm->io_base + SEC_CORE_INT_MASK); + + if (qm->ver > QM_HW_V2) + writel(dev_err->shutdown_mask, qm->io_base + SEC_OOO_SHUTDOWN_SEL); +} + static void sec_err_info_init(struct hisi_qm *qm) { struct hisi_qm_err_info *err_info = &qm->err_info; + struct hisi_qm_err_mask *qm_err = &err_info->qm_err; + struct hisi_qm_err_mask *dev_err = &err_info->dev_err; + + qm_err->fe = SEC_RAS_FE_ENB_MSK; + qm_err->ce = hisi_qm_get_hw_info(qm, sec_basic_info, SEC_QM_CE_MASK_CAP, qm->cap_ver); + qm_err->nfe = hisi_qm_get_hw_info(qm, sec_basic_info, SEC_QM_NFE_MASK_CAP, qm->cap_ver); + qm_err->shutdown_mask = hisi_qm_get_hw_info(qm, sec_basic_info, + SEC_QM_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); + qm_err->reset_mask = hisi_qm_get_hw_info(qm, sec_basic_info, + SEC_QM_RESET_MASK_CAP, qm->cap_ver); + qm_err->ecc_2bits_mask = QM_ECC_MBIT; + + dev_err->fe = SEC_RAS_FE_ENB_MSK; + dev_err->ce = hisi_qm_get_hw_info(qm, sec_basic_info, SEC_CE_MASK_CAP, qm->cap_ver); + dev_err->nfe = hisi_qm_get_hw_info(qm, sec_basic_info, SEC_NFE_MASK_CAP, qm->cap_ver); + dev_err->shutdown_mask = hisi_qm_get_hw_info(qm, sec_basic_info, + SEC_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); + dev_err->reset_mask = hisi_qm_get_hw_info(qm, sec_basic_info, + SEC_RESET_MASK_CAP, qm->cap_ver); + dev_err->ecc_2bits_mask = SEC_CORE_INT_STATUS_M_ECC; - err_info->fe = SEC_RAS_FE_ENB_MSK; - err_info->ce = hisi_qm_get_hw_info(qm, sec_basic_info, SEC_QM_CE_MASK_CAP, qm->cap_ver); - err_info->nfe = hisi_qm_get_hw_info(qm, sec_basic_info, SEC_QM_NFE_MASK_CAP, qm->cap_ver); - err_info->ecc_2bits_mask = SEC_CORE_INT_STATUS_M_ECC; - err_info->qm_shutdown_mask = hisi_qm_get_hw_info(qm, sec_basic_info, - SEC_QM_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); - err_info->dev_shutdown_mask = hisi_qm_get_hw_info(qm, sec_basic_info, - SEC_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); - err_info->qm_reset_mask = hisi_qm_get_hw_info(qm, sec_basic_info, - SEC_QM_RESET_MASK_CAP, qm->cap_ver); - err_info->dev_reset_mask = hisi_qm_get_hw_info(qm, sec_basic_info, - SEC_RESET_MASK_CAP, qm->cap_ver); err_info->msi_wr_port = BIT(0); err_info->acpi_rst = "SRST"; } @@ -1188,6 +1220,8 @@ static const struct hisi_qm_err_ini sec_err_ini = { .err_info_init = sec_err_info_init, .get_err_result = sec_get_err_result, .dev_is_abnormal = sec_dev_is_abnormal, + .disable_axi_error = sec_disable_axi_error, + .enable_axi_error = sec_enable_axi_error, }; static int sec_pf_probe_init(struct sec_dev *sec) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 62cd090e13af..6b5cad82c856 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -65,6 +65,7 @@ #define HZIP_SRAM_ECC_ERR_NUM_SHIFT 16 #define HZIP_SRAM_ECC_ERR_ADDR_SHIFT 24 #define HZIP_CORE_INT_MASK_ALL GENMASK(12, 0) +#define HZIP_AXI_ERROR_MASK (BIT(2) | BIT(3)) #define HZIP_SQE_SIZE 128 #define HZIP_PF_DEF_Q_NUM 64 #define HZIP_PF_DEF_Q_BASE 0 @@ -662,8 +663,7 @@ static void hisi_zip_master_ooo_ctrl(struct hisi_qm *qm, bool enable) val1 = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL); if (enable) { val1 |= HZIP_AXI_SHUTDOWN_ENABLE; - val2 = hisi_qm_get_hw_info(qm, zip_basic_cap_info, - ZIP_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); + val2 = qm->err_info.dev_err.shutdown_mask; } else { val1 &= ~HZIP_AXI_SHUTDOWN_ENABLE; val2 = 0x0; @@ -677,7 +677,8 @@ static void hisi_zip_master_ooo_ctrl(struct hisi_qm *qm, bool enable) static void hisi_zip_hw_error_enable(struct hisi_qm *qm) { - u32 nfe, ce; + struct hisi_qm_err_mask *dev_err = &qm->err_info.dev_err; + u32 err_mask = dev_err->ce | dev_err->nfe | dev_err->fe; if (qm->ver == QM_HW_V1) { writel(HZIP_CORE_INT_MASK_ALL, @@ -686,33 +687,29 @@ static void hisi_zip_hw_error_enable(struct hisi_qm *qm) return; } - nfe = hisi_qm_get_hw_info(qm, zip_basic_cap_info, ZIP_NFE_MASK_CAP, qm->cap_ver); - ce = hisi_qm_get_hw_info(qm, zip_basic_cap_info, ZIP_CE_MASK_CAP, qm->cap_ver); - /* clear ZIP hw error source if having */ - writel(ce | nfe | HZIP_CORE_INT_RAS_FE_ENB_MASK, qm->io_base + HZIP_CORE_INT_SOURCE); + writel(err_mask, qm->io_base + HZIP_CORE_INT_SOURCE); /* configure error type */ - writel(ce, qm->io_base + HZIP_CORE_INT_RAS_CE_ENB); - writel(HZIP_CORE_INT_RAS_FE_ENB_MASK, qm->io_base + HZIP_CORE_INT_RAS_FE_ENB); - writel(nfe, qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB); + writel(dev_err->ce, qm->io_base + HZIP_CORE_INT_RAS_CE_ENB); + writel(dev_err->fe, qm->io_base + HZIP_CORE_INT_RAS_FE_ENB); + writel(dev_err->nfe, qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB); hisi_zip_master_ooo_ctrl(qm, true); /* enable ZIP hw error interrupts */ - writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG); + writel(~err_mask, qm->io_base + HZIP_CORE_INT_MASK_REG); hisi_dae_hw_error_enable(qm); } static void hisi_zip_hw_error_disable(struct hisi_qm *qm) { - u32 nfe, ce; + struct hisi_qm_err_mask *dev_err = &qm->err_info.dev_err; + u32 err_mask = dev_err->ce | dev_err->nfe | dev_err->fe; /* disable ZIP hw error interrupts */ - nfe = hisi_qm_get_hw_info(qm, zip_basic_cap_info, ZIP_NFE_MASK_CAP, qm->cap_ver); - ce = hisi_qm_get_hw_info(qm, zip_basic_cap_info, ZIP_CE_MASK_CAP, qm->cap_ver); - writel(ce | nfe | HZIP_CORE_INT_RAS_FE_ENB_MASK, qm->io_base + HZIP_CORE_INT_MASK_REG); + writel(err_mask, qm->io_base + HZIP_CORE_INT_MASK_REG); hisi_zip_master_ooo_ctrl(qm, false); @@ -1186,9 +1183,8 @@ static void hisi_zip_clear_hw_err_status(struct hisi_qm *qm, u32 err_sts) static void hisi_zip_disable_error_report(struct hisi_qm *qm, u32 err_type) { - u32 nfe_mask; + u32 nfe_mask = qm->err_info.dev_err.nfe; - nfe_mask = hisi_qm_get_hw_info(qm, zip_basic_cap_info, ZIP_NFE_MASK_CAP, qm->cap_ver); writel(nfe_mask & (~err_type), qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB); } @@ -1230,14 +1226,14 @@ static enum acc_err_result hisi_zip_get_err_result(struct hisi_qm *qm) /* Get device hardware new error status */ err_status = hisi_zip_get_hw_err_status(qm); if (err_status) { - if (err_status & qm->err_info.ecc_2bits_mask) + if (err_status & qm->err_info.dev_err.ecc_2bits_mask) qm->err_status.is_dev_ecc_mbit = true; hisi_zip_log_hw_error(qm, err_status); - if (err_status & qm->err_info.dev_reset_mask) { + if (err_status & qm->err_info.dev_err.reset_mask) { /* Disable the same error reporting until device is recovered. */ hisi_zip_disable_error_report(qm, err_status); - return ACC_ERR_NEED_RESET; + zip_result = ACC_ERR_NEED_RESET; } else { hisi_zip_clear_hw_err_status(qm, err_status); } @@ -1255,7 +1251,7 @@ static bool hisi_zip_dev_is_abnormal(struct hisi_qm *qm) u32 err_status; err_status = hisi_zip_get_hw_err_status(qm); - if (err_status & qm->err_info.dev_shutdown_mask) + if (err_status & qm->err_info.dev_err.shutdown_mask) return true; return hisi_dae_dev_is_abnormal(qm); @@ -1266,23 +1262,59 @@ static int hisi_zip_set_priv_status(struct hisi_qm *qm) return hisi_dae_close_axi_master_ooo(qm); } +static void hisi_zip_disable_axi_error(struct hisi_qm *qm) +{ + struct hisi_qm_err_mask *dev_err = &qm->err_info.dev_err; + u32 err_mask = dev_err->ce | dev_err->nfe | dev_err->fe; + u32 val; + + val = ~(err_mask & (~HZIP_AXI_ERROR_MASK)); + writel(val, qm->io_base + HZIP_CORE_INT_MASK_REG); + + if (qm->ver > QM_HW_V2) + writel(dev_err->shutdown_mask & (~HZIP_AXI_ERROR_MASK), + qm->io_base + HZIP_OOO_SHUTDOWN_SEL); +} + +static void hisi_zip_enable_axi_error(struct hisi_qm *qm) +{ + struct hisi_qm_err_mask *dev_err = &qm->err_info.dev_err; + u32 err_mask = dev_err->ce | dev_err->nfe | dev_err->fe; + + /* clear axi error source */ + writel(HZIP_AXI_ERROR_MASK, qm->io_base + HZIP_CORE_INT_SOURCE); + + writel(~err_mask, qm->io_base + HZIP_CORE_INT_MASK_REG); + + if (qm->ver > QM_HW_V2) + writel(dev_err->shutdown_mask, qm->io_base + HZIP_OOO_SHUTDOWN_SEL); +} + static void hisi_zip_err_info_init(struct hisi_qm *qm) { struct hisi_qm_err_info *err_info = &qm->err_info; + struct hisi_qm_err_mask *qm_err = &err_info->qm_err; + struct hisi_qm_err_mask *dev_err = &err_info->dev_err; + + qm_err->fe = HZIP_CORE_INT_RAS_FE_ENB_MASK; + qm_err->ce = hisi_qm_get_hw_info(qm, zip_basic_cap_info, ZIP_QM_CE_MASK_CAP, qm->cap_ver); + qm_err->nfe = hisi_qm_get_hw_info(qm, zip_basic_cap_info, + ZIP_QM_NFE_MASK_CAP, qm->cap_ver); + qm_err->ecc_2bits_mask = QM_ECC_MBIT; + qm_err->reset_mask = hisi_qm_get_hw_info(qm, zip_basic_cap_info, + ZIP_QM_RESET_MASK_CAP, qm->cap_ver); + qm_err->shutdown_mask = hisi_qm_get_hw_info(qm, zip_basic_cap_info, + ZIP_QM_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); + + dev_err->fe = HZIP_CORE_INT_RAS_FE_ENB_MASK; + dev_err->ce = hisi_qm_get_hw_info(qm, zip_basic_cap_info, ZIP_CE_MASK_CAP, qm->cap_ver); + dev_err->nfe = hisi_qm_get_hw_info(qm, zip_basic_cap_info, ZIP_NFE_MASK_CAP, qm->cap_ver); + dev_err->ecc_2bits_mask = HZIP_CORE_INT_STATUS_M_ECC; + dev_err->shutdown_mask = hisi_qm_get_hw_info(qm, zip_basic_cap_info, + ZIP_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); + dev_err->reset_mask = hisi_qm_get_hw_info(qm, zip_basic_cap_info, + ZIP_RESET_MASK_CAP, qm->cap_ver); - err_info->fe = HZIP_CORE_INT_RAS_FE_ENB_MASK; - err_info->ce = hisi_qm_get_hw_info(qm, zip_basic_cap_info, ZIP_QM_CE_MASK_CAP, qm->cap_ver); - err_info->nfe = hisi_qm_get_hw_info(qm, zip_basic_cap_info, - ZIP_QM_NFE_MASK_CAP, qm->cap_ver); - err_info->ecc_2bits_mask = HZIP_CORE_INT_STATUS_M_ECC; - err_info->qm_shutdown_mask = hisi_qm_get_hw_info(qm, zip_basic_cap_info, - ZIP_QM_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); - err_info->dev_shutdown_mask = hisi_qm_get_hw_info(qm, zip_basic_cap_info, - ZIP_OOO_SHUTDOWN_MASK_CAP, qm->cap_ver); - err_info->qm_reset_mask = hisi_qm_get_hw_info(qm, zip_basic_cap_info, - ZIP_QM_RESET_MASK_CAP, qm->cap_ver); - err_info->dev_reset_mask = hisi_qm_get_hw_info(qm, zip_basic_cap_info, - ZIP_RESET_MASK_CAP, qm->cap_ver); err_info->msi_wr_port = HZIP_WR_PORT; err_info->acpi_rst = "ZRST"; } @@ -1302,6 +1334,8 @@ static const struct hisi_qm_err_ini hisi_zip_err_ini = { .get_err_result = hisi_zip_get_err_result, .set_priv_status = hisi_zip_set_priv_status, .dev_is_abnormal = hisi_zip_dev_is_abnormal, + .disable_axi_error = hisi_zip_disable_axi_error, + .enable_axi_error = hisi_zip_enable_axi_error, }; static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index f2254ddc327c..c4690e365ade 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -104,6 +104,8 @@ #define UACCE_MODE_SVA 1 /* use uacce sva mode */ #define UACCE_MODE_DESC "0(default) means only register to crypto, 1 means both register to crypto and uacce" +#define QM_ECC_MBIT BIT(2) + enum qm_stop_reason { QM_NORMAL, QM_SOFT_RESET, @@ -240,19 +242,22 @@ enum acc_err_result { ACC_ERR_RECOVERED, }; -struct hisi_qm_err_info { - char *acpi_rst; - u32 msi_wr_port; +struct hisi_qm_err_mask { u32 ecc_2bits_mask; - u32 qm_shutdown_mask; - u32 dev_shutdown_mask; - u32 qm_reset_mask; - u32 dev_reset_mask; + u32 shutdown_mask; + u32 reset_mask; u32 ce; u32 nfe; u32 fe; }; +struct hisi_qm_err_info { + char *acpi_rst; + u32 msi_wr_port; + struct hisi_qm_err_mask qm_err; + struct hisi_qm_err_mask dev_err; +}; + struct hisi_qm_err_status { u32 is_qm_ecc_mbit; u32 is_dev_ecc_mbit; @@ -273,6 +278,8 @@ struct hisi_qm_err_ini { enum acc_err_result (*get_err_result)(struct hisi_qm *qm); bool (*dev_is_abnormal)(struct hisi_qm *qm); int (*set_priv_status)(struct hisi_qm *qm); + void (*disable_axi_error)(struct hisi_qm *qm); + void (*enable_axi_error)(struct hisi_qm *qm); }; struct hisi_qm_cap_info { -- cgit v1.2.3 From 2ee3a75e42081db3d951c0893f5d654f16d1c0e8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 18 Jul 2025 11:26:15 +1000 Subject: nfsd: discard nfsd_file_get_local() This interface was deprecated by commit e6f7e1487ab5 ("nfs_localio: simplify interface to nfsd for getting nfsd_file") and is now unused. So let's remove it. Signed-off-by: NeilBrown Reviewed-by: Mike Snitzer Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 21 --------------------- fs/nfsd/filecache.h | 1 - fs/nfsd/localio.c | 1 - include/linux/nfslocalio.h | 1 - 4 files changed, 24 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 732abf6b92a5..75bc48031c07 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -391,27 +391,6 @@ nfsd_file_put_local(struct nfsd_file __rcu **pnf) return net; } -/** - * nfsd_file_get_local - get nfsd_file reference and reference to net - * @nf: nfsd_file of which to put the reference - * - * Get reference to both the nfsd_file and nf->nf_net. - */ -struct nfsd_file * -nfsd_file_get_local(struct nfsd_file *nf) -{ - struct net *net = nf->nf_net; - - if (nfsd_net_try_get(net)) { - nf = nfsd_file_get(nf); - if (!nf) - nfsd_net_put(net); - } else { - nf = NULL; - } - return nf; -} - /** * nfsd_file_file - get the backing file of an nfsd_file * @nf: nfsd_file of which to access the backing file. diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 722b26c71e45..24ddf60e8434 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -63,7 +63,6 @@ int nfsd_file_cache_start_net(struct net *net); void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); struct net *nfsd_file_put_local(struct nfsd_file __rcu **nf); -struct nfsd_file *nfsd_file_get_local(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); struct file *nfsd_file_file(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c index cb237f1b902a..269fa9391dc4 100644 --- a/fs/nfsd/localio.c +++ b/fs/nfsd/localio.c @@ -122,7 +122,6 @@ static const struct nfsd_localio_operations nfsd_localio_ops = { .nfsd_net_put = nfsd_net_put, .nfsd_open_local_fh = nfsd_open_local_fh, .nfsd_file_put_local = nfsd_file_put_local, - .nfsd_file_get_local = nfsd_file_get_local, .nfsd_file_file = nfsd_file_file, }; diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 5c7c92659e73..59ea90bd136b 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -63,7 +63,6 @@ struct nfsd_localio_operations { struct nfsd_file __rcu **pnf, const fmode_t); struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **); - struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); } ____cacheline_aligned; -- cgit v1.2.3 From c97b737ef8f10f28424822c139e3b22b9e9bcc2b Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Fri, 18 Jul 2025 11:09:56 +0300 Subject: sunrpc: Change ret code of xdr_stream_decode_opaque_fixed Since the opaque is fixed in size, the caller already knows how many bytes were decoded, on success. Thus, xdr_stream_decode_opaque_fixed() doesn't need to return that value. And, xdr_stream_decode_u32 and _u64 both return zero on success. This patch simplifies the caller's error checking to avoid potential integer promotion issues. Suggested-by: Dan Carpenter Signed-off-by: Sergey Bashirov Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 4 ++-- .../sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 8a9ec617cf66..8d354015d762 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -721,7 +721,7 @@ xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr) * @len: size of buffer pointed to by @ptr * * Return values: - * On success, returns size of object stored in @ptr + * %0 on success * %-EBADMSG on XDR buffer overflow */ static inline ssize_t @@ -732,7 +732,7 @@ xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len) if (unlikely(!p)) return -EBADMSG; xdr_decode_opaque_fixed(p, ptr, len); - return len; + return 0; } /** diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2 index 8b4ff08c49e5..bdc7bd24ffb1 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2 @@ -13,5 +13,5 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr {% if annotate %} /* (fixed-length opaque) */ {% endif %} - return xdr_stream_decode_opaque_fixed(xdr, ptr, {{ size }}) >= 0; + return xdr_stream_decode_opaque_fixed(xdr, ptr, {{ size }}) == 0; }; -- cgit v1.2.3 From afc5b36e29b95fbd31a60b9630d148857e5e513d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2025 09:24:32 -0400 Subject: vfs: add ATTR_CTIME_SET flag When ATTR_ATIME_SET and ATTR_MTIME_SET are set in the ia_valid mask, the notify_change() logic takes that to mean that the request should set those values explicitly, and not override them with "now". With the advent of delegated timestamps, similar functionality is needed for the ctime. Add a ATTR_CTIME_SET flag, and use that to indicate that the ctime should be accepted as-is. Also, clean up the if statements to eliminate the extra negatives. In setattr_copy() and setattr_copy_mgtime() use inode_set_ctime_deleg() when ATTR_CTIME_SET is set, instead of basing the decision on ATTR_DELEG. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/attr.c | 44 +++++++++++++++++++------------------------- include/linux/fs.h | 1 + 2 files changed, 20 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/fs/attr.c b/fs/attr.c index 5425c1dbbff9..795f231d00e8 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -286,20 +286,12 @@ static void setattr_copy_mgtime(struct inode *inode, const struct iattr *attr) unsigned int ia_valid = attr->ia_valid; struct timespec64 now; - if (ia_valid & ATTR_CTIME) { - /* - * In the case of an update for a write delegation, we must respect - * the value in ia_ctime and not use the current time. - */ - if (ia_valid & ATTR_DELEG) - now = inode_set_ctime_deleg(inode, attr->ia_ctime); - else - now = inode_set_ctime_current(inode); - } else { - /* If ATTR_CTIME isn't set, then ATTR_MTIME shouldn't be either. */ - WARN_ON_ONCE(ia_valid & ATTR_MTIME); + if (ia_valid & ATTR_CTIME_SET) + now = inode_set_ctime_deleg(inode, attr->ia_ctime); + else if (ia_valid & ATTR_CTIME) + now = inode_set_ctime_current(inode); + else now = current_time(inode); - } if (ia_valid & ATTR_ATIME_SET) inode_set_atime_to_ts(inode, attr->ia_atime); @@ -359,12 +351,11 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode, inode_set_atime_to_ts(inode, attr->ia_atime); if (ia_valid & ATTR_MTIME) inode_set_mtime_to_ts(inode, attr->ia_mtime); - if (ia_valid & ATTR_CTIME) { - if (ia_valid & ATTR_DELEG) - inode_set_ctime_deleg(inode, attr->ia_ctime); - else - inode_set_ctime_to_ts(inode, attr->ia_ctime); - } + + if (ia_valid & ATTR_CTIME_SET) + inode_set_ctime_deleg(inode, attr->ia_ctime); + else if (ia_valid & ATTR_CTIME) + inode_set_ctime_to_ts(inode, attr->ia_ctime); } EXPORT_SYMBOL(setattr_copy); @@ -463,15 +454,18 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry, now = current_time(inode); - attr->ia_ctime = now; - if (!(ia_valid & ATTR_ATIME_SET)) - attr->ia_atime = now; - else + if (ia_valid & ATTR_ATIME_SET) attr->ia_atime = timestamp_truncate(attr->ia_atime, inode); - if (!(ia_valid & ATTR_MTIME_SET)) - attr->ia_mtime = now; else + attr->ia_atime = now; + if (ia_valid & ATTR_CTIME_SET) + attr->ia_ctime = timestamp_truncate(attr->ia_ctime, inode); + else + attr->ia_ctime = now; + if (ia_valid & ATTR_MTIME_SET) attr->ia_mtime = timestamp_truncate(attr->ia_mtime, inode); + else + attr->ia_mtime = now; if (ia_valid & ATTR_KILL_PRIV) { error = security_inode_need_killpriv(dentry); diff --git a/include/linux/fs.h b/include/linux/fs.h index 601d036a6c78..74f2bfc51926 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -238,6 +238,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define ATTR_ATIME_SET (1 << 7) #define ATTR_MTIME_SET (1 << 8) #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ +#define ATTR_CTIME_SET (1 << 10) #define ATTR_KILL_SUID (1 << 11) #define ATTR_KILL_SGID (1 << 12) #define ATTR_FILE (1 << 13) -- cgit v1.2.3 From 898374fdd7f06fa4c4a66e8be3135efeae6128d5 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 19 Aug 2025 14:04:02 -0400 Subject: nfsd: unregister with rpcbind when deleting a transport When a listener is added, a part of creation of transport also registers program/port with rpcbind. However, when the listener is removed, while transport goes away, rpcbind still has the entry for that port/type. When deleting the transport, unregister with rpcbind when appropriate. ---v2 created a new xpt_flag XPT_RPCB_UNREG to mark TCP and UDP transport and at xprt destroy send rpcbind unregister if flag set. Suggested-by: Chuck Lever Fixes: d093c9089260 ("nfsd: fix management of listener transports") Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 3 +++ net/sunrpc/svc_xprt.c | 13 +++++++++++++ net/sunrpc/svcsock.c | 2 ++ 3 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 369a89aea186..2b886f7eb295 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -104,6 +104,9 @@ enum { * it has access to. It is NOT counted * in ->sv_tmpcnt. */ + XPT_RPCB_UNREG, /* transport that needs unregistering + * with rpcbind (TCP, UDP) on destroy + */ }; /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8b1837228799..b800d704d807 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1014,6 +1014,19 @@ static void svc_delete_xprt(struct svc_xprt *xprt) struct svc_serv *serv = xprt->xpt_server; struct svc_deferred_req *dr; + /* unregister with rpcbind for when transport type is TCP or UDP. + */ + if (test_bit(XPT_RPCB_UNREG, &xprt->xpt_flags)) { + struct svc_sock *svsk = container_of(xprt, struct svc_sock, + sk_xprt); + struct socket *sock = svsk->sk_sock; + + if (svc_register(serv, xprt->xpt_net, sock->sk->sk_family, + sock->sk->sk_protocol, 0) < 0) + pr_warn("failed to unregister %s with rpcbind\n", + xprt->xpt_class->xcl_name); + } + if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) return; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c0d5a27ba674..7b90abc5cf0e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -836,6 +836,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) /* data might have come in before data_ready set up */ set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); + set_bit(XPT_RPCB_UNREG, &svsk->sk_xprt.xpt_flags); /* make sure we get destination address info */ switch (svsk->sk_sk->sk_family) { @@ -1350,6 +1351,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) if (sk->sk_state == TCP_LISTEN) { strcpy(svsk->sk_xprt.xpt_remotebuf, "listener"); set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); + set_bit(XPT_RPCB_UNREG, &svsk->sk_xprt.xpt_flags); sk->sk_data_ready = svc_tcp_listen_data_ready; set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); } else { -- cgit v1.2.3 From d73d06dac604043b94a5f18ebb6a69da1b867702 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Aug 2025 10:27:28 -0400 Subject: SUNRPC: Move the svc_rpcb_cleanup() call sites Clean up: because svc_rpcb_cleanup() and svc_xprt_destroy_all() are always invoked in pairs, we can deduplicate code by moving the svc_rpcb_cleanup() call sites into svc_xprt_destroy_all(). Tested-by: Olga Kornievskaia Signed-off-by: Chuck Lever --- fs/lockd/svc.c | 6 ++---- fs/nfs/callback.c | 2 +- fs/nfsd/nfsctl.c | 2 +- fs/nfsd/nfssvc.c | 7 ++----- include/linux/sunrpc/svc_xprt.h | 3 ++- net/sunrpc/svc.c | 1 - net/sunrpc/svc_xprt.c | 7 ++++++- 7 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index e80262a51884..d68afa196535 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -216,8 +216,7 @@ out_err: if (warned++ == 0) printk(KERN_WARNING "lockd_up: makesock failed, error=%d\n", err); - svc_xprt_destroy_all(serv, net); - svc_rpcb_cleanup(serv, net); + svc_xprt_destroy_all(serv, net, true); return err; } @@ -255,8 +254,7 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) nlm_shutdown_hosts_net(net); cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&ln->lockd_manager); - svc_xprt_destroy_all(serv, net); - svc_rpcb_cleanup(serv, net); + svc_xprt_destroy_all(serv, net, true); } } else { pr_err("%s: no users! net=%x\n", diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 511f80878809..c8b837006bb2 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -136,7 +136,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc return; dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum); - svc_xprt_destroy_all(serv, net); + svc_xprt_destroy_all(serv, net, false); } static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index bc6b776fc657..63d52edcad72 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1993,7 +1993,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) * remaining listeners and recreate the list. */ if (delete) - svc_xprt_destroy_all(serv, net); + svc_xprt_destroy_all(serv, net, false); /* walk list of addrs again, open any that still don't exist */ nlmsg_for_each_attr_type(attr, NFSD_A_SERVER_SOCK_ADDR, info->nlhdr, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 82b0111ac469..7057ddd7a0a8 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -535,16 +535,13 @@ void nfsd_destroy_serv(struct net *net) #endif } - svc_xprt_destroy_all(serv, net); - /* * write_ports can create the server without actually starting - * any threads--if we get shut down before any threads are + * any threads. If we get shut down before any threads are * started, then nfsd_destroy_serv will be run before any of this * other initialization has been done except the rpcb information. */ - svc_rpcb_cleanup(serv, net); - + svc_xprt_destroy_all(serv, net, true); nfsd_shutdown_net(net); svc_destroy(&serv); } diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 2b886f7eb295..da2a2531e110 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -168,7 +168,8 @@ int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred); -void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net); +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net, + bool unregister); void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index fc70e13b1cb9..cb4010e2dc0c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -436,7 +436,6 @@ void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net) svc_unregister(serv, net); rpcb_put_local(net); } -EXPORT_SYMBOL_GPL(svc_rpcb_cleanup); static int svc_uses_rpcbind(struct svc_serv *serv) { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b800d704d807..6973184ff667 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1115,6 +1115,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) * svc_xprt_destroy_all - Destroy transports associated with @serv * @serv: RPC service to be shut down * @net: target network namespace + * @unregister: true if it is OK to unregister the destroyed xprts * * Server threads may still be running (especially in the case where the * service is still running in other network namespaces). @@ -1127,7 +1128,8 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) * threads, we may need to wait a little while and then check again to * see if they're done. */ -void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net) +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net, + bool unregister) { int delay = 0; @@ -1137,6 +1139,9 @@ void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net) svc_clean_up_xprts(serv, net); msleep(delay++); } + + if (unregister) + svc_rpcb_cleanup(serv, net); } EXPORT_SYMBOL_GPL(svc_xprt_destroy_all); -- cgit v1.2.3 From 1abc1b212effe920f4729353880c8e03f1d76b4b Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 31 Jul 2025 13:23:40 +0100 Subject: coresight: Appropriately disable programming clocks Some CoreSight components have programming clocks (pclk) and are enabled using clk_get() and clk_prepare_enable(). However, in many cases, these clocks are not disabled when modules exit and only released by clk_put(). To fix the issue, this commit refactors programming clock by replacing clk_get() and clk_prepare_enable() with devm_clk_get_optional_enabled() for enabling APB clock. If the "apb_pclk" clock is not found, a NULL pointer is returned, and the function proceeds to attempt enabling the "apb" clock. Since ACPI platforms rely on firmware to manage clocks, returning a NULL pointer in this case leaves clock management to the firmware rather than the driver. This effectively avoids a clock imbalance issue during module removal - where the clock could be disabled twice: once during the ACPI runtime suspend and again during the devm resource release. Callers are updated to reuse the returned error value. With the change, programming clocks are managed as resources in driver model layer, allowing clock cleanup to be handled automatically. As a result, manual cleanup operations are no longer needed and are removed from the Coresight drivers. Fixes: 73d779a03a76 ("coresight: etm4x: Change etm4_platform_driver driver for MMIO devices") Reviewed-by: Yeoreum Yun Tested-by: James Clark Signed-off-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250731-arm_cs_fix_clock_v4-v6-4-1dfe10bb3f6f@arm.com --- drivers/hwtracing/coresight/coresight-catu.c | 9 ++------- drivers/hwtracing/coresight/coresight-cpu-debug.c | 6 +----- drivers/hwtracing/coresight/coresight-ctcu-core.c | 10 ++-------- drivers/hwtracing/coresight/coresight-etm4x-core.c | 9 ++------- drivers/hwtracing/coresight/coresight-funnel.c | 6 +----- drivers/hwtracing/coresight/coresight-replicator.c | 6 +----- drivers/hwtracing/coresight/coresight-stm.c | 4 +--- drivers/hwtracing/coresight/coresight-tmc-core.c | 4 +--- drivers/hwtracing/coresight/coresight-tpiu.c | 4 +--- include/linux/coresight.h | 22 +++++++++------------- 10 files changed, 21 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c index af2a55f0c907..4c345ff2cff1 100644 --- a/drivers/hwtracing/coresight/coresight-catu.c +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -636,7 +636,7 @@ static int catu_platform_probe(struct platform_device *pdev) drvdata->pclk = coresight_get_enable_apb_pclk(&pdev->dev); if (IS_ERR(drvdata->pclk)) - return -ENODEV; + return PTR_ERR(drvdata->pclk); pm_runtime_get_noresume(&pdev->dev); pm_runtime_set_active(&pdev->dev); @@ -645,11 +645,8 @@ static int catu_platform_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, drvdata); ret = __catu_probe(&pdev->dev, res); pm_runtime_put(&pdev->dev); - if (ret) { + if (ret) pm_runtime_disable(&pdev->dev); - if (!IS_ERR_OR_NULL(drvdata->pclk)) - clk_put(drvdata->pclk); - } return ret; } @@ -663,8 +660,6 @@ static void catu_platform_remove(struct platform_device *pdev) __catu_remove(&pdev->dev); pm_runtime_disable(&pdev->dev); - if (!IS_ERR_OR_NULL(drvdata->pclk)) - clk_put(drvdata->pclk); } #ifdef CONFIG_PM diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c index a871d997330b..e39dfb886688 100644 --- a/drivers/hwtracing/coresight/coresight-cpu-debug.c +++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c @@ -699,7 +699,7 @@ static int debug_platform_probe(struct platform_device *pdev) drvdata->pclk = coresight_get_enable_apb_pclk(&pdev->dev); if (IS_ERR(drvdata->pclk)) - return -ENODEV; + return PTR_ERR(drvdata->pclk); dev_set_drvdata(&pdev->dev, drvdata); pm_runtime_get_noresume(&pdev->dev); @@ -710,8 +710,6 @@ static int debug_platform_probe(struct platform_device *pdev) if (ret) { pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); - if (!IS_ERR_OR_NULL(drvdata->pclk)) - clk_put(drvdata->pclk); } return ret; } @@ -725,8 +723,6 @@ static void debug_platform_remove(struct platform_device *pdev) __debug_remove(&pdev->dev); pm_runtime_disable(&pdev->dev); - if (!IS_ERR_OR_NULL(drvdata->pclk)) - clk_put(drvdata->pclk); } #ifdef CONFIG_ACPI diff --git a/drivers/hwtracing/coresight/coresight-ctcu-core.c b/drivers/hwtracing/coresight/coresight-ctcu-core.c index c6bafc96db96..de279efe3405 100644 --- a/drivers/hwtracing/coresight/coresight-ctcu-core.c +++ b/drivers/hwtracing/coresight/coresight-ctcu-core.c @@ -209,7 +209,7 @@ static int ctcu_probe(struct platform_device *pdev) drvdata->apb_clk = coresight_get_enable_apb_pclk(dev); if (IS_ERR(drvdata->apb_clk)) - return -ENODEV; + return PTR_ERR(drvdata->apb_clk); cfgs = of_device_get_match_data(dev); if (cfgs) { @@ -233,12 +233,8 @@ static int ctcu_probe(struct platform_device *pdev) desc.access = CSDEV_ACCESS_IOMEM(base); drvdata->csdev = coresight_register(&desc); - if (IS_ERR(drvdata->csdev)) { - if (!IS_ERR_OR_NULL(drvdata->apb_clk)) - clk_put(drvdata->apb_clk); - + if (IS_ERR(drvdata->csdev)) return PTR_ERR(drvdata->csdev); - } return 0; } @@ -275,8 +271,6 @@ static void ctcu_platform_remove(struct platform_device *pdev) ctcu_remove(pdev); pm_runtime_disable(&pdev->dev); - if (!IS_ERR_OR_NULL(drvdata->apb_clk)) - clk_put(drvdata->apb_clk); } #ifdef CONFIG_PM diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 81f20a167e00..4b98a7bf4cb7 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -2309,14 +2309,12 @@ static int etm4_probe_platform_dev(struct platform_device *pdev) drvdata->pclk = coresight_get_enable_apb_pclk(&pdev->dev); if (IS_ERR(drvdata->pclk)) - return -ENODEV; + return PTR_ERR(drvdata->pclk); if (res) { drvdata->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(drvdata->base)) { - clk_put(drvdata->pclk); + if (IS_ERR(drvdata->base)) return PTR_ERR(drvdata->base); - } } dev_set_drvdata(&pdev->dev, drvdata); @@ -2423,9 +2421,6 @@ static void etm4_remove_platform_dev(struct platform_device *pdev) if (drvdata) etm4_remove_dev(drvdata); pm_runtime_disable(&pdev->dev); - - if (drvdata && !IS_ERR_OR_NULL(drvdata->pclk)) - clk_put(drvdata->pclk); } static const struct amba_id etm4_ids[] = { diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index b1922dbe9292..36fc4e991458 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -240,7 +240,7 @@ static int funnel_probe(struct device *dev, struct resource *res) drvdata->pclk = coresight_get_enable_apb_pclk(dev); if (IS_ERR(drvdata->pclk)) - return -ENODEV; + return PTR_ERR(drvdata->pclk); /* * Map the device base for dynamic-funnel, which has been @@ -284,8 +284,6 @@ static int funnel_probe(struct device *dev, struct resource *res) out_disable_clk: if (ret && !IS_ERR_OR_NULL(drvdata->atclk)) clk_disable_unprepare(drvdata->atclk); - if (ret && !IS_ERR_OR_NULL(drvdata->pclk)) - clk_disable_unprepare(drvdata->pclk); return ret; } @@ -355,8 +353,6 @@ static void funnel_platform_remove(struct platform_device *pdev) funnel_remove(&pdev->dev); pm_runtime_disable(&pdev->dev); - if (!IS_ERR_OR_NULL(drvdata->pclk)) - clk_put(drvdata->pclk); } static const struct of_device_id funnel_match[] = { diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index 06efd2b01a0f..6dd24eb10a94 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -247,7 +247,7 @@ static int replicator_probe(struct device *dev, struct resource *res) drvdata->pclk = coresight_get_enable_apb_pclk(dev); if (IS_ERR(drvdata->pclk)) - return -ENODEV; + return PTR_ERR(drvdata->pclk); /* * Map the device base for dynamic-replicator, which has been @@ -296,8 +296,6 @@ static int replicator_probe(struct device *dev, struct resource *res) out_disable_clk: if (ret && !IS_ERR_OR_NULL(drvdata->atclk)) clk_disable_unprepare(drvdata->atclk); - if (ret && !IS_ERR_OR_NULL(drvdata->pclk)) - clk_disable_unprepare(drvdata->pclk); return ret; } @@ -335,8 +333,6 @@ static void replicator_platform_remove(struct platform_device *pdev) replicator_remove(&pdev->dev); pm_runtime_disable(&pdev->dev); - if (!IS_ERR_OR_NULL(drvdata->pclk)) - clk_put(drvdata->pclk); } #ifdef CONFIG_PM diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index 464b0c85c3f7..f2de16e4d3b4 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -851,7 +851,7 @@ static int __stm_probe(struct device *dev, struct resource *res) drvdata->pclk = coresight_get_enable_apb_pclk(dev); if (IS_ERR(drvdata->pclk)) - return -ENODEV; + return PTR_ERR(drvdata->pclk); dev_set_drvdata(dev, drvdata); base = devm_ioremap_resource(dev, res); @@ -1033,8 +1033,6 @@ static void stm_platform_remove(struct platform_device *pdev) __stm_remove(&pdev->dev); pm_runtime_disable(&pdev->dev); - if (!IS_ERR_OR_NULL(drvdata->pclk)) - clk_put(drvdata->pclk); } #ifdef CONFIG_ACPI diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c index 85ba037c27f7..519f22542807 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-core.c +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c @@ -981,7 +981,7 @@ static int tmc_platform_probe(struct platform_device *pdev) drvdata->pclk = coresight_get_enable_apb_pclk(&pdev->dev); if (IS_ERR(drvdata->pclk)) - return -ENODEV; + return PTR_ERR(drvdata->pclk); dev_set_drvdata(&pdev->dev, drvdata); pm_runtime_get_noresume(&pdev->dev); @@ -1005,8 +1005,6 @@ static void tmc_platform_remove(struct platform_device *pdev) __tmc_remove(&pdev->dev); pm_runtime_disable(&pdev->dev); - if (!IS_ERR_OR_NULL(drvdata->pclk)) - clk_put(drvdata->pclk); } #ifdef CONFIG_PM diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 3e0159288428..b2559c6fac6d 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -153,7 +153,7 @@ static int __tpiu_probe(struct device *dev, struct resource *res) drvdata->pclk = coresight_get_enable_apb_pclk(dev); if (IS_ERR(drvdata->pclk)) - return -ENODEV; + return PTR_ERR(drvdata->pclk); dev_set_drvdata(dev, drvdata); /* Validity for the resource is already checked by the AMBA core */ @@ -293,8 +293,6 @@ static void tpiu_platform_remove(struct platform_device *pdev) __tpiu_remove(&pdev->dev); pm_runtime_disable(&pdev->dev); - if (!IS_ERR_OR_NULL(drvdata->pclk)) - clk_put(drvdata->pclk); } #ifdef CONFIG_ACPI diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 4ac65c68bbf4..1e652e157841 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -6,6 +6,7 @@ #ifndef _LINUX_CORESIGHT_H #define _LINUX_CORESIGHT_H +#include #include #include #include @@ -480,26 +481,21 @@ static inline bool is_coresight_device(void __iomem *base) * Returns: * * clk - Clock is found and enabled - * NULL - clock is not found + * NULL - Clock is controlled by firmware (ACPI device only) * ERROR - Clock is found but failed to enable */ static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) { struct clk *pclk; - int ret; - pclk = clk_get(dev, "apb_pclk"); - if (IS_ERR(pclk)) { - pclk = clk_get(dev, "apb"); - if (IS_ERR(pclk)) - return NULL; - } + /* Firmware controls clocks for an ACPI device. */ + if (has_acpi_companion(dev)) + return NULL; + + pclk = devm_clk_get_optional_enabled(dev, "apb_pclk"); + if (!pclk) + pclk = devm_clk_get_optional_enabled(dev, "apb"); - ret = clk_prepare_enable(pclk); - if (ret) { - clk_put(pclk); - return ERR_PTR(ret); - } return pclk; } -- cgit v1.2.3 From d091c6312561821f216ced63a7ad17c946b6d335 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 31 Jul 2025 13:23:42 +0100 Subject: coresight: Avoid enable programming clock duplicately The programming clock is enabled by AMBA bus driver before a dynamic probe. As a result, a CoreSight driver may redundantly enable the same clock. To avoid this, add a check for device type and skip enabling the programming clock for AMBA devices. The returned NULL pointer will be tolerated by the drivers. Fixes: 73d779a03a76 ("coresight: etm4x: Change etm4_platform_driver driver for MMIO devices") Reviewed-by: Anshuman Khandual Reviewed-by: Yeoreum Yun Tested-by: James Clark Signed-off-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250731-arm_cs_fix_clock_v4-v6-6-1dfe10bb3f6f@arm.com --- include/linux/coresight.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 1e652e157841..bb49080ec8f9 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -481,20 +481,23 @@ static inline bool is_coresight_device(void __iomem *base) * Returns: * * clk - Clock is found and enabled - * NULL - Clock is controlled by firmware (ACPI device only) + * NULL - Clock is controlled by firmware (ACPI device only) or when managed + * by the AMBA bus driver instead * ERROR - Clock is found but failed to enable */ static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) { - struct clk *pclk; + struct clk *pclk = NULL; /* Firmware controls clocks for an ACPI device. */ if (has_acpi_companion(dev)) return NULL; - pclk = devm_clk_get_optional_enabled(dev, "apb_pclk"); - if (!pclk) - pclk = devm_clk_get_optional_enabled(dev, "apb"); + if (!dev_is_amba(dev)) { + pclk = devm_clk_get_optional_enabled(dev, "apb_pclk"); + if (!pclk) + pclk = devm_clk_get_optional_enabled(dev, "apb"); + } return pclk; } -- cgit v1.2.3 From fbe7514a7912959e384acf108931ac1bfbb16466 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 31 Jul 2025 13:23:43 +0100 Subject: coresight: Consolidate clock enabling CoreSight drivers enable pclk and atclk conditionally. For example, pclk is only enabled in the static probe, while atclk is an optional clock that it is enabled for both dynamic and static probes, if it is present. In the current CoreSight drivers, these two clocks are initialized separately. This causes complex and duplicate codes. CoreSight drivers are refined so that clocks are initialized in one go. For this purpose, this commit renames coresight_get_enable_apb_pclk() to coresight_get_enable_clocks() and encapsulates clock initialization logic: - If a clock is initialized successfully, its clock pointer is assigned to the double pointer passed as an argument. - For ACPI devices, clocks are controlled by firmware, directly bail out. - Skip enabling pclk for an AMBA device. - If atclk is not found, the corresponding double pointer is set to NULL. The function returns Success (0) to guide callers can proceed with no error. - Otherwise, an error number is returned for failures. The function became complex, move it from the header to the CoreSight core layer and the symbol is exported. Added comments for recording details. Suggested-by: Suzuki K Poulose Reviewed-by: Anshuman Khandual Reviewed-by: Yeoreum Yun Tested-by: James Clark Signed-off-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250731-arm_cs_fix_clock_v4-v6-7-1dfe10bb3f6f@arm.com --- drivers/hwtracing/coresight/coresight-catu.c | 10 ++--- drivers/hwtracing/coresight/coresight-core.c | 48 ++++++++++++++++++++++ drivers/hwtracing/coresight/coresight-cpu-debug.c | 8 ++-- drivers/hwtracing/coresight/coresight-ctcu-core.c | 8 ++-- drivers/hwtracing/coresight/coresight-etm4x-core.c | 11 ++--- drivers/hwtracing/coresight/coresight-funnel.c | 11 ++--- drivers/hwtracing/coresight/coresight-replicator.c | 11 ++--- drivers/hwtracing/coresight/coresight-stm.c | 9 ++-- drivers/hwtracing/coresight/coresight-tmc-core.c | 10 ++--- drivers/hwtracing/coresight/coresight-tpiu.c | 10 ++--- include/linux/coresight.h | 30 +------------- 11 files changed, 83 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c index 4c345ff2cff1..0f476a0cbd74 100644 --- a/drivers/hwtracing/coresight/coresight-catu.c +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -520,9 +520,9 @@ static int __catu_probe(struct device *dev, struct resource *res) struct coresight_platform_data *pdata = NULL; void __iomem *base; - drvdata->atclk = devm_clk_get_optional_enabled(dev, "atclk"); - if (IS_ERR(drvdata->atclk)) - return PTR_ERR(drvdata->atclk); + ret = coresight_get_enable_clocks(dev, &drvdata->pclk, &drvdata->atclk); + if (ret) + return ret; catu_desc.name = coresight_alloc_device_name(&catu_devs, dev); if (!catu_desc.name) @@ -634,10 +634,6 @@ static int catu_platform_probe(struct platform_device *pdev) if (!drvdata) return -ENOMEM; - drvdata->pclk = coresight_get_enable_apb_pclk(&pdev->dev); - if (IS_ERR(drvdata->pclk)) - return PTR_ERR(drvdata->pclk); - pm_runtime_get_noresume(&pdev->dev); pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index 1accd7cbd54b..3267192f0c1c 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -3,6 +3,7 @@ * Copyright (c) 2012, The Linux Foundation. All rights reserved. */ +#include #include #include #include @@ -1700,6 +1701,53 @@ int coresight_etm_get_trace_id(struct coresight_device *csdev, enum cs_mode mode } EXPORT_SYMBOL_GPL(coresight_etm_get_trace_id); +/* + * Attempt to find and enable programming clock (pclk) and trace clock (atclk) + * for the given device. + * + * For ACPI devices, clocks are controlled by firmware, so bail out early in + * this case. Also, skip enabling pclk if the clock is managed by the AMBA + * bus driver instead. + * + * atclk is an optional clock, it will be only enabled when it is existed. + * Otherwise, a NULL pointer will be returned to caller. + * + * Returns: '0' on Success; Error code otherwise. + */ +int coresight_get_enable_clocks(struct device *dev, struct clk **pclk, + struct clk **atclk) +{ + WARN_ON(!pclk); + + if (has_acpi_companion(dev)) + return 0; + + if (!dev_is_amba(dev)) { + /* + * "apb_pclk" is the default clock name for an Arm Primecell + * peripheral, while "apb" is used only by the CTCU driver. + * + * For easier maintenance, CoreSight drivers should use + * "apb_pclk" as the programming clock name. + */ + *pclk = devm_clk_get_optional_enabled(dev, "apb_pclk"); + if (!*pclk) + *pclk = devm_clk_get_optional_enabled(dev, "apb"); + if (IS_ERR(*pclk)) + return PTR_ERR(*pclk); + } + + /* Initialization of atclk is skipped if it is a NULL pointer. */ + if (atclk) { + *atclk = devm_clk_get_optional_enabled(dev, "atclk"); + if (IS_ERR(*atclk)) + return PTR_ERR(*atclk); + } + + return 0; +} +EXPORT_SYMBOL_GPL(coresight_get_enable_clocks); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Pratik Patel "); MODULE_AUTHOR("Mathieu Poirier "); diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c index e39dfb886688..5f6db2fb95d4 100644 --- a/drivers/hwtracing/coresight/coresight-cpu-debug.c +++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c @@ -566,6 +566,10 @@ static int __debug_probe(struct device *dev, struct resource *res) void __iomem *base; int ret; + ret = coresight_get_enable_clocks(dev, &drvdata->pclk, NULL); + if (ret) + return ret; + drvdata->cpu = coresight_get_cpu(dev); if (drvdata->cpu < 0) return drvdata->cpu; @@ -697,10 +701,6 @@ static int debug_platform_probe(struct platform_device *pdev) if (!drvdata) return -ENOMEM; - drvdata->pclk = coresight_get_enable_apb_pclk(&pdev->dev); - if (IS_ERR(drvdata->pclk)) - return PTR_ERR(drvdata->pclk); - dev_set_drvdata(&pdev->dev, drvdata); pm_runtime_get_noresume(&pdev->dev); pm_runtime_set_active(&pdev->dev); diff --git a/drivers/hwtracing/coresight/coresight-ctcu-core.c b/drivers/hwtracing/coresight/coresight-ctcu-core.c index de279efe3405..75b5114ef652 100644 --- a/drivers/hwtracing/coresight/coresight-ctcu-core.c +++ b/drivers/hwtracing/coresight/coresight-ctcu-core.c @@ -188,7 +188,7 @@ static int ctcu_probe(struct platform_device *pdev) const struct ctcu_config *cfgs; struct ctcu_drvdata *drvdata; void __iomem *base; - int i; + int i, ret; desc.name = coresight_alloc_device_name(&ctcu_devs, dev); if (!desc.name) @@ -207,9 +207,9 @@ static int ctcu_probe(struct platform_device *pdev) if (IS_ERR(base)) return PTR_ERR(base); - drvdata->apb_clk = coresight_get_enable_apb_pclk(dev); - if (IS_ERR(drvdata->apb_clk)) - return PTR_ERR(drvdata->apb_clk); + ret = coresight_get_enable_clocks(dev, &drvdata->apb_clk, NULL); + if (ret) + return ret; cfgs = of_device_get_match_data(dev); if (cfgs) { diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 4b98a7bf4cb7..020f070bf17d 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -2217,13 +2217,14 @@ static int etm4_probe(struct device *dev) struct csdev_access access = { 0 }; struct etm4_init_arg init_arg = { 0 }; struct etm4_init_arg *delayed; + int ret; if (WARN_ON(!drvdata)) return -ENOMEM; - drvdata->atclk = devm_clk_get_optional_enabled(dev, "atclk"); - if (IS_ERR(drvdata->atclk)) - return PTR_ERR(drvdata->atclk); + ret = coresight_get_enable_clocks(dev, &drvdata->pclk, &drvdata->atclk); + if (ret) + return ret; if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE) pm_save_enable = coresight_loses_context_with_cpu(dev) ? @@ -2307,10 +2308,6 @@ static int etm4_probe_platform_dev(struct platform_device *pdev) if (!drvdata) return -ENOMEM; - drvdata->pclk = coresight_get_enable_apb_pclk(&pdev->dev); - if (IS_ERR(drvdata->pclk)) - return PTR_ERR(drvdata->pclk); - if (res) { drvdata->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(drvdata->base)) diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index b044a4125310..02e0dc678a32 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -217,6 +217,7 @@ static int funnel_probe(struct device *dev, struct resource *res) struct coresight_platform_data *pdata = NULL; struct funnel_drvdata *drvdata; struct coresight_desc desc = { 0 }; + int ret; if (is_of_node(dev_fwnode(dev)) && of_device_is_compatible(dev->of_node, "arm,coresight-funnel")) @@ -230,13 +231,9 @@ static int funnel_probe(struct device *dev, struct resource *res) if (!drvdata) return -ENOMEM; - drvdata->atclk = devm_clk_get_optional_enabled(dev, "atclk"); - if (IS_ERR(drvdata->atclk)) - return PTR_ERR(drvdata->atclk); - - drvdata->pclk = coresight_get_enable_apb_pclk(dev); - if (IS_ERR(drvdata->pclk)) - return PTR_ERR(drvdata->pclk); + ret = coresight_get_enable_clocks(dev, &drvdata->pclk, &drvdata->atclk); + if (ret) + return ret; /* * Map the device base for dynamic-funnel, which has been diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index 9e8bd36e7a9a..f1bbd12e63e0 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -223,6 +223,7 @@ static int replicator_probe(struct device *dev, struct resource *res) struct replicator_drvdata *drvdata; struct coresight_desc desc = { 0 }; void __iomem *base; + int ret; if (is_of_node(dev_fwnode(dev)) && of_device_is_compatible(dev->of_node, "arm,coresight-replicator")) @@ -237,13 +238,9 @@ static int replicator_probe(struct device *dev, struct resource *res) if (!drvdata) return -ENOMEM; - drvdata->atclk = devm_clk_get_optional_enabled(dev, "atclk"); - if (IS_ERR(drvdata->atclk)) - return PTR_ERR(drvdata->atclk); - - drvdata->pclk = coresight_get_enable_apb_pclk(dev); - if (IS_ERR(drvdata->pclk)) - return PTR_ERR(drvdata->pclk); + ret = coresight_get_enable_clocks(dev, &drvdata->pclk, &drvdata->atclk); + if (ret) + return ret; /* * Map the device base for dynamic-replicator, which has been diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index 275d67b91dfd..23ba33ac4d2e 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -842,13 +842,10 @@ static int __stm_probe(struct device *dev, struct resource *res) if (!drvdata) return -ENOMEM; - drvdata->atclk = devm_clk_get_optional_enabled(dev, "atclk"); - if (IS_ERR(drvdata->atclk)) - return PTR_ERR(drvdata->atclk); + ret = coresight_get_enable_clocks(dev, &drvdata->pclk, &drvdata->atclk); + if (ret) + return ret; - drvdata->pclk = coresight_get_enable_apb_pclk(dev); - if (IS_ERR(drvdata->pclk)) - return PTR_ERR(drvdata->pclk); dev_set_drvdata(dev, drvdata); base = devm_ioremap_resource(dev, res); diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c index 519f22542807..25c987e2d114 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-core.c +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c @@ -779,9 +779,9 @@ static int __tmc_probe(struct device *dev, struct resource *res) struct coresight_desc desc = { 0 }; struct coresight_dev_list *dev_list = NULL; - drvdata->atclk = devm_clk_get_optional_enabled(dev, "atclk"); - if (IS_ERR(drvdata->atclk)) - return PTR_ERR(drvdata->atclk); + ret = coresight_get_enable_clocks(dev, &drvdata->pclk, &drvdata->atclk); + if (ret) + return ret; ret = -ENOMEM; @@ -979,10 +979,6 @@ static int tmc_platform_probe(struct platform_device *pdev) if (!drvdata) return -ENOMEM; - drvdata->pclk = coresight_get_enable_apb_pclk(&pdev->dev); - if (IS_ERR(drvdata->pclk)) - return PTR_ERR(drvdata->pclk); - dev_set_drvdata(&pdev->dev, drvdata); pm_runtime_get_noresume(&pdev->dev); pm_runtime_set_active(&pdev->dev); diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 8d6179c83e5d..5e47d761e1c4 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -132,6 +132,7 @@ static int __tpiu_probe(struct device *dev, struct resource *res) struct coresight_platform_data *pdata = NULL; struct tpiu_drvdata *drvdata; struct coresight_desc desc = { 0 }; + int ret; desc.name = coresight_alloc_device_name(&tpiu_devs, dev); if (!desc.name) @@ -143,13 +144,10 @@ static int __tpiu_probe(struct device *dev, struct resource *res) spin_lock_init(&drvdata->spinlock); - drvdata->atclk = devm_clk_get_optional_enabled(dev, "atclk"); - if (IS_ERR(drvdata->atclk)) - return PTR_ERR(drvdata->atclk); + ret = coresight_get_enable_clocks(dev, &drvdata->pclk, &drvdata->atclk); + if (ret) + return ret; - drvdata->pclk = coresight_get_enable_apb_pclk(dev); - if (IS_ERR(drvdata->pclk)) - return PTR_ERR(drvdata->pclk); dev_set_drvdata(dev, drvdata); /* Validity for the resource is already checked by the AMBA core */ diff --git a/include/linux/coresight.h b/include/linux/coresight.h index bb49080ec8f9..6de59ce8ef8c 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -6,7 +6,6 @@ #ifndef _LINUX_CORESIGHT_H #define _LINUX_CORESIGHT_H -#include #include #include #include @@ -475,33 +474,6 @@ static inline bool is_coresight_device(void __iomem *base) return cid == CORESIGHT_CID; } -/* - * Attempt to find and enable "APB clock" for the given device - * - * Returns: - * - * clk - Clock is found and enabled - * NULL - Clock is controlled by firmware (ACPI device only) or when managed - * by the AMBA bus driver instead - * ERROR - Clock is found but failed to enable - */ -static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) -{ - struct clk *pclk = NULL; - - /* Firmware controls clocks for an ACPI device. */ - if (has_acpi_companion(dev)) - return NULL; - - if (!dev_is_amba(dev)) { - pclk = devm_clk_get_optional_enabled(dev, "apb_pclk"); - if (!pclk) - pclk = devm_clk_get_optional_enabled(dev, "apb"); - } - - return pclk; -} - #define CORESIGHT_PIDRn(i) (0xFE0 + ((i) * 4)) static inline u32 coresight_get_pid(struct csdev_access *csa) @@ -732,4 +704,6 @@ void coresight_remove_driver(struct amba_driver *amba_drv, struct platform_driver *pdev_drv); int coresight_etm_get_trace_id(struct coresight_device *csdev, enum cs_mode mode, struct coresight_device *sink); +int coresight_get_enable_clocks(struct device *dev, struct clk **pclk, + struct clk **atclk); #endif /* _LINUX_COREISGHT_H */ -- cgit v1.2.3 From ade2105e748f85eb026d26701091213855aea633 Mon Sep 17 00:00:00 2001 From: Elijah Wright Date: Wed, 20 Aug 2025 22:39:07 -0700 Subject: tracing: Move buffer in trace_seq to end of struct TRACE_SEQ_BUFFER_SIZE is dependent on the architecture for its size. on 64-bit systems, it is 8148 bytes. forced 8-byte alignment in size_t and seq_buf means that trace_seq is 8200 bytes on 64-bit systems. moving the buffer to the end of the struct fixes the issue. there shouldn't be any side effects, i.e. pointer arithmetic on trace_seq Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250821053917.23301-1-git@elijahs.space Signed-off-by: Elijah Wright Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_seq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index a93ed5ac3226..557780fe1c77 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -21,10 +21,10 @@ (sizeof(struct seq_buf) + sizeof(size_t) + sizeof(int))) struct trace_seq { - char buffer[TRACE_SEQ_BUFFER_SIZE]; struct seq_buf seq; size_t readpos; int full; + char buffer[TRACE_SEQ_BUFFER_SIZE]; }; static inline void -- cgit v1.2.3 From 340974c4f709ce8142a672ab11f1889dff94d6dc Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Aug 2025 09:39:00 +0530 Subject: mailbox: Add common header for RPMI messages sent via mailbox The RPMI based mailbox controller drivers and mailbox clients need to share defines related to RPMI messages over mailbox interface so add a common header for this purpose. Acked-by: Jassi Brar Co-developed-by: Rahul Pathak Signed-off-by: Rahul Pathak Signed-off-by: Anup Patel Link: https://lore.kernel.org/r/20250818040920.272664-5-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- include/linux/mailbox/riscv-rpmi-message.h | 214 +++++++++++++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 include/linux/mailbox/riscv-rpmi-message.h (limited to 'include/linux') diff --git a/include/linux/mailbox/riscv-rpmi-message.h b/include/linux/mailbox/riscv-rpmi-message.h new file mode 100644 index 000000000000..c3a98fc12c0a --- /dev/null +++ b/include/linux/mailbox/riscv-rpmi-message.h @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2025 Ventana Micro Systems Inc. */ + +#ifndef _LINUX_RISCV_RPMI_MESSAGE_H_ +#define _LINUX_RISCV_RPMI_MESSAGE_H_ + +#include +#include +#include +#include + +/* RPMI version encode/decode macros */ +#define RPMI_VER_MAJOR(__ver) upper_16_bits(__ver) +#define RPMI_VER_MINOR(__ver) lower_16_bits(__ver) +#define RPMI_MKVER(__maj, __min) (((u32)(__maj) << 16) | (u16)(__min)) + +/* RPMI message header */ +struct rpmi_message_header { + __le16 servicegroup_id; + u8 service_id; + u8 flags; + __le16 datalen; + __le16 token; +}; + +/* RPMI message */ +struct rpmi_message { + struct rpmi_message_header header; + u8 data[]; +}; + +/* RPMI notification event */ +struct rpmi_notification_event { + __le16 event_datalen; + u8 event_id; + u8 reserved; + u8 event_data[]; +}; + +/* RPMI error codes */ +enum rpmi_error_codes { + RPMI_SUCCESS = 0, + RPMI_ERR_FAILED = -1, + RPMI_ERR_NOTSUPP = -2, + RPMI_ERR_INVALID_PARAM = -3, + RPMI_ERR_DENIED = -4, + RPMI_ERR_INVALID_ADDR = -5, + RPMI_ERR_ALREADY = -6, + RPMI_ERR_EXTENSION = -7, + RPMI_ERR_HW_FAULT = -8, + RPMI_ERR_BUSY = -9, + RPMI_ERR_INVALID_STATE = -10, + RPMI_ERR_BAD_RANGE = -11, + RPMI_ERR_TIMEOUT = -12, + RPMI_ERR_IO = -13, + RPMI_ERR_NO_DATA = -14, + RPMI_ERR_RESERVED_START = -15, + RPMI_ERR_RESERVED_END = -127, + RPMI_ERR_VENDOR_START = -128, +}; + +static inline int rpmi_to_linux_error(int rpmi_error) +{ + switch (rpmi_error) { + case RPMI_SUCCESS: + return 0; + case RPMI_ERR_INVALID_PARAM: + case RPMI_ERR_BAD_RANGE: + case RPMI_ERR_INVALID_STATE: + return -EINVAL; + case RPMI_ERR_DENIED: + return -EPERM; + case RPMI_ERR_INVALID_ADDR: + case RPMI_ERR_HW_FAULT: + return -EFAULT; + case RPMI_ERR_ALREADY: + return -EALREADY; + case RPMI_ERR_BUSY: + return -EBUSY; + case RPMI_ERR_TIMEOUT: + return -ETIMEDOUT; + case RPMI_ERR_IO: + return -ECOMM; + case RPMI_ERR_FAILED: + case RPMI_ERR_NOTSUPP: + case RPMI_ERR_NO_DATA: + case RPMI_ERR_EXTENSION: + default: + return -EOPNOTSUPP; + } +} + +/* RPMI Linux mailbox attribute IDs */ +enum rpmi_mbox_attribute_id { + RPMI_MBOX_ATTR_SPEC_VERSION, + RPMI_MBOX_ATTR_MAX_MSG_DATA_SIZE, + RPMI_MBOX_ATTR_SERVICEGROUP_ID, + RPMI_MBOX_ATTR_SERVICEGROUP_VERSION, + RPMI_MBOX_ATTR_IMPL_ID, + RPMI_MBOX_ATTR_IMPL_VERSION, + RPMI_MBOX_ATTR_MAX_ID +}; + +/* RPMI Linux mailbox message types */ +enum rpmi_mbox_message_type { + RPMI_MBOX_MSG_TYPE_GET_ATTRIBUTE, + RPMI_MBOX_MSG_TYPE_SET_ATTRIBUTE, + RPMI_MBOX_MSG_TYPE_SEND_WITH_RESPONSE, + RPMI_MBOX_MSG_TYPE_SEND_WITHOUT_RESPONSE, + RPMI_MBOX_MSG_TYPE_NOTIFICATION_EVENT, + RPMI_MBOX_MSG_MAX_TYPE +}; + +/* RPMI Linux mailbox message instance */ +struct rpmi_mbox_message { + enum rpmi_mbox_message_type type; + union { + struct { + enum rpmi_mbox_attribute_id id; + u32 value; + } attr; + + struct { + u32 service_id; + void *request; + unsigned long request_len; + void *response; + unsigned long max_response_len; + unsigned long out_response_len; + } data; + + struct { + u16 event_datalen; + u8 event_id; + u8 *event_data; + } notif; + }; + int error; +}; + +/* RPMI Linux mailbox message helper routines */ +static inline void rpmi_mbox_init_get_attribute(struct rpmi_mbox_message *msg, + enum rpmi_mbox_attribute_id id) +{ + msg->type = RPMI_MBOX_MSG_TYPE_GET_ATTRIBUTE; + msg->attr.id = id; + msg->attr.value = 0; + msg->error = 0; +} + +static inline void rpmi_mbox_init_set_attribute(struct rpmi_mbox_message *msg, + enum rpmi_mbox_attribute_id id, + u32 value) +{ + msg->type = RPMI_MBOX_MSG_TYPE_SET_ATTRIBUTE; + msg->attr.id = id; + msg->attr.value = value; + msg->error = 0; +} + +static inline void rpmi_mbox_init_send_with_response(struct rpmi_mbox_message *msg, + u32 service_id, + void *request, + unsigned long request_len, + void *response, + unsigned long max_response_len) +{ + msg->type = RPMI_MBOX_MSG_TYPE_SEND_WITH_RESPONSE; + msg->data.service_id = service_id; + msg->data.request = request; + msg->data.request_len = request_len; + msg->data.response = response; + msg->data.max_response_len = max_response_len; + msg->data.out_response_len = 0; + msg->error = 0; +} + +static inline void rpmi_mbox_init_send_without_response(struct rpmi_mbox_message *msg, + u32 service_id, + void *request, + unsigned long request_len) +{ + msg->type = RPMI_MBOX_MSG_TYPE_SEND_WITHOUT_RESPONSE; + msg->data.service_id = service_id; + msg->data.request = request; + msg->data.request_len = request_len; + msg->data.response = NULL; + msg->data.max_response_len = 0; + msg->data.out_response_len = 0; + msg->error = 0; +} + +static inline void *rpmi_mbox_get_msg_response(struct rpmi_mbox_message *msg) +{ + return msg ? msg->data.response : NULL; +} + +static inline int rpmi_mbox_send_message(struct mbox_chan *chan, + struct rpmi_mbox_message *msg) +{ + int ret; + + /* Send message for the underlying mailbox channel */ + ret = mbox_send_message(chan, msg); + if (ret < 0) + return ret; + + /* Explicitly signal txdone for mailbox channel */ + ret = msg->error; + mbox_client_txdone(chan, ret); + return ret; +} + +#endif /* _LINUX_RISCV_RPMI_MESSAGE_H_ */ -- cgit v1.2.3 From ba879dfc0574878f3e08f217b2b4fdf845c426c0 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Aug 2025 09:39:01 +0530 Subject: mailbox: Allow controller specific mapping using fwnode Introduce optional fw_node() callback which allows a mailbox controller driver to provide controller specific mapping using fwnode. The Linux OF framework already implements fwnode operations for the Linux DD framework so the fw_xlate() callback works fine with device tree as well. Acked-by: Jassi Brar Reviewed-by: Andy Shevchenko Signed-off-by: Anup Patel Link: https://lore.kernel.org/r/20250818040920.272664-6-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- drivers/mailbox/mailbox.c | 65 +++++++++++++++++++++++--------------- include/linux/mailbox_controller.h | 3 ++ 2 files changed, 43 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 5cd8ae222073..2acc6ec229a4 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "mailbox.h" @@ -383,34 +384,56 @@ EXPORT_SYMBOL_GPL(mbox_bind_client); */ struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index) { - struct device *dev = cl->dev; + struct fwnode_reference_args fwspec; + struct fwnode_handle *fwnode; struct mbox_controller *mbox; struct of_phandle_args spec; struct mbox_chan *chan; + struct device *dev; + unsigned int i; int ret; - if (!dev || !dev->of_node) { - pr_debug("%s: No owner device node\n", __func__); + dev = cl->dev; + if (!dev) { + pr_debug("No owner device\n"); return ERR_PTR(-ENODEV); } - ret = of_parse_phandle_with_args(dev->of_node, "mboxes", "#mbox-cells", - index, &spec); + fwnode = dev_fwnode(dev); + if (!fwnode) { + dev_dbg(dev, "No owner fwnode\n"); + return ERR_PTR(-ENODEV); + } + + ret = fwnode_property_get_reference_args(fwnode, "mboxes", "#mbox-cells", + 0, index, &fwspec); if (ret) { - dev_err(dev, "%s: can't parse \"mboxes\" property\n", __func__); + dev_err(dev, "%s: can't parse \"%s\" property\n", __func__, "mboxes"); return ERR_PTR(ret); } + spec.np = to_of_node(fwspec.fwnode); + spec.args_count = fwspec.nargs; + for (i = 0; i < spec.args_count; i++) + spec.args[i] = fwspec.args[i]; + scoped_guard(mutex, &con_mutex) { chan = ERR_PTR(-EPROBE_DEFER); - list_for_each_entry(mbox, &mbox_cons, node) - if (mbox->dev->of_node == spec.np) { - chan = mbox->of_xlate(mbox, &spec); - if (!IS_ERR(chan)) - break; + list_for_each_entry(mbox, &mbox_cons, node) { + if (device_match_fwnode(mbox->dev, fwspec.fwnode)) { + if (mbox->fw_xlate) { + chan = mbox->fw_xlate(mbox, &fwspec); + if (!IS_ERR(chan)) + break; + } else if (mbox->of_xlate) { + chan = mbox->of_xlate(mbox, &spec); + if (!IS_ERR(chan)) + break; + } } + } - of_node_put(spec.np); + fwnode_handle_put(fwspec.fwnode); if (IS_ERR(chan)) return chan; @@ -427,15 +450,8 @@ EXPORT_SYMBOL_GPL(mbox_request_channel); struct mbox_chan *mbox_request_channel_byname(struct mbox_client *cl, const char *name) { - struct device_node *np = cl->dev->of_node; - int index; - - if (!np) { - dev_err(cl->dev, "%s() currently only supports DT\n", __func__); - return ERR_PTR(-EINVAL); - } + int index = device_property_match_string(cl->dev, "mbox-names", name); - index = of_property_match_string(np, "mbox-names", name); if (index < 0) { dev_err(cl->dev, "%s() could not locate channel named \"%s\"\n", __func__, name); @@ -470,9 +486,8 @@ void mbox_free_channel(struct mbox_chan *chan) } EXPORT_SYMBOL_GPL(mbox_free_channel); -static struct mbox_chan * -of_mbox_index_xlate(struct mbox_controller *mbox, - const struct of_phandle_args *sp) +static struct mbox_chan *fw_mbox_index_xlate(struct mbox_controller *mbox, + const struct fwnode_reference_args *sp) { int ind = sp->args[0]; @@ -523,8 +538,8 @@ int mbox_controller_register(struct mbox_controller *mbox) spin_lock_init(&chan->lock); } - if (!mbox->of_xlate) - mbox->of_xlate = of_mbox_index_xlate; + if (!mbox->fw_xlate && !mbox->of_xlate) + mbox->fw_xlate = fw_mbox_index_xlate; scoped_guard(mutex, &con_mutex) list_add_tail(&mbox->node, &mbox_cons); diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index ad01c4082358..80a427c7ca29 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -66,6 +66,7 @@ struct mbox_chan_ops { * no interrupt rises. Ignored if 'txdone_irq' is set. * @txpoll_period: If 'txdone_poll' is in effect, the API polls for * last TX's status after these many millisecs + * @fw_xlate: Controller driver specific mapping of channel via fwnode * @of_xlate: Controller driver specific mapping of channel via DT * @poll_hrt: API private. hrtimer used to poll for TXDONE on all * channels. @@ -79,6 +80,8 @@ struct mbox_controller { bool txdone_irq; bool txdone_poll; unsigned txpoll_period; + struct mbox_chan *(*fw_xlate)(struct mbox_controller *mbox, + const struct fwnode_reference_args *sp); struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox, const struct of_phandle_args *sp); /* Internal to API */ -- cgit v1.2.3 From 6f01c24f3a7525e953d514367a6d91b39c8f1f6a Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Aug 2025 09:39:02 +0530 Subject: byteorder: Add memcpy_to_le32() and memcpy_from_le32() Add common memcpy APIs for copying u32 array to/from __le32 array. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Anup Patel Reviewed-by: Linus Walleij Acked-by: Jassi Brar Link: https://lore.kernel.org/r/20250818040920.272664-7-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- include/linux/byteorder/generic.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index c9a4c96c9943..b3705e8bbe2b 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -173,6 +173,22 @@ static inline void cpu_to_le32_array(u32 *buf, unsigned int words) } } +static inline void memcpy_from_le32(u32 *dst, const __le32 *src, size_t words) +{ + size_t i; + + for (i = 0; i < words; i++) + dst[i] = le32_to_cpu(src[i]); +} + +static inline void memcpy_to_le32(__le32 *dst, const u32 *src, size_t words) +{ + size_t i; + + for (i = 0; i < words; i++) + dst[i] = cpu_to_le32(src[i]); +} + static inline void be16_add_cpu(__be16 *var, u16 val) { *var = cpu_to_be16(be16_to_cpu(*var) + val); -- cgit v1.2.3 From 5ba9f520f41a33c99fd5d1eb81b5650ed3517b88 Mon Sep 17 00:00:00 2001 From: Rahul Pathak Date: Mon, 18 Aug 2025 09:39:06 +0530 Subject: clk: Add clock driver for the RISC-V RPMI clock service group The RPMI specification defines a clock service group which can be accessed via SBI MPXY extension or dedicated S-mode RPMI transport. Add mailbox client based clock driver for the RISC-V RPMI clock service group. Reviewed-by: Stephen Boyd Reviewed-by: Andy Shevchenko Co-developed-by: Anup Patel Signed-off-by: Anup Patel Signed-off-by: Rahul Pathak Link: https://lore.kernel.org/r/20250818040920.272664-11-apatel@ventanamicro.com [pjw@kernel.org: converted rpmi_clkrate_u64 macro to a function; replaced bare constant with a macro] Signed-off-by: Paul Walmsley --- drivers/clk/Kconfig | 8 + drivers/clk/Makefile | 1 + drivers/clk/clk-rpmi.c | 620 +++++++++++++++++++++++++++++ include/linux/mailbox/riscv-rpmi-message.h | 16 + 4 files changed, 645 insertions(+) create mode 100644 drivers/clk/clk-rpmi.c (limited to 'include/linux') diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 4d56475f94fc..4d51fa589c1e 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -501,6 +501,14 @@ config COMMON_CLK_SP7021 Not all features of the PLL are currently supported by the driver. +config COMMON_CLK_RPMI + tristate "Clock driver based on RISC-V RPMI" + depends on MAILBOX + default RISCV + help + Support for clocks based on the clock service group defined by + the RISC-V platform management interface (RPMI) specification. + source "drivers/clk/actions/Kconfig" source "drivers/clk/analogbits/Kconfig" source "drivers/clk/baikal-t1/Kconfig" diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index 18ed29cfdc11..b74a1767ca27 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_COMMON_CLK_PWM) += clk-pwm.o obj-$(CONFIG_CLK_QORIQ) += clk-qoriq.o obj-$(CONFIG_COMMON_CLK_RK808) += clk-rk808.o obj-$(CONFIG_COMMON_CLK_RP1) += clk-rp1.o +obj-$(CONFIG_COMMON_CLK_RPMI) += clk-rpmi.o obj-$(CONFIG_COMMON_CLK_HI655X) += clk-hi655x.o obj-$(CONFIG_COMMON_CLK_S2MPS11) += clk-s2mps11.o obj-$(CONFIG_COMMON_CLK_SCMI) += clk-scmi.o diff --git a/drivers/clk/clk-rpmi.c b/drivers/clk/clk-rpmi.c new file mode 100644 index 000000000000..921296aafa68 --- /dev/null +++ b/drivers/clk/clk-rpmi.c @@ -0,0 +1,620 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V MPXY Based Clock Driver + * + * Copyright (C) 2025 Ventana Micro Systems Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RPMI_CLK_DISCRETE_MAX_NUM_RATES 16 +#define RPMI_CLK_NAME_LEN 16 + +#define to_rpmi_clk(clk) container_of(clk, struct rpmi_clk, hw) + +enum rpmi_clk_config { + RPMI_CLK_DISABLE = 0, + RPMI_CLK_ENABLE = 1, + RPMI_CLK_CONFIG_MAX_IDX +}; + +#define RPMI_CLK_TYPE_MASK GENMASK(1, 0) +enum rpmi_clk_type { + RPMI_CLK_DISCRETE = 0, + RPMI_CLK_LINEAR = 1, + RPMI_CLK_TYPE_MAX_IDX +}; + +struct rpmi_clk_context { + struct device *dev; + struct mbox_chan *chan; + struct mbox_client client; + u32 max_msg_data_size; +}; + +/* + * rpmi_clk_rates represents the rates format + * as specified by the RPMI specification. + * No other data format (e.g., struct linear_range) + * is required to avoid to and from conversion. + */ +union rpmi_clk_rates { + u64 discrete[RPMI_CLK_DISCRETE_MAX_NUM_RATES]; + struct { + u64 min; + u64 max; + u64 step; + } linear; +}; + +struct rpmi_clk { + struct rpmi_clk_context *context; + u32 id; + u32 num_rates; + u32 transition_latency; + enum rpmi_clk_type type; + union rpmi_clk_rates *rates; + char name[RPMI_CLK_NAME_LEN]; + struct clk_hw hw; +}; + +struct rpmi_clk_rate_discrete { + __le32 lo; + __le32 hi; +}; + +struct rpmi_clk_rate_linear { + __le32 min_lo; + __le32 min_hi; + __le32 max_lo; + __le32 max_hi; + __le32 step_lo; + __le32 step_hi; +}; + +struct rpmi_get_num_clocks_rx { + __le32 status; + __le32 num_clocks; +}; + +struct rpmi_get_attrs_tx { + __le32 clkid; +}; + +struct rpmi_get_attrs_rx { + __le32 status; + __le32 flags; + __le32 num_rates; + __le32 transition_latency; + char name[RPMI_CLK_NAME_LEN]; +}; + +struct rpmi_get_supp_rates_tx { + __le32 clkid; + __le32 clk_rate_idx; +}; + +struct rpmi_get_supp_rates_rx { + __le32 status; + __le32 flags; + __le32 remaining; + __le32 returned; + __le32 rates[]; +}; + +struct rpmi_get_rate_tx { + __le32 clkid; +}; + +struct rpmi_get_rate_rx { + __le32 status; + __le32 lo; + __le32 hi; +}; + +struct rpmi_set_rate_tx { + __le32 clkid; + __le32 flags; + __le32 lo; + __le32 hi; +}; + +struct rpmi_set_rate_rx { + __le32 status; +}; + +struct rpmi_set_config_tx { + __le32 clkid; + __le32 config; +}; + +struct rpmi_set_config_rx { + __le32 status; +}; + +static inline u64 rpmi_clkrate_u64(u32 __hi, u32 __lo) +{ + return (((u64)(__hi) << 32) | (u32)(__lo)); +} + +static u32 rpmi_clk_get_num_clocks(struct rpmi_clk_context *context) +{ + struct rpmi_get_num_clocks_rx rx, *resp; + struct rpmi_mbox_message msg; + int ret; + + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_GET_NUM_CLOCKS, + NULL, 0, &rx, sizeof(rx)); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return 0; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp || resp->status) + return 0; + + return le32_to_cpu(resp->num_clocks); +} + +static int rpmi_clk_get_attrs(u32 clkid, struct rpmi_clk *rpmi_clk) +{ + struct rpmi_clk_context *context = rpmi_clk->context; + struct rpmi_mbox_message msg; + struct rpmi_get_attrs_tx tx; + struct rpmi_get_attrs_rx rx, *resp; + u8 format; + int ret; + + tx.clkid = cpu_to_le32(clkid); + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_GET_ATTRIBUTES, + &tx, sizeof(tx), &rx, sizeof(rx)); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return ret; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp) + return -EINVAL; + if (resp->status) + return rpmi_to_linux_error(le32_to_cpu(resp->status)); + + rpmi_clk->id = clkid; + rpmi_clk->num_rates = le32_to_cpu(resp->num_rates); + rpmi_clk->transition_latency = le32_to_cpu(resp->transition_latency); + strscpy(rpmi_clk->name, resp->name, RPMI_CLK_NAME_LEN); + + format = le32_to_cpu(resp->flags) & RPMI_CLK_TYPE_MASK; + if (format >= RPMI_CLK_TYPE_MAX_IDX) + return -EINVAL; + + rpmi_clk->type = format; + + return 0; +} + +static int rpmi_clk_get_supported_rates(u32 clkid, struct rpmi_clk *rpmi_clk) +{ + struct rpmi_clk_context *context = rpmi_clk->context; + struct rpmi_clk_rate_discrete *rate_discrete; + struct rpmi_clk_rate_linear *rate_linear; + struct rpmi_get_supp_rates_tx tx; + struct rpmi_get_supp_rates_rx *resp; + struct rpmi_mbox_message msg; + size_t clk_rate_idx; + int ret, rateidx, j; + + tx.clkid = cpu_to_le32(clkid); + tx.clk_rate_idx = 0; + + /* + * Make sure we allocate rx buffer sufficient to be accommodate all + * the rates sent in one RPMI message. + */ + struct rpmi_get_supp_rates_rx *rx __free(kfree) = + kzalloc(context->max_msg_data_size, GFP_KERNEL); + if (!rx) + return -ENOMEM; + + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_GET_SUPPORTED_RATES, + &tx, sizeof(tx), rx, context->max_msg_data_size); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return ret; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp) + return -EINVAL; + if (resp->status) + return rpmi_to_linux_error(le32_to_cpu(resp->status)); + if (!le32_to_cpu(resp->returned)) + return -EINVAL; + + if (rpmi_clk->type == RPMI_CLK_DISCRETE) { + rate_discrete = (struct rpmi_clk_rate_discrete *)resp->rates; + + for (rateidx = 0; rateidx < le32_to_cpu(resp->returned); rateidx++) { + rpmi_clk->rates->discrete[rateidx] = + rpmi_clkrate_u64(le32_to_cpu(rate_discrete[rateidx].hi), + le32_to_cpu(rate_discrete[rateidx].lo)); + } + + /* + * Keep sending the request message until all + * the rates are received. + */ + clk_rate_idx = 0; + while (le32_to_cpu(resp->remaining)) { + clk_rate_idx += le32_to_cpu(resp->returned); + tx.clk_rate_idx = cpu_to_le32(clk_rate_idx); + + rpmi_mbox_init_send_with_response(&msg, + RPMI_CLK_SRV_GET_SUPPORTED_RATES, + &tx, sizeof(tx), + rx, context->max_msg_data_size); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return ret; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp) + return -EINVAL; + if (resp->status) + return rpmi_to_linux_error(le32_to_cpu(resp->status)); + if (!le32_to_cpu(resp->returned)) + return -EINVAL; + + for (j = 0; j < le32_to_cpu(resp->returned); j++) { + if (rateidx >= clk_rate_idx + le32_to_cpu(resp->returned)) + break; + rpmi_clk->rates->discrete[rateidx++] = + rpmi_clkrate_u64(le32_to_cpu(rate_discrete[j].hi), + le32_to_cpu(rate_discrete[j].lo)); + } + } + } else if (rpmi_clk->type == RPMI_CLK_LINEAR) { + rate_linear = (struct rpmi_clk_rate_linear *)resp->rates; + + rpmi_clk->rates->linear.min = rpmi_clkrate_u64(le32_to_cpu(rate_linear->min_hi), + le32_to_cpu(rate_linear->min_lo)); + rpmi_clk->rates->linear.max = rpmi_clkrate_u64(le32_to_cpu(rate_linear->max_hi), + le32_to_cpu(rate_linear->max_lo)); + rpmi_clk->rates->linear.step = rpmi_clkrate_u64(le32_to_cpu(rate_linear->step_hi), + le32_to_cpu(rate_linear->step_lo)); + } + + return 0; +} + +static unsigned long rpmi_clk_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct rpmi_clk *rpmi_clk = to_rpmi_clk(hw); + struct rpmi_clk_context *context = rpmi_clk->context; + struct rpmi_mbox_message msg; + struct rpmi_get_rate_tx tx; + struct rpmi_get_rate_rx rx, *resp; + int ret; + + tx.clkid = cpu_to_le32(rpmi_clk->id); + + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_GET_RATE, + &tx, sizeof(tx), &rx, sizeof(rx)); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return ret; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp) + return -EINVAL; + if (resp->status) + return rpmi_to_linux_error(le32_to_cpu(resp->status)); + + return rpmi_clkrate_u64(le32_to_cpu(resp->hi), le32_to_cpu(resp->lo)); +} + +static int rpmi_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + struct rpmi_clk *rpmi_clk = to_rpmi_clk(hw); + u64 fmin, fmax, ftmp; + + /* + * Keep the requested rate if the clock format + * is of discrete type. Let the platform which + * is actually controlling the clock handle that. + */ + if (rpmi_clk->type == RPMI_CLK_DISCRETE) + return 0; + + fmin = rpmi_clk->rates->linear.min; + fmax = rpmi_clk->rates->linear.max; + + if (req->rate <= fmin) { + req->rate = fmin; + return 0; + } else if (req->rate >= fmax) { + req->rate = fmax; + return 0; + } + + ftmp = req->rate - fmin; + ftmp += rpmi_clk->rates->linear.step - 1; + do_div(ftmp, rpmi_clk->rates->linear.step); + + req->rate = ftmp * rpmi_clk->rates->linear.step + fmin; + + return 0; +} + +static int rpmi_clk_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct rpmi_clk *rpmi_clk = to_rpmi_clk(hw); + struct rpmi_clk_context *context = rpmi_clk->context; + struct rpmi_mbox_message msg; + struct rpmi_set_rate_tx tx; + struct rpmi_set_rate_rx rx, *resp; + int ret; + + tx.clkid = cpu_to_le32(rpmi_clk->id); + tx.lo = cpu_to_le32(lower_32_bits(rate)); + tx.hi = cpu_to_le32(upper_32_bits(rate)); + + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_SET_RATE, + &tx, sizeof(tx), &rx, sizeof(rx)); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return ret; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp) + return -EINVAL; + if (resp->status) + return rpmi_to_linux_error(le32_to_cpu(resp->status)); + + return 0; +} + +static int rpmi_clk_enable(struct clk_hw *hw) +{ + struct rpmi_clk *rpmi_clk = to_rpmi_clk(hw); + struct rpmi_clk_context *context = rpmi_clk->context; + struct rpmi_mbox_message msg; + struct rpmi_set_config_tx tx; + struct rpmi_set_config_rx rx, *resp; + int ret; + + tx.config = cpu_to_le32(RPMI_CLK_ENABLE); + tx.clkid = cpu_to_le32(rpmi_clk->id); + + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_SET_CONFIG, + &tx, sizeof(tx), &rx, sizeof(rx)); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return ret; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp) + return -EINVAL; + if (resp->status) + return rpmi_to_linux_error(le32_to_cpu(resp->status)); + + return 0; +} + +static void rpmi_clk_disable(struct clk_hw *hw) +{ + struct rpmi_clk *rpmi_clk = to_rpmi_clk(hw); + struct rpmi_clk_context *context = rpmi_clk->context; + struct rpmi_mbox_message msg; + struct rpmi_set_config_tx tx; + struct rpmi_set_config_rx rx; + + tx.config = cpu_to_le32(RPMI_CLK_DISABLE); + tx.clkid = cpu_to_le32(rpmi_clk->id); + + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_SET_CONFIG, + &tx, sizeof(tx), &rx, sizeof(rx)); + + rpmi_mbox_send_message(context->chan, &msg); +} + +static const struct clk_ops rpmi_clk_ops = { + .recalc_rate = rpmi_clk_recalc_rate, + .determine_rate = rpmi_clk_determine_rate, + .set_rate = rpmi_clk_set_rate, + .prepare = rpmi_clk_enable, + .unprepare = rpmi_clk_disable, +}; + +static struct clk_hw *rpmi_clk_enumerate(struct rpmi_clk_context *context, u32 clkid) +{ + struct device *dev = context->dev; + unsigned long min_rate, max_rate; + union rpmi_clk_rates *rates; + struct rpmi_clk *rpmi_clk; + struct clk_init_data init = {}; + struct clk_hw *clk_hw; + int ret; + + rates = devm_kzalloc(dev, sizeof(*rates), GFP_KERNEL); + if (!rates) + return ERR_PTR(-ENOMEM); + + rpmi_clk = devm_kzalloc(dev, sizeof(*rpmi_clk), GFP_KERNEL); + if (!rpmi_clk) + return ERR_PTR(-ENOMEM); + + rpmi_clk->context = context; + rpmi_clk->rates = rates; + + ret = rpmi_clk_get_attrs(clkid, rpmi_clk); + if (ret) + return dev_err_ptr_probe(dev, ret, + "Failed to get clk-%u attributes\n", + clkid); + + ret = rpmi_clk_get_supported_rates(clkid, rpmi_clk); + if (ret) + return dev_err_ptr_probe(dev, ret, + "Get supported rates failed for clk-%u\n", + clkid); + + init.flags = CLK_GET_RATE_NOCACHE; + init.num_parents = 0; + init.ops = &rpmi_clk_ops; + init.name = rpmi_clk->name; + clk_hw = &rpmi_clk->hw; + clk_hw->init = &init; + + ret = devm_clk_hw_register(dev, clk_hw); + if (ret) + return dev_err_ptr_probe(dev, ret, + "Unable to register clk-%u\n", + clkid); + + if (rpmi_clk->type == RPMI_CLK_DISCRETE) { + min_rate = rpmi_clk->rates->discrete[0]; + max_rate = rpmi_clk->rates->discrete[rpmi_clk->num_rates - 1]; + } else { + min_rate = rpmi_clk->rates->linear.min; + max_rate = rpmi_clk->rates->linear.max; + } + + clk_hw_set_rate_range(clk_hw, min_rate, max_rate); + + return clk_hw; +} + +static void rpmi_clk_mbox_chan_release(void *data) +{ + struct mbox_chan *chan = data; + + mbox_free_channel(chan); +} + +static int rpmi_clk_probe(struct platform_device *pdev) +{ + int ret; + unsigned int num_clocks, i; + struct clk_hw_onecell_data *clk_data; + struct rpmi_clk_context *context; + struct rpmi_mbox_message msg; + struct clk_hw *hw_ptr; + struct device *dev = &pdev->dev; + + context = devm_kzalloc(dev, sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + context->dev = dev; + platform_set_drvdata(pdev, context); + + context->client.dev = context->dev; + context->client.rx_callback = NULL; + context->client.tx_block = false; + context->client.knows_txdone = true; + context->client.tx_tout = 0; + + context->chan = mbox_request_channel(&context->client, 0); + if (IS_ERR(context->chan)) + return PTR_ERR(context->chan); + + ret = devm_add_action_or_reset(dev, rpmi_clk_mbox_chan_release, context->chan); + if (ret) + return dev_err_probe(dev, ret, "Failed to add rpmi mbox channel cleanup\n"); + + rpmi_mbox_init_get_attribute(&msg, RPMI_MBOX_ATTR_SPEC_VERSION); + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return dev_err_probe(dev, ret, "Failed to get spec version\n"); + if (msg.attr.value < RPMI_MKVER(1, 0)) { + return dev_err_probe(dev, -EINVAL, + "msg protocol version mismatch, expected 0x%x, found 0x%x\n", + RPMI_MKVER(1, 0), msg.attr.value); + } + + rpmi_mbox_init_get_attribute(&msg, RPMI_MBOX_ATTR_SERVICEGROUP_ID); + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return dev_err_probe(dev, ret, "Failed to get service group ID\n"); + if (msg.attr.value != RPMI_SRVGRP_CLOCK) { + return dev_err_probe(dev, -EINVAL, + "service group match failed, expected 0x%x, found 0x%x\n", + RPMI_SRVGRP_CLOCK, msg.attr.value); + } + + rpmi_mbox_init_get_attribute(&msg, RPMI_MBOX_ATTR_SERVICEGROUP_VERSION); + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return dev_err_probe(dev, ret, "Failed to get service group version\n"); + if (msg.attr.value < RPMI_MKVER(1, 0)) { + return dev_err_probe(dev, -EINVAL, + "service group version failed, expected 0x%x, found 0x%x\n", + RPMI_MKVER(1, 0), msg.attr.value); + } + + rpmi_mbox_init_get_attribute(&msg, RPMI_MBOX_ATTR_MAX_MSG_DATA_SIZE); + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return dev_err_probe(dev, ret, "Failed to get max message data size\n"); + + context->max_msg_data_size = msg.attr.value; + num_clocks = rpmi_clk_get_num_clocks(context); + if (!num_clocks) + return dev_err_probe(dev, -ENODEV, "No clocks found\n"); + + clk_data = devm_kzalloc(dev, struct_size(clk_data, hws, num_clocks), + GFP_KERNEL); + if (!clk_data) + return dev_err_probe(dev, -ENOMEM, "No memory for clock data\n"); + clk_data->num = num_clocks; + + for (i = 0; i < clk_data->num; i++) { + hw_ptr = rpmi_clk_enumerate(context, i); + if (IS_ERR(hw_ptr)) { + return dev_err_probe(dev, PTR_ERR(hw_ptr), + "Failed to register clk-%d\n", i); + } + clk_data->hws[i] = hw_ptr; + } + + ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, clk_data); + if (ret) + return dev_err_probe(dev, ret, "Failed to register clock HW provider\n"); + + return 0; +} + +static const struct of_device_id rpmi_clk_of_match[] = { + { .compatible = "riscv,rpmi-clock" }, + { } +}; +MODULE_DEVICE_TABLE(of, rpmi_clk_of_match); + +static struct platform_driver rpmi_clk_driver = { + .driver = { + .name = "riscv-rpmi-clock", + .of_match_table = rpmi_clk_of_match, + }, + .probe = rpmi_clk_probe, +}; +module_platform_driver(rpmi_clk_driver); + +MODULE_AUTHOR("Rahul Pathak "); +MODULE_DESCRIPTION("Clock Driver based on RPMI message protocol"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mailbox/riscv-rpmi-message.h b/include/linux/mailbox/riscv-rpmi-message.h index c3a98fc12c0a..8176d33747fe 100644 --- a/include/linux/mailbox/riscv-rpmi-message.h +++ b/include/linux/mailbox/riscv-rpmi-message.h @@ -90,6 +90,22 @@ static inline int rpmi_to_linux_error(int rpmi_error) } } +/* RPMI service group IDs */ +#define RPMI_SRVGRP_CLOCK 0x00008 + +/* RPMI clock service IDs */ +enum rpmi_clock_service_id { + RPMI_CLK_SRV_ENABLE_NOTIFICATION = 0x01, + RPMI_CLK_SRV_GET_NUM_CLOCKS = 0x02, + RPMI_CLK_SRV_GET_ATTRIBUTES = 0x03, + RPMI_CLK_SRV_GET_SUPPORTED_RATES = 0x04, + RPMI_CLK_SRV_SET_CONFIG = 0x05, + RPMI_CLK_SRV_GET_CONFIG = 0x06, + RPMI_CLK_SRV_SET_RATE = 0x07, + RPMI_CLK_SRV_GET_RATE = 0x08, + RPMI_CLK_SRV_ID_MAX_COUNT +}; + /* RPMI Linux mailbox attribute IDs */ enum rpmi_mbox_attribute_id { RPMI_MBOX_ATTR_SPEC_VERSION, -- cgit v1.2.3 From aa43953e862c031ff66e44353c88beb7a449e80d Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Aug 2025 09:39:09 +0530 Subject: irqchip: Add driver for the RPMI system MSI service group The RPMI specification defines a system MSI service group which allows application processors to receive MSIs upon system events such as graceful shutdown/reboot request, CPU hotplug event, memory hotplug event, etc. Add an irqchip driver for the RISC-V RPMI system MSI service group to directly receive system MSIs in Linux kernel. Reviewed-by: Thomas Gleixner Signed-off-by: Anup Patel Link: https://lore.kernel.org/r/20250818040920.272664-14-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- drivers/irqchip/Kconfig | 7 + drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-riscv-rpmi-sysmsi.c | 287 +++++++++++++++++++++++++++++ include/linux/mailbox/riscv-rpmi-message.h | 13 ++ 4 files changed, 308 insertions(+) create mode 100644 drivers/irqchip/irq-riscv-rpmi-sysmsi.c (limited to 'include/linux') diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 6d12c6ab9ea4..e047ba36df16 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -634,6 +634,13 @@ config RISCV_IMSIC select GENERIC_MSI_IRQ select IRQ_MSI_LIB +config RISCV_RPMI_SYSMSI + bool + depends on MAILBOX + select IRQ_DOMAIN_HIERARCHY + select GENERIC_MSI_IRQ + default RISCV + config SIFIVE_PLIC bool depends on RISCV diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 93e3ced023bb..3de083f5484c 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -106,6 +106,7 @@ obj-$(CONFIG_RISCV_INTC) += irq-riscv-intc.o obj-$(CONFIG_RISCV_APLIC) += irq-riscv-aplic-main.o irq-riscv-aplic-direct.o obj-$(CONFIG_RISCV_APLIC_MSI) += irq-riscv-aplic-msi.o obj-$(CONFIG_RISCV_IMSIC) += irq-riscv-imsic-state.o irq-riscv-imsic-early.o irq-riscv-imsic-platform.o +obj-$(CONFIG_RISCV_RPMI_SYSMSI) += irq-riscv-rpmi-sysmsi.o obj-$(CONFIG_SIFIVE_PLIC) += irq-sifive-plic.o obj-$(CONFIG_STARFIVE_JH8100_INTC) += irq-starfive-jh8100-intc.o obj-$(CONFIG_ACLINT_SSWI) += irq-aclint-sswi.o diff --git a/drivers/irqchip/irq-riscv-rpmi-sysmsi.c b/drivers/irqchip/irq-riscv-rpmi-sysmsi.c new file mode 100644 index 000000000000..92e8847dfccc --- /dev/null +++ b/drivers/irqchip/irq-riscv-rpmi-sysmsi.c @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Ventana Micro Systems Inc. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct rpmi_sysmsi_get_attrs_rx { + __le32 status; + __le32 sys_num_msi; + __le32 flag0; + __le32 flag1; +}; + +#define RPMI_SYSMSI_MSI_ATTRIBUTES_FLAG0_PREF_PRIV BIT(0) + +struct rpmi_sysmsi_set_msi_state_tx { + __le32 sys_msi_index; + __le32 sys_msi_state; +}; + +struct rpmi_sysmsi_set_msi_state_rx { + __le32 status; +}; + +#define RPMI_SYSMSI_MSI_STATE_ENABLE BIT(0) +#define RPMI_SYSMSI_MSI_STATE_PENDING BIT(1) + +struct rpmi_sysmsi_set_msi_target_tx { + __le32 sys_msi_index; + __le32 sys_msi_address_low; + __le32 sys_msi_address_high; + __le32 sys_msi_data; +}; + +struct rpmi_sysmsi_set_msi_target_rx { + __le32 status; +}; + +struct rpmi_sysmsi_priv { + struct device *dev; + struct mbox_client client; + struct mbox_chan *chan; + u32 nr_irqs; + u32 gsi_base; +}; + +static int rpmi_sysmsi_get_num_msi(struct rpmi_sysmsi_priv *priv) +{ + struct rpmi_sysmsi_get_attrs_rx rx; + struct rpmi_mbox_message msg; + int ret; + + rpmi_mbox_init_send_with_response(&msg, RPMI_SYSMSI_SRV_GET_ATTRIBUTES, + NULL, 0, &rx, sizeof(rx)); + ret = rpmi_mbox_send_message(priv->chan, &msg); + if (ret) + return ret; + if (rx.status) + return rpmi_to_linux_error(le32_to_cpu(rx.status)); + + return le32_to_cpu(rx.sys_num_msi); +} + +static int rpmi_sysmsi_set_msi_state(struct rpmi_sysmsi_priv *priv, + u32 sys_msi_index, u32 sys_msi_state) +{ + struct rpmi_sysmsi_set_msi_state_tx tx; + struct rpmi_sysmsi_set_msi_state_rx rx; + struct rpmi_mbox_message msg; + int ret; + + tx.sys_msi_index = cpu_to_le32(sys_msi_index); + tx.sys_msi_state = cpu_to_le32(sys_msi_state); + rpmi_mbox_init_send_with_response(&msg, RPMI_SYSMSI_SRV_SET_MSI_STATE, + &tx, sizeof(tx), &rx, sizeof(rx)); + ret = rpmi_mbox_send_message(priv->chan, &msg); + if (ret) + return ret; + if (rx.status) + return rpmi_to_linux_error(le32_to_cpu(rx.status)); + + return 0; +} + +static int rpmi_sysmsi_set_msi_target(struct rpmi_sysmsi_priv *priv, + u32 sys_msi_index, struct msi_msg *m) +{ + struct rpmi_sysmsi_set_msi_target_tx tx; + struct rpmi_sysmsi_set_msi_target_rx rx; + struct rpmi_mbox_message msg; + int ret; + + tx.sys_msi_index = cpu_to_le32(sys_msi_index); + tx.sys_msi_address_low = cpu_to_le32(m->address_lo); + tx.sys_msi_address_high = cpu_to_le32(m->address_hi); + tx.sys_msi_data = cpu_to_le32(m->data); + rpmi_mbox_init_send_with_response(&msg, RPMI_SYSMSI_SRV_SET_MSI_TARGET, + &tx, sizeof(tx), &rx, sizeof(rx)); + ret = rpmi_mbox_send_message(priv->chan, &msg); + if (ret) + return ret; + if (rx.status) + return rpmi_to_linux_error(le32_to_cpu(rx.status)); + + return 0; +} + +static void rpmi_sysmsi_irq_mask(struct irq_data *d) +{ + struct rpmi_sysmsi_priv *priv = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + int ret; + + ret = rpmi_sysmsi_set_msi_state(priv, hwirq, 0); + if (ret) + dev_warn(priv->dev, "Failed to mask hwirq %lu (error %d)\n", hwirq, ret); + irq_chip_mask_parent(d); +} + +static void rpmi_sysmsi_irq_unmask(struct irq_data *d) +{ + struct rpmi_sysmsi_priv *priv = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + int ret; + + irq_chip_unmask_parent(d); + ret = rpmi_sysmsi_set_msi_state(priv, hwirq, RPMI_SYSMSI_MSI_STATE_ENABLE); + if (ret) + dev_warn(priv->dev, "Failed to unmask hwirq %lu (error %d)\n", hwirq, ret); +} + +static void rpmi_sysmsi_write_msg(struct irq_data *d, struct msi_msg *msg) +{ + struct rpmi_sysmsi_priv *priv = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + int ret; + + /* For zeroed MSI, do nothing as of now */ + if (!msg->address_hi && !msg->address_lo && !msg->data) + return; + + ret = rpmi_sysmsi_set_msi_target(priv, hwirq, msg); + if (ret) + dev_warn(priv->dev, "Failed to set target for hwirq %lu (error %d)\n", hwirq, ret); +} + +static void rpmi_sysmsi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) +{ + arg->desc = desc; + arg->hwirq = desc->data.icookie.value; +} + +static int rpmi_sysmsi_translate(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *hwirq, unsigned int *type) +{ + struct msi_domain_info *info = d->host_data; + struct rpmi_sysmsi_priv *priv = info->data; + + if (WARN_ON(fwspec->param_count < 1)) + return -EINVAL; + + /* For DT, gsi_base is always zero. */ + *hwirq = fwspec->param[0] - priv->gsi_base; + *type = IRQ_TYPE_NONE; + return 0; +} + +static const struct msi_domain_template rpmi_sysmsi_template = { + .chip = { + .name = "RPMI-SYSMSI", + .irq_mask = rpmi_sysmsi_irq_mask, + .irq_unmask = rpmi_sysmsi_irq_unmask, +#ifdef CONFIG_SMP + .irq_set_affinity = irq_chip_set_affinity_parent, +#endif + .irq_write_msi_msg = rpmi_sysmsi_write_msg, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_MASK_ON_SUSPEND, + }, + + .ops = { + .set_desc = rpmi_sysmsi_set_desc, + .msi_translate = rpmi_sysmsi_translate, + }, + + .info = { + .bus_token = DOMAIN_BUS_WIRED_TO_MSI, + .flags = MSI_FLAG_USE_DEV_FWNODE, + .handler = handle_simple_irq, + .handler_name = "simple", + }, +}; + +static int rpmi_sysmsi_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct rpmi_sysmsi_priv *priv; + int rc; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + priv->dev = dev; + + /* Setup mailbox client */ + priv->client.dev = priv->dev; + priv->client.rx_callback = NULL; + priv->client.tx_block = false; + priv->client.knows_txdone = true; + priv->client.tx_tout = 0; + + /* Request mailbox channel */ + priv->chan = mbox_request_channel(&priv->client, 0); + if (IS_ERR(priv->chan)) + return PTR_ERR(priv->chan); + + /* Get number of system MSIs */ + rc = rpmi_sysmsi_get_num_msi(priv); + if (rc < 1) { + mbox_free_channel(priv->chan); + if (rc) + return dev_err_probe(dev, rc, "Failed to get number of system MSIs\n"); + else + return dev_err_probe(dev, -ENODEV, "No system MSIs found\n"); + } + priv->nr_irqs = rc; + + /* + * The device MSI domain for platform devices on RISC-V architecture + * is only available after the MSI controller driver is probed so, + * explicitly configure here. + */ + if (!dev_get_msi_domain(dev)) { + /* + * The device MSI domain for OF devices is only set at the + * time of populating/creating OF device. If the device MSI + * domain is discovered later after the OF device is created + * then we need to set it explicitly before using any platform + * MSI functions. + */ + if (dev_of_node(dev)) + of_msi_configure(dev, dev_of_node(dev)); + + if (!dev_get_msi_domain(dev)) { + mbox_free_channel(priv->chan); + return -EPROBE_DEFER; + } + } + + if (!msi_create_device_irq_domain(dev, MSI_DEFAULT_DOMAIN, + &rpmi_sysmsi_template, + priv->nr_irqs, priv, priv)) { + mbox_free_channel(priv->chan); + return dev_err_probe(dev, -ENOMEM, "failed to create MSI irq domain\n"); + } + + dev_info(dev, "%u system MSIs registered\n", priv->nr_irqs); + return 0; +} + +static const struct of_device_id rpmi_sysmsi_match[] = { + { .compatible = "riscv,rpmi-system-msi" }, + {} +}; + +static struct platform_driver rpmi_sysmsi_driver = { + .driver = { + .name = "rpmi-sysmsi", + .of_match_table = rpmi_sysmsi_match, + }, + .probe = rpmi_sysmsi_probe, +}; +builtin_platform_driver(rpmi_sysmsi_driver); diff --git a/include/linux/mailbox/riscv-rpmi-message.h b/include/linux/mailbox/riscv-rpmi-message.h index 8176d33747fe..e135c6564d0c 100644 --- a/include/linux/mailbox/riscv-rpmi-message.h +++ b/include/linux/mailbox/riscv-rpmi-message.h @@ -91,6 +91,7 @@ static inline int rpmi_to_linux_error(int rpmi_error) } /* RPMI service group IDs */ +#define RPMI_SRVGRP_SYSTEM_MSI 0x00002 #define RPMI_SRVGRP_CLOCK 0x00008 /* RPMI clock service IDs */ @@ -106,6 +107,18 @@ enum rpmi_clock_service_id { RPMI_CLK_SRV_ID_MAX_COUNT }; +/* RPMI system MSI service IDs */ +enum rpmi_sysmsi_service_id { + RPMI_SYSMSI_SRV_ENABLE_NOTIFICATION = 0x01, + RPMI_SYSMSI_SRV_GET_ATTRIBUTES = 0x02, + RPMI_SYSMSI_SRV_GET_MSI_ATTRIBUTES = 0x03, + RPMI_SYSMSI_SRV_SET_MSI_STATE = 0x04, + RPMI_SYSMSI_SRV_GET_MSI_STATE = 0x05, + RPMI_SYSMSI_SRV_SET_MSI_TARGET = 0x06, + RPMI_SYSMSI_SRV_GET_MSI_TARGET = 0x07, + RPMI_SYSMSI_SRV_ID_MAX_COUNT +}; + /* RPMI Linux mailbox attribute IDs */ enum rpmi_mbox_attribute_id { RPMI_MBOX_ATTR_SPEC_VERSION, -- cgit v1.2.3 From fed7eaa4f037361fe4f3d4170649d6849a25998d Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 25 Sep 2025 12:42:16 -0700 Subject: PM: runtime: Update kerneldoc return codes APIs based on __pm_runtime_idle() (pm_runtime_idle(), pm_request_idle()) do not return 1 when already suspended. They return -EAGAIN. This is already covered in the docs, so the entry for "1" is redundant and conflicting. (pm_runtime_put() and pm_runtime_put_sync() were previously incorrect, but that's fixed in "PM: runtime: pm_runtime_put{,_sync}() returns 1 when already suspended", to ensure consistency with APIs like pm_runtime_put_autosuspend().) RPM_GET_PUT APIs based on __pm_runtime_suspend() do return 1 when already suspended, but the language is a little unclear -- it's not really an "error", so it seems better to list as a clarification before the 0/success case. Additionally, they only actually return 1 when the refcount makes it to 0; if the usage counter is still non-zero, we return 0. pm_runtime_put(), etc., also don't appear at first like they can ever see "-EAGAIN: Runtime PM usage_count non-zero", because in non-racy conditions, pm_runtime_put() would drop its reference count, see it's non-zero, and return early (in __pm_runtime_idle()). However, it's possible to race with another actor that increments the usage_count afterward, since rpm_idle() is protected by a separate lock; in such a case, we may see -EAGAIN. Because this case is only seen in the presence of concurrent actors, it makes sense to clarify that this is when "usage_count **became** non-zero", by way of some racing actor. Lastly, pm_runtime_put_sync_suspend() duplicated some -EAGAIN language. Fix that. Fixes: 271ff96d6066 ("PM: runtime: Document return values of suspend-related API functions") Link: https://lore.kernel.org/linux-pm/aJ5pkEJuixTaybV4@google.com/ Signed-off-by: Brian Norris Reviewed-by: Sakari Ailus Cc: 6.17+ # 6.17+ Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 56 +++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d88d6b6ccf5b..d1ff76e0e2d0 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -350,13 +350,12 @@ static inline int pm_runtime_force_resume(struct device *dev) { return -ENXIO; } * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing - * or device not in %RPM_ACTIVE state. + * * -EAGAIN: Runtime PM usage counter non-zero, Runtime PM status change + * ongoing or device not in %RPM_ACTIVE state. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM idle and suspend callbacks. */ @@ -370,14 +369,15 @@ static inline int pm_runtime_idle(struct device *dev) * @dev: Target device. * * Return: + * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change + * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -396,14 +396,15 @@ static inline int pm_runtime_suspend(struct device *dev) * engaging its "idle check" callback. * * Return: + * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change + * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -433,13 +434,12 @@ static inline int pm_runtime_resume(struct device *dev) * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing - * or device not in %RPM_ACTIVE state. + * * -EAGAIN: Runtime PM usage counter non-zero, Runtime PM status change + * ongoing or device not in %RPM_ACTIVE state. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_request_idle(struct device *dev) { @@ -464,15 +464,16 @@ static inline int pm_request_resume(struct device *dev) * equivalent pm_runtime_autosuspend() for @dev asynchronously. * * Return: + * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change + * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_request_autosuspend(struct device *dev) { @@ -540,15 +541,16 @@ static inline int pm_runtime_resume_and_get(struct device *dev) * equal to 0, queue up a work item for @dev like in pm_request_idle(). * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_runtime_put(struct device *dev) { @@ -565,15 +567,16 @@ DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T)) * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int __pm_runtime_put_autosuspend(struct device *dev) { @@ -590,15 +593,16 @@ static inline int __pm_runtime_put_autosuspend(struct device *dev) * in pm_request_autosuspend(). * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_runtime_put_autosuspend(struct device *dev) { @@ -619,14 +623,15 @@ static inline int pm_runtime_put_autosuspend(struct device *dev) * if it returns an error code. * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -646,15 +651,15 @@ static inline int pm_runtime_put_sync(struct device *dev) * if it returns an error code. * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. - * * -EAGAIN: usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -677,15 +682,16 @@ static inline int pm_runtime_put_sync_suspend(struct device *dev) * if it returns an error code. * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ -- cgit v1.2.3 From 9a0abc39450a3123fd52533a662fbd37e0d1508c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 17:47:14 +0200 Subject: PM: runtime: Add auto-cleanup macros for "resume and get" operations It is generally useful to be able to automatically drop a device's runtime PM usage counter incremented by runtime PM operations that resume a device and bump up its usage counter [1]. To that end, add guard definition macros allowing pm_runtime_put() and pm_runtime_put_autosuspend() to be used for the auto-cleanup in those cases. Simply put, a piece of code like below: pm_runtime_get_sync(dev); ..... pm_runtime_put(dev); return 0; can be transformed with guard() like: guard(pm_runtime_active)(dev); ..... return 0; (see the pm_runtime_put() call is gone). However, it is better to do proper error handling in the majority of cases, so doing something like this instead of the above is recommended: ACQUIRE(pm_runtime_active_try, pm)(dev); if (ACQUIRE_ERR(pm_runtime_active_try, &pm)) return -ENXIO; ..... return 0; In all of the cases in which runtime PM is known to be enabled for the given device or the device can be regarded as operational (and so it can be accessed) with runtime PM disabled, a piece of code like: ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; ..... pm_runtime_put(dev); return 0; can be changed as follows: ACQUIRE(pm_runtime_active_try, pm)(dev); ret = ACQUIRE_ERR(pm_runtime_active_try, &pm); if (ret < 0) return ret; ..... return 0; (again, see the pm_runtime_put() call is gone). Still, if the device cannot be accessed unless runtime PM has been enabled for it, the pm_runtime_active_try_enabled guard variant needs to be used, that is (in the context of the example above): ACQUIRE(pm_runtime_active_try_enabled, pm)(dev); ret = ACQUIRE_ERR(pm_runtime_active_try_enabled, &pm); if (ret < 0) return ret; ..... return 0; When the original code calls pm_runtime_put_autosuspend(), use one of the "auto" guard variants, pm_runtime_active_auto/_try/_enabled, so for example, a piece of code like: ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; ..... pm_runtime_put_autosuspend(dev); return 0; will become: ACQUIRE(pm_runtime_active_auto_try_enabled, pm)(dev); ret = ACQUIRE_ERR(pm_runtime_active_auto_try_enabled, &pm); if (ret < 0) return ret; ..... return 0; Note that the cases in which the return value of pm_runtime_get_sync() is checked can also be handled with the help of the new guard macros. For example, a piece of code like: ret = pm_runtime_get_sync(dev); if (ret < 0) { pm_runtime_put(dev); return ret; } ..... pm_runtime_put(dev); return 0; can be rewritten as: ACQUIRE(pm_runtime_active_auto_try_enabled, pm)(dev); ret = ACQUIRE_ERR(pm_runtime_active_auto_try_enabled, &pm); if (ret < 0) return ret; ..... return 0; or pm_runtime_get_active_try can be used if transparent handling of disabled runtime PM is desirable. Link: https://lore.kernel.org/linux-pm/878qimv24u.wl-tiwai@suse.de/ [1] Link: https://lore.kernel.org/linux-pm/20250926150613.000073a4@huawei.com/ Signed-off-by: Rafael J. Wysocki Acked-by: Dan Williams Reviewed-by: Takashi Iwai Link: https://patch.msgid.link/2238241.irdbgypaU6@rafael.j.wysocki [ rjw: Fixed leftovers from the previous version in the changelog ] Reviewed-by: Jonathan Cameron Reviewed-by: Dhruva Gole Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 2 ++ include/linux/pm_runtime.h | 44 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 37 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index faa68bf9ef3d..0fc825066ede 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -799,6 +799,8 @@ static int rpm_resume(struct device *dev, int rpmflags) if (dev->power.runtime_status == RPM_ACTIVE && dev->power.last_status == RPM_ACTIVE) retval = 1; + else if (rpmflags & RPM_TRANSPARENT) + goto out; else retval = -EACCES; } diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d1ff76e0e2d0..e5426bdd0c9f 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -21,6 +21,7 @@ #define RPM_GET_PUT 0x04 /* Increment/decrement the usage_count */ #define RPM_AUTO 0x08 /* Use autosuspend_delay */ +#define RPM_TRANSPARENT 0x10 /* Succeed if runtime PM is disabled */ /* * Use this for defining a set of PM operations to be used in all situations @@ -512,6 +513,19 @@ static inline int pm_runtime_get_sync(struct device *dev) return __pm_runtime_resume(dev, RPM_GET_PUT); } +static inline int pm_runtime_get_active(struct device *dev, int rpmflags) +{ + int ret; + + ret = __pm_runtime_resume(dev, RPM_GET_PUT | rpmflags); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + return 0; +} + /** * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. * @dev: Target device. @@ -522,15 +536,7 @@ static inline int pm_runtime_get_sync(struct device *dev) */ static inline int pm_runtime_resume_and_get(struct device *dev) { - int ret; - - ret = __pm_runtime_resume(dev, RPM_GET_PUT); - if (ret < 0) { - pm_runtime_put_noidle(dev); - return ret; - } - - return 0; + return pm_runtime_get_active(dev, 0); } /** @@ -610,6 +616,26 @@ static inline int pm_runtime_put_autosuspend(struct device *dev) return __pm_runtime_put_autosuspend(dev); } +DEFINE_GUARD(pm_runtime_active, struct device *, + pm_runtime_get_sync(_T), pm_runtime_put(_T)); +DEFINE_GUARD(pm_runtime_active_auto, struct device *, + pm_runtime_get_sync(_T), pm_runtime_put_autosuspend(_T)); +/* + * Use the following guards with ACQUIRE()/ACQUIRE_ERR(). + * + * The difference between the "_try" and "_try_enabled" variants is that the + * former do not produce an error when runtime PM is disabled for the given + * device. + */ +DEFINE_GUARD_COND(pm_runtime_active, _try, + pm_runtime_get_active(_T, RPM_TRANSPARENT)) +DEFINE_GUARD_COND(pm_runtime_active, _try_enabled, + pm_runtime_resume_and_get(_T)) +DEFINE_GUARD_COND(pm_runtime_active_auto, _try, + pm_runtime_get_active(_T, RPM_TRANSPARENT)) +DEFINE_GUARD_COND(pm_runtime_active_auto, _try_enabled, + pm_runtime_resume_and_get(_T)) + /** * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0. * @dev: Target device. -- cgit v1.2.3 From d5e58ce1fb0f13a9a0845851f267ede3551cd9fe Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 18:26:40 +0200 Subject: PM: runtime: Drop DEFINE_FREE() for pm_runtime_put() The DEFINE_FREE() for pm_runtime_put has been superseded by recently introduced runtime PM auto-cleanup macros and its only user has been converted to using one of the new macros, so drop it. Signed-off-by: Rafael J. Wysocki Reviewed-by: Dhruva Gole Reviewed-by: Takashi Iwai Reviewed-by: Jonathan Cameron --- include/linux/pm_runtime.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index e5426bdd0c9f..edb8aed5ef62 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -563,8 +563,6 @@ static inline int pm_runtime_put(struct device *dev) return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } -DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T)) - /** * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. * @dev: Target device. -- cgit v1.2.3 From 20c48920583675e67b3824f147726e0fbda735ce Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 18 Sep 2025 17:33:00 -0700 Subject: KVM: Export KVM-internal symbols for sub-modules only Rework the vast majority of KVM's exports to expose symbols only to KVM submodules, i.e. to x86's kvm-{amd,intel}.ko and PPC's kvm-{pr,hv}.ko. With few exceptions, KVM's exported APIs are intended (and safe) for KVM- internal usage only. Keep kvm_get_kvm(), kvm_get_kvm_safe(), and kvm_put_kvm() as normal exports, as they are needed by VFIO, and are generally safe for external usage (though ideally even the get/put APIs would be KVM-internal, and VFIO would pin a VM by grabbing a reference to its associated file). Implement a framework in kvm_types.h in anticipation of providing a macro to restrict KVM-specific kernel exports, i.e. to provide symbol exports for KVM if and only if KVM is built as one or more modules. Link: https://lore.kernel.org/r/20250919003303.1355064-3-seanjc@google.com Cc: Nathan Chancellor Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/powerpc/include/asm/Kbuild | 1 - arch/powerpc/include/asm/kvm_types.h | 15 ++++ arch/x86/include/asm/kvm_types.h | 10 +++ include/linux/kvm_types.h | 25 +++++-- virt/kvm/eventfd.c | 2 +- virt/kvm/guest_memfd.c | 4 +- virt/kvm/kvm_main.c | 128 +++++++++++++++++------------------ 7 files changed, 110 insertions(+), 75 deletions(-) create mode 100644 arch/powerpc/include/asm/kvm_types.h (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index e5fdc336c9b2..2e23533b67e3 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -3,7 +3,6 @@ generated-y += syscall_table_32.h generated-y += syscall_table_64.h generated-y += syscall_table_spu.h generic-y += agp.h -generic-y += kvm_types.h generic-y += mcs_spinlock.h generic-y += qrwlock.h generic-y += early_ioremap.h diff --git a/arch/powerpc/include/asm/kvm_types.h b/arch/powerpc/include/asm/kvm_types.h new file mode 100644 index 000000000000..5d4bffea7d47 --- /dev/null +++ b/arch/powerpc/include/asm/kvm_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_PPC_KVM_TYPES_H +#define _ASM_PPC_KVM_TYPES_H + +#if IS_MODULE(CONFIG_KVM_BOOK3S_64_PR) && IS_MODULE(CONFIG_KVM_BOOK3S_64_HV) +#define KVM_SUB_MODULES kvm-pr,kvm-hv +#elif IS_MODULE(CONFIG_KVM_BOOK3S_64_PR) +#define KVM_SUB_MODULES kvm-pr +#elif IS_MODULE(CONFIG_KVM_BOOK3S_64_HV) +#define KVM_SUB_MODULES kvm-hv +#else +#undef KVM_SUB_MODULES +#endif + +#endif diff --git a/arch/x86/include/asm/kvm_types.h b/arch/x86/include/asm/kvm_types.h index 08f1b57d3b62..23268a188e70 100644 --- a/arch/x86/include/asm/kvm_types.h +++ b/arch/x86/include/asm/kvm_types.h @@ -2,6 +2,16 @@ #ifndef _ASM_X86_KVM_TYPES_H #define _ASM_X86_KVM_TYPES_H +#if IS_MODULE(CONFIG_KVM_AMD) && IS_MODULE(CONFIG_KVM_INTEL) +#define KVM_SUB_MODULES kvm-amd,kvm-intel +#elif IS_MODULE(CONFIG_KVM_AMD) +#define KVM_SUB_MODULES kvm-amd +#elif IS_MODULE(CONFIG_KVM_INTEL) +#define KVM_SUB_MODULES kvm-intel +#else +#undef KVM_SUB_MODULES +#endif + #define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40 #endif /* _ASM_X86_KVM_TYPES_H */ diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 827ecc0b7e10..490464c205b4 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -3,6 +3,23 @@ #ifndef __KVM_TYPES_H__ #define __KVM_TYPES_H__ +#include +#include +#include +#include + +#ifdef KVM_SUB_MODULES +#define EXPORT_SYMBOL_FOR_KVM_INTERNAL(symbol) \ + EXPORT_SYMBOL_FOR_MODULES(symbol, __stringify(KVM_SUB_MODULES)) +#else +#define EXPORT_SYMBOL_FOR_KVM_INTERNAL(symbol) +#endif + +#ifndef __ASSEMBLER__ + +#include +#include + struct kvm; struct kvm_async_pf; struct kvm_device_ops; @@ -19,13 +36,6 @@ struct kvm_memslots; enum kvm_mr_change; -#include -#include -#include -#include - -#include - /* * Address types: * @@ -116,5 +126,6 @@ struct kvm_vcpu_stat_generic { }; #define KVM_STATS_NAME_SIZE 48 +#endif /* !__ASSEMBLER__ */ #endif /* __KVM_TYPES_H__ */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 6b1133a6617f..a7794ffdb976 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -525,7 +525,7 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) return false; } -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_irq_has_notifier); void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) { diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 1d323ca178cb..94bafd6c558c 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -702,7 +702,7 @@ out: fput(file); return r; } -EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_get_pfn); #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, @@ -785,5 +785,5 @@ put_folio_and_exit: fput(file); return ret && !i ? ret : i; } -EXPORT_SYMBOL_GPL(kvm_gmem_populate); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_populate); #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c0c5626ab71d..226faeaa8e56 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -77,22 +77,22 @@ MODULE_LICENSE("GPL"); /* Architectures should define their poll value according to the halt latency */ unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; module_param(halt_poll_ns, uint, 0644); -EXPORT_SYMBOL_GPL(halt_poll_ns); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns); /* Default doubles per-vcpu halt_poll_ns. */ unsigned int halt_poll_ns_grow = 2; module_param(halt_poll_ns_grow, uint, 0644); -EXPORT_SYMBOL_GPL(halt_poll_ns_grow); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns_grow); /* The start value to grow halt_poll_ns from */ unsigned int halt_poll_ns_grow_start = 10000; /* 10us */ module_param(halt_poll_ns_grow_start, uint, 0644); -EXPORT_SYMBOL_GPL(halt_poll_ns_grow_start); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns_grow_start); /* Default halves per-vcpu halt_poll_ns. */ unsigned int halt_poll_ns_shrink = 2; module_param(halt_poll_ns_shrink, uint, 0644); -EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns_shrink); /* * Allow direct access (from KVM or the CPU) without MMU notifier protection @@ -170,7 +170,7 @@ void vcpu_load(struct kvm_vcpu *vcpu) kvm_arch_vcpu_load(vcpu, cpu); put_cpu(); } -EXPORT_SYMBOL_GPL(vcpu_load); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(vcpu_load); void vcpu_put(struct kvm_vcpu *vcpu) { @@ -180,7 +180,7 @@ void vcpu_put(struct kvm_vcpu *vcpu) __this_cpu_write(kvm_running_vcpu, NULL); preempt_enable(); } -EXPORT_SYMBOL_GPL(vcpu_put); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(vcpu_put); /* TODO: merge with kvm_arch_vcpu_should_kick */ static bool kvm_request_needs_ipi(struct kvm_vcpu *vcpu, unsigned req) @@ -288,7 +288,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) return called; } -EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_make_all_cpus_request); void kvm_flush_remote_tlbs(struct kvm *kvm) { @@ -309,7 +309,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) ++kvm->stat.generic.remote_tlb_flush; } -EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_flush_remote_tlbs); void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages) { @@ -499,7 +499,7 @@ void kvm_destroy_vcpus(struct kvm *kvm) atomic_set(&kvm->online_vcpus, 0); } -EXPORT_SYMBOL_GPL(kvm_destroy_vcpus); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_destroy_vcpus); #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) @@ -1365,7 +1365,7 @@ void kvm_put_kvm_no_destroy(struct kvm *kvm) { WARN_ON(refcount_dec_and_test(&kvm->users_count)); } -EXPORT_SYMBOL_GPL(kvm_put_kvm_no_destroy); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_put_kvm_no_destroy); static int kvm_vm_release(struct inode *inode, struct file *filp) { @@ -1397,7 +1397,7 @@ out_unlock: } return -EINTR; } -EXPORT_SYMBOL_GPL(kvm_trylock_all_vcpus); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_trylock_all_vcpus); int kvm_lock_all_vcpus(struct kvm *kvm) { @@ -1422,7 +1422,7 @@ out_unlock: } return r; } -EXPORT_SYMBOL_GPL(kvm_lock_all_vcpus); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lock_all_vcpus); void kvm_unlock_all_vcpus(struct kvm *kvm) { @@ -1434,7 +1434,7 @@ void kvm_unlock_all_vcpus(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) mutex_unlock(&vcpu->mutex); } -EXPORT_SYMBOL_GPL(kvm_unlock_all_vcpus); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_unlock_all_vcpus); /* * Allocation size is twice as large as the actual dirty bitmap size. @@ -2142,7 +2142,7 @@ int kvm_set_internal_memslot(struct kvm *kvm, return kvm_set_memory_region(kvm, mem); } -EXPORT_SYMBOL_GPL(kvm_set_internal_memslot); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_internal_memslot); static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region2 *mem) @@ -2201,7 +2201,7 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, *is_dirty = 1; return 0; } -EXPORT_SYMBOL_GPL(kvm_get_dirty_log); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_dirty_log); #else /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ /** @@ -2636,7 +2636,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) { return __gfn_to_memslot(kvm_memslots(kvm), gfn); } -EXPORT_SYMBOL_GPL(gfn_to_memslot); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(gfn_to_memslot); struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn) { @@ -2670,7 +2670,7 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn return NULL; } -EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_gfn_to_memslot); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { @@ -2678,7 +2678,7 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) return kvm_is_visible_memslot(memslot); } -EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_is_visible_gfn); bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) { @@ -2686,7 +2686,7 @@ bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) return kvm_is_visible_memslot(memslot); } -EXPORT_SYMBOL_GPL(kvm_vcpu_is_visible_gfn); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_is_visible_gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) { @@ -2743,19 +2743,19 @@ unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, { return gfn_to_hva_many(slot, gfn, NULL); } -EXPORT_SYMBOL_GPL(gfn_to_hva_memslot); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(gfn_to_hva_memslot); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) { return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); } -EXPORT_SYMBOL_GPL(gfn_to_hva); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(gfn_to_hva); unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn) { return gfn_to_hva_many(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, NULL); } -EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_gfn_to_hva); /* * Return the hva of a @gfn and the R/W attribute if possible. @@ -2819,7 +2819,7 @@ void kvm_release_page_clean(struct page *page) kvm_set_page_accessed(page); put_page(page); } -EXPORT_SYMBOL_GPL(kvm_release_page_clean); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_release_page_clean); void kvm_release_page_dirty(struct page *page) { @@ -2829,7 +2829,7 @@ void kvm_release_page_dirty(struct page *page) kvm_set_page_dirty(page); kvm_release_page_clean(page); } -EXPORT_SYMBOL_GPL(kvm_release_page_dirty); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_release_page_dirty); static kvm_pfn_t kvm_resolve_pfn(struct kvm_follow_pfn *kfp, struct page *page, struct follow_pfnmap_args *map, bool writable) @@ -3073,7 +3073,7 @@ kvm_pfn_t __kvm_faultin_pfn(const struct kvm_memory_slot *slot, gfn_t gfn, return kvm_follow_pfn(&kfp); } -EXPORT_SYMBOL_GPL(__kvm_faultin_pfn); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_faultin_pfn); int kvm_prefetch_pages(struct kvm_memory_slot *slot, gfn_t gfn, struct page **pages, int nr_pages) @@ -3090,7 +3090,7 @@ int kvm_prefetch_pages(struct kvm_memory_slot *slot, gfn_t gfn, return get_user_pages_fast_only(addr, nr_pages, FOLL_WRITE, pages); } -EXPORT_SYMBOL_GPL(kvm_prefetch_pages); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_prefetch_pages); /* * Don't use this API unless you are absolutely, positively certain that KVM @@ -3112,7 +3112,7 @@ struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn, bool write) (void)kvm_follow_pfn(&kfp); return refcounted_page; } -EXPORT_SYMBOL_GPL(__gfn_to_page); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__gfn_to_page); int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, bool writable) @@ -3146,7 +3146,7 @@ int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, return map->hva ? 0 : -EFAULT; } -EXPORT_SYMBOL_GPL(__kvm_vcpu_map); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_vcpu_map); void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map) { @@ -3174,7 +3174,7 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map) map->page = NULL; map->pinned_page = NULL; } -EXPORT_SYMBOL_GPL(kvm_vcpu_unmap); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_unmap); static int next_segment(unsigned long len, int offset) { @@ -3210,7 +3210,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, return __kvm_read_guest_page(slot, gfn, data, offset, len); } -EXPORT_SYMBOL_GPL(kvm_read_guest_page); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_guest_page); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, int len) @@ -3219,7 +3219,7 @@ int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, return __kvm_read_guest_page(slot, gfn, data, offset, len); } -EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_read_guest_page); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) { @@ -3239,7 +3239,7 @@ int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) } return 0; } -EXPORT_SYMBOL_GPL(kvm_read_guest); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_guest); int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len) { @@ -3259,7 +3259,7 @@ int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned l } return 0; } -EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_read_guest); static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn, void *data, int offset, unsigned long len) @@ -3290,7 +3290,7 @@ int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, return __kvm_read_guest_atomic(slot, gfn, data, offset, len); } -EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_read_guest_atomic); /* Copy @len bytes from @data into guest memory at '(@gfn * PAGE_SIZE) + @offset' */ static int __kvm_write_guest_page(struct kvm *kvm, @@ -3320,7 +3320,7 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, return __kvm_write_guest_page(kvm, slot, gfn, data, offset, len); } -EXPORT_SYMBOL_GPL(kvm_write_guest_page); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_write_guest_page); int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data, int offset, int len) @@ -3329,7 +3329,7 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len); } -EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_write_guest_page); int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, unsigned long len) @@ -3350,7 +3350,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, } return 0; } -EXPORT_SYMBOL_GPL(kvm_write_guest); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_write_guest); int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, unsigned long len) @@ -3371,7 +3371,7 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, } return 0; } -EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_write_guest); static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, struct gfn_to_hva_cache *ghc, @@ -3420,7 +3420,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, struct kvm_memslots *slots = kvm_memslots(kvm); return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len); } -EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gfn_to_hva_cache_init); int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned int offset, @@ -3451,14 +3451,14 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, return 0; } -EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_write_guest_offset_cached); int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned long len) { return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len); } -EXPORT_SYMBOL_GPL(kvm_write_guest_cached); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_write_guest_cached); int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned int offset, @@ -3488,14 +3488,14 @@ int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, return 0; } -EXPORT_SYMBOL_GPL(kvm_read_guest_offset_cached); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_guest_offset_cached); int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned long len) { return kvm_read_guest_offset_cached(kvm, ghc, data, 0, len); } -EXPORT_SYMBOL_GPL(kvm_read_guest_cached); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_guest_cached); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) { @@ -3515,7 +3515,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) } return 0; } -EXPORT_SYMBOL_GPL(kvm_clear_guest); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_clear_guest); void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, @@ -3540,7 +3540,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, set_bit_le(rel_gfn, memslot->dirty_bitmap); } } -EXPORT_SYMBOL_GPL(mark_page_dirty_in_slot); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(mark_page_dirty_in_slot); void mark_page_dirty(struct kvm *kvm, gfn_t gfn) { @@ -3549,7 +3549,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) memslot = gfn_to_memslot(kvm, gfn); mark_page_dirty_in_slot(kvm, memslot, gfn); } -EXPORT_SYMBOL_GPL(mark_page_dirty); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(mark_page_dirty); void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) { @@ -3558,7 +3558,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn); } -EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_mark_page_dirty); void kvm_sigset_activate(struct kvm_vcpu *vcpu) { @@ -3795,7 +3795,7 @@ out: trace_kvm_vcpu_wakeup(halt_ns, waited, vcpu_valid_wakeup(vcpu)); } -EXPORT_SYMBOL_GPL(kvm_vcpu_halt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_halt); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) { @@ -3807,7 +3807,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) return false; } -EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_wake_up); #ifndef CONFIG_S390 /* @@ -3859,7 +3859,7 @@ void __kvm_vcpu_kick(struct kvm_vcpu *vcpu, bool wait) out: put_cpu(); } -EXPORT_SYMBOL_GPL(__kvm_vcpu_kick); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_vcpu_kick); #endif /* !CONFIG_S390 */ int kvm_vcpu_yield_to(struct kvm_vcpu *target) @@ -3882,7 +3882,7 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target) return ret; } -EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_yield_to); /* * Helper that checks whether a VCPU is eligible for directed yield. @@ -4037,7 +4037,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) /* Ensure vcpu is not eligible during next spinloop */ kvm_vcpu_set_dy_eligible(me, false); } -EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_on_spin); static bool kvm_page_in_dirty_ring(struct kvm *kvm, unsigned long pgoff) { @@ -5019,7 +5019,7 @@ bool kvm_are_all_memslots_empty(struct kvm *kvm) return true; } -EXPORT_SYMBOL_GPL(kvm_are_all_memslots_empty); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_are_all_memslots_empty); static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, struct kvm_enable_cap *cap) @@ -5474,7 +5474,7 @@ bool file_is_kvm(struct file *file) { return file && file->f_op == &kvm_vm_fops; } -EXPORT_SYMBOL_GPL(file_is_kvm); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(file_is_kvm); static int kvm_dev_ioctl_create_vm(unsigned long type) { @@ -5569,10 +5569,10 @@ static struct miscdevice kvm_dev = { #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING bool enable_virt_at_load = true; module_param(enable_virt_at_load, bool, 0444); -EXPORT_SYMBOL_GPL(enable_virt_at_load); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_virt_at_load); __visible bool kvm_rebooting; -EXPORT_SYMBOL_GPL(kvm_rebooting); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_rebooting); static DEFINE_PER_CPU(bool, virtualization_enabled); static DEFINE_MUTEX(kvm_usage_lock); @@ -5723,7 +5723,7 @@ err_cpuhp: --kvm_usage_count; return r; } -EXPORT_SYMBOL_GPL(kvm_enable_virtualization); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_enable_virtualization); void kvm_disable_virtualization(void) { @@ -5736,7 +5736,7 @@ void kvm_disable_virtualization(void) cpuhp_remove_state(CPUHP_AP_KVM_ONLINE); kvm_arch_disable_virtualization(); } -EXPORT_SYMBOL_GPL(kvm_disable_virtualization); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_disable_virtualization); static int kvm_init_virtualization(void) { @@ -5885,7 +5885,7 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, r = __kvm_io_bus_write(vcpu, bus, &range, val); return r < 0 ? r : 0; } -EXPORT_SYMBOL_GPL(kvm_io_bus_write); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_io_bus_write); int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val, long cookie) @@ -5954,7 +5954,7 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, r = __kvm_io_bus_read(vcpu, bus, &range, val); return r < 0 ? r : 0; } -EXPORT_SYMBOL_GPL(kvm_io_bus_read); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_io_bus_read); static void __free_bus(struct rcu_head *rcu) { @@ -6078,7 +6078,7 @@ out_unlock: return iodev; } -EXPORT_SYMBOL_GPL(kvm_io_bus_get_dev); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_io_bus_get_dev); static int kvm_debugfs_open(struct inode *inode, struct file *file, int (*get)(void *, u64 *), int (*set)(void *, u64), @@ -6415,7 +6415,7 @@ struct kvm_vcpu *kvm_get_running_vcpu(void) return vcpu; } -EXPORT_SYMBOL_GPL(kvm_get_running_vcpu); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_running_vcpu); /** * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus. @@ -6550,7 +6550,7 @@ err_cpu_kick_mask: kmem_cache_destroy(kvm_vcpu_cache); return r; } -EXPORT_SYMBOL_GPL(kvm_init); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_init); void kvm_exit(void) { @@ -6573,4 +6573,4 @@ void kvm_exit(void) kvm_async_pf_deinit(); kvm_irqfd_exit(); } -EXPORT_SYMBOL_GPL(kvm_exit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_exit); -- cgit v1.2.3 From 6d0386ea99875313fdfd074eb74013b6e3b48a76 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Aug 2025 17:01:53 -0700 Subject: entry/kvm: KVM: Move KVM details related to signal/-EINTR into KVM proper Move KVM's morphing of pending signals into userspace exits into KVM proper, and drop the @vcpu param from xfer_to_guest_mode_handle_work(). How KVM responds to -EINTR is a detail that really belongs in KVM itself, and invoking kvm_handle_signal_exit() from kernel code creates an inverted module dependency. E.g. attempting to move kvm_handle_signal_exit() into kvm_main.c would generate an linker error when building kvm.ko as a module. Dropping KVM details will also converting the KVM "entry" code into a more generic virtualization framework so that it can be used when running as a Hyper-V root partition. Lastly, eliminating usage of "struct kvm_vcpu" outside of KVM is also nice to have for KVM x86 developers, as keeping the details of kvm_vcpu purely within KVM allows changing the layout of the structure without having to boot into a new kernel, e.g. allows rebuilding and reloading kvm.ko with a modified kvm_vcpu structure as part of debug/development. Signed-off-by: Sean Christopherson Reviewed-by: Thomas Gleixner Signed-off-by: Wei Liu --- arch/arm64/kvm/arm.c | 3 +-- arch/loongarch/kvm/vcpu.c | 3 +-- arch/riscv/kvm/vcpu.c | 3 +-- arch/x86/kvm/vmx/vmx.c | 1 - arch/x86/kvm/x86.c | 3 +-- include/linux/entry-kvm.h | 11 +++-------- include/linux/kvm_host.h | 13 ++++++++++++- kernel/entry/kvm.c | 13 +++++-------- 8 files changed, 24 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 888f7c7abf54..418fd3043467 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -6,7 +6,6 @@ #include #include -#include #include #include #include @@ -1177,7 +1176,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* * Check conditions before entering the guest */ - ret = xfer_to_guest_mode_handle_work(vcpu); + ret = kvm_xfer_to_guest_mode_handle_work(vcpu); if (!ret) ret = 1; diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index d1b8c50941ca..514256b25ba1 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -4,7 +4,6 @@ */ #include -#include #include #include #include @@ -251,7 +250,7 @@ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) /* * Check conditions before entering the guest */ - ret = xfer_to_guest_mode_handle_work(vcpu); + ret = kvm_xfer_to_guest_mode_handle_work(vcpu); if (ret < 0) return ret; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index f001e56403f9..251e787f2ebc 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -7,7 +7,6 @@ */ #include -#include #include #include #include @@ -910,7 +909,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) run->exit_reason = KVM_EXIT_UNKNOWN; while (ret > 0) { /* Check conditions before entering the guest */ - ret = xfer_to_guest_mode_handle_work(vcpu); + ret = kvm_xfer_to_guest_mode_handle_work(vcpu); if (ret) continue; ret = 1; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index aa157fe5b7b3..d7c86613e50a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a1c49bc681c4..0b13b8bf69e5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include @@ -11241,7 +11240,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (__xfer_to_guest_mode_work_pending()) { kvm_vcpu_srcu_read_unlock(vcpu); - r = xfer_to_guest_mode_handle_work(vcpu); + r = kvm_xfer_to_guest_mode_handle_work(vcpu); kvm_vcpu_srcu_read_lock(vcpu); if (r) return r; diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 16149f6625e4..3644de7e6019 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -21,8 +21,6 @@ _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ ARCH_XFER_TO_GUEST_MODE_WORK) -struct kvm_vcpu; - /** * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest * mode work handling function. @@ -32,12 +30,10 @@ struct kvm_vcpu; * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be * replaced by architecture specific code. */ -static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, - unsigned long ti_work); +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); #ifndef arch_xfer_to_guest_mode_work -static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, - unsigned long ti_work) +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) { return 0; } @@ -46,11 +42,10 @@ static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, /** * xfer_to_guest_mode_handle_work - Check and handle pending work which needs * to be handled before going to guest mode - * @vcpu: Pointer to current's VCPU data * * Returns: 0 or an error code */ -int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu); +int xfer_to_guest_mode_handle_work(void); /** * xfer_to_guest_mode_prepare - Perform last minute preparation work that diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15656b7fba6c..598b9473e46d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2,7 +2,7 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H - +#include #include #include #include @@ -2450,6 +2450,17 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_INTR; vcpu->stat.signal_exits++; } + +static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) +{ + int r = xfer_to_guest_mode_handle_work(); + + if (r) { + WARN_ON_ONCE(r != -EINTR); + kvm_handle_signal_exit(vcpu); + } + return r; +} #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ /* diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index 8485f63863af..6fc762eaacca 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -1,17 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include -static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) +static int xfer_to_guest_mode_work(unsigned long ti_work) { do { int ret; - if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) { - kvm_handle_signal_exit(vcpu); + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) return -EINTR; - } if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) schedule(); @@ -19,7 +16,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) if (ti_work & _TIF_NOTIFY_RESUME) resume_user_mode_work(NULL); - ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work); + ret = arch_xfer_to_guest_mode_handle_work(ti_work); if (ret) return ret; @@ -28,7 +25,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) return 0; } -int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) +int xfer_to_guest_mode_handle_work(void) { unsigned long ti_work; @@ -44,6 +41,6 @@ int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) if (!(ti_work & XFER_TO_GUEST_MODE_WORK)) return 0; - return xfer_to_guest_mode_work(vcpu, ti_work); + return xfer_to_guest_mode_work(ti_work); } EXPORT_SYMBOL_GPL(xfer_to_guest_mode_handle_work); -- cgit v1.2.3 From 9be7e1e320ff2e7db4b23c8ec5f599bbfac94ede Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Aug 2025 17:01:54 -0700 Subject: entry: Rename "kvm" entry code assets to "virt" to genericize APIs Rename the "kvm" entry code files and Kconfigs to use generic "virt" nomenclature so that the code can be reused by other hypervisors (or rather, their root/dom0 partition drivers), without incorrectly suggesting the code somehow relies on and/or involves KVM. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Thomas Gleixner Reviewed-by: Joel Fernandes Signed-off-by: Wei Liu --- MAINTAINERS | 2 +- arch/arm64/kvm/Kconfig | 2 +- arch/loongarch/kvm/Kconfig | 2 +- arch/riscv/kvm/Kconfig | 2 +- arch/x86/kvm/Kconfig | 2 +- include/linux/entry-kvm.h | 95 ---------------------------------------------- include/linux/entry-virt.h | 95 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/kvm_host.h | 6 +-- include/linux/rcupdate.h | 2 +- kernel/entry/Makefile | 2 +- kernel/entry/kvm.c | 46 ---------------------- kernel/entry/virt.c | 46 ++++++++++++++++++++++ kernel/rcu/tree.c | 6 +-- virt/kvm/Kconfig | 2 +- 14 files changed, 155 insertions(+), 155 deletions(-) delete mode 100644 include/linux/entry-kvm.h create mode 100644 include/linux/entry-virt.h delete mode 100644 kernel/entry/kvm.c create mode 100644 kernel/entry/virt.c (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..c255048333f0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10200,7 +10200,7 @@ L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/entry F: include/linux/entry-common.h -F: include/linux/entry-kvm.h +F: include/linux/entry-virt.h F: include/linux/irq-entry-common.h F: kernel/entry/ diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 713248f240e0..6f4fc3caa31a 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -25,7 +25,7 @@ menuconfig KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select KVM_VFIO select HAVE_KVM_DIRTY_RING_ACQ_REL select NEED_KVM_DIRTY_RING_WITH_BITMAP diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index 40eea6da7c25..ae64bbdf83a7 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -31,7 +31,7 @@ config KVM select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_MMU_NOTIFIER select KVM_MMIO - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS help diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index 5a62091b0809..c50328212917 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -30,7 +30,7 @@ config KVM select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING select KVM_MMIO - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select KVM_GENERIC_MMU_NOTIFIER select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 2c86673155c9..f81074b0c0a8 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -40,7 +40,7 @@ config KVM_X86 select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_NO_POLL - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_VFIO select HAVE_KVM_PM_NOTIFIER if PM diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h deleted file mode 100644 index 3644de7e6019..000000000000 --- a/include/linux/entry-kvm.h +++ /dev/null @@ -1,95 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_ENTRYKVM_H -#define __LINUX_ENTRYKVM_H - -#include -#include -#include -#include -#include -#include - -/* Transfer to guest mode work */ -#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK - -#ifndef ARCH_XFER_TO_GUEST_MODE_WORK -# define ARCH_XFER_TO_GUEST_MODE_WORK (0) -#endif - -#define XFER_TO_GUEST_MODE_WORK \ - (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \ - _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ - ARCH_XFER_TO_GUEST_MODE_WORK) - -/** - * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest - * mode work handling function. - * @vcpu: Pointer to current's VCPU data - * @ti_work: Cached TIF flags gathered in xfer_to_guest_mode_handle_work() - * - * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be - * replaced by architecture specific code. - */ -static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); - -#ifndef arch_xfer_to_guest_mode_work -static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) -{ - return 0; -} -#endif - -/** - * xfer_to_guest_mode_handle_work - Check and handle pending work which needs - * to be handled before going to guest mode - * - * Returns: 0 or an error code - */ -int xfer_to_guest_mode_handle_work(void); - -/** - * xfer_to_guest_mode_prepare - Perform last minute preparation work that - * need to be handled while IRQs are disabled - * upon entering to guest. - * - * Has to be invoked with interrupts disabled before the last call - * to xfer_to_guest_mode_work_pending(). - */ -static inline void xfer_to_guest_mode_prepare(void) -{ - lockdep_assert_irqs_disabled(); - tick_nohz_user_enter_prepare(); -} - -/** - * __xfer_to_guest_mode_work_pending - Check if work is pending - * - * Returns: True if work pending, False otherwise. - * - * Bare variant of xfer_to_guest_mode_work_pending(). Can be called from - * interrupt enabled code for racy quick checks with care. - */ -static inline bool __xfer_to_guest_mode_work_pending(void) -{ - unsigned long ti_work = read_thread_flags(); - - return !!(ti_work & XFER_TO_GUEST_MODE_WORK); -} - -/** - * xfer_to_guest_mode_work_pending - Check if work is pending which needs to be - * handled before returning to guest mode - * - * Returns: True if work pending, False otherwise. - * - * Has to be invoked with interrupts disabled before the transition to - * guest mode. - */ -static inline bool xfer_to_guest_mode_work_pending(void) -{ - lockdep_assert_irqs_disabled(); - return __xfer_to_guest_mode_work_pending(); -} -#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ - -#endif diff --git a/include/linux/entry-virt.h b/include/linux/entry-virt.h new file mode 100644 index 000000000000..42c89e3e5ca7 --- /dev/null +++ b/include/linux/entry-virt.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_ENTRYVIRT_H +#define __LINUX_ENTRYVIRT_H + +#include +#include +#include +#include +#include +#include + +/* Transfer to guest mode work */ +#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK + +#ifndef ARCH_XFER_TO_GUEST_MODE_WORK +# define ARCH_XFER_TO_GUEST_MODE_WORK (0) +#endif + +#define XFER_TO_GUEST_MODE_WORK \ + (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \ + _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ + ARCH_XFER_TO_GUEST_MODE_WORK) + +/** + * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest + * mode work handling function. + * @vcpu: Pointer to current's VCPU data + * @ti_work: Cached TIF flags gathered in xfer_to_guest_mode_handle_work() + * + * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be + * replaced by architecture specific code. + */ +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); + +#ifndef arch_xfer_to_guest_mode_work +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) +{ + return 0; +} +#endif + +/** + * xfer_to_guest_mode_handle_work - Check and handle pending work which needs + * to be handled before going to guest mode + * + * Returns: 0 or an error code + */ +int xfer_to_guest_mode_handle_work(void); + +/** + * xfer_to_guest_mode_prepare - Perform last minute preparation work that + * need to be handled while IRQs are disabled + * upon entering to guest. + * + * Has to be invoked with interrupts disabled before the last call + * to xfer_to_guest_mode_work_pending(). + */ +static inline void xfer_to_guest_mode_prepare(void) +{ + lockdep_assert_irqs_disabled(); + tick_nohz_user_enter_prepare(); +} + +/** + * __xfer_to_guest_mode_work_pending - Check if work is pending + * + * Returns: True if work pending, False otherwise. + * + * Bare variant of xfer_to_guest_mode_work_pending(). Can be called from + * interrupt enabled code for racy quick checks with care. + */ +static inline bool __xfer_to_guest_mode_work_pending(void) +{ + unsigned long ti_work = read_thread_flags(); + + return !!(ti_work & XFER_TO_GUEST_MODE_WORK); +} + +/** + * xfer_to_guest_mode_work_pending - Check if work is pending which needs to be + * handled before returning to guest mode + * + * Returns: True if work pending, False otherwise. + * + * Has to be invoked with interrupts disabled before the transition to + * guest mode. + */ +static inline bool xfer_to_guest_mode_work_pending(void) +{ + lockdep_assert_irqs_disabled(); + return __xfer_to_guest_mode_work_pending(); +} +#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ + +#endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 598b9473e46d..70ac2267d5d0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2,7 +2,7 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H -#include +#include #include #include #include @@ -2444,7 +2444,7 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ -#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK +#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) { vcpu->run->exit_reason = KVM_EXIT_INTR; @@ -2461,7 +2461,7 @@ static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) } return r; } -#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ +#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ /* * If more than one page is being (un)accounted, @virt must be the address of diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 120536f4c6eb..1e1f3aa375d9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -129,7 +129,7 @@ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ -#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) +#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_VIRT_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else static __always_inline void rcu_irq_work_resched(void) { } diff --git a/kernel/entry/Makefile b/kernel/entry/Makefile index 77fcd83dd663..2333d70802e4 100644 --- a/kernel/entry/Makefile +++ b/kernel/entry/Makefile @@ -14,4 +14,4 @@ CFLAGS_common.o += -fno-stack-protector obj-$(CONFIG_GENERIC_IRQ_ENTRY) += common.o obj-$(CONFIG_GENERIC_SYSCALL) += syscall-common.o syscall_user_dispatch.o -obj-$(CONFIG_KVM_XFER_TO_GUEST_WORK) += kvm.o +obj-$(CONFIG_VIRT_XFER_TO_GUEST_WORK) += virt.o diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c deleted file mode 100644 index 6fc762eaacca..000000000000 --- a/kernel/entry/kvm.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include - -static int xfer_to_guest_mode_work(unsigned long ti_work) -{ - do { - int ret; - - if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) - return -EINTR; - - if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) - schedule(); - - if (ti_work & _TIF_NOTIFY_RESUME) - resume_user_mode_work(NULL); - - ret = arch_xfer_to_guest_mode_handle_work(ti_work); - if (ret) - return ret; - - ti_work = read_thread_flags(); - } while (ti_work & XFER_TO_GUEST_MODE_WORK); - return 0; -} - -int xfer_to_guest_mode_handle_work(void) -{ - unsigned long ti_work; - - /* - * This is invoked from the outer guest loop with interrupts and - * preemption enabled. - * - * KVM invokes xfer_to_guest_mode_work_pending() with interrupts - * disabled in the inner loop before going into guest mode. No need - * to disable interrupts here. - */ - ti_work = read_thread_flags(); - if (!(ti_work & XFER_TO_GUEST_MODE_WORK)) - return 0; - - return xfer_to_guest_mode_work(ti_work); -} -EXPORT_SYMBOL_GPL(xfer_to_guest_mode_handle_work); diff --git a/kernel/entry/virt.c b/kernel/entry/virt.c new file mode 100644 index 000000000000..c52f99249763 --- /dev/null +++ b/kernel/entry/virt.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +static int xfer_to_guest_mode_work(unsigned long ti_work) +{ + do { + int ret; + + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) + return -EINTR; + + if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) + schedule(); + + if (ti_work & _TIF_NOTIFY_RESUME) + resume_user_mode_work(NULL); + + ret = arch_xfer_to_guest_mode_handle_work(ti_work); + if (ret) + return ret; + + ti_work = read_thread_flags(); + } while (ti_work & XFER_TO_GUEST_MODE_WORK); + return 0; +} + +int xfer_to_guest_mode_handle_work(void) +{ + unsigned long ti_work; + + /* + * This is invoked from the outer guest loop with interrupts and + * preemption enabled. + * + * KVM invokes xfer_to_guest_mode_work_pending() with interrupts + * disabled in the inner loop before going into guest mode. No need + * to disable interrupts here. + */ + ti_work = read_thread_flags(); + if (!(ti_work & XFER_TO_GUEST_MODE_WORK)) + return 0; + + return xfer_to_guest_mode_work(ti_work); +} +EXPORT_SYMBOL_GPL(xfer_to_guest_mode_handle_work); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 174ee243b349..995489b72535 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -573,7 +573,7 @@ void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len) } EXPORT_SYMBOL_GPL(rcutorture_format_gp_seqs); -#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) +#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_VIRT_XFER_TO_GUEST_WORK)) /* * An empty function that will trigger a reschedule on * IRQ tail once IRQs get re-enabled on userspace/guest resume. @@ -602,7 +602,7 @@ noinstr void rcu_irq_work_resched(void) if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU)) return; - if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU)) + if (IS_ENABLED(CONFIG_VIRT_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU)) return; instrumentation_begin(); @@ -611,7 +611,7 @@ noinstr void rcu_irq_work_resched(void) } instrumentation_end(); } -#endif /* #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) */ +#endif /* #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_VIRT_XFER_TO_GUEST_WORK)) */ #ifdef CONFIG_PROVE_RCU /** diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 727b542074e7..ce843db53831 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -87,7 +87,7 @@ config HAVE_KVM_VCPU_RUN_PID_CHANGE config HAVE_KVM_NO_POLL bool -config KVM_XFER_TO_GUEST_WORK +config VIRT_XFER_TO_GUEST_WORK bool config HAVE_KVM_PM_NOTIFIER -- cgit v1.2.3 From 7d096cb3e16f09d9762ae8cef897cdbc13a40029 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:33 +0800 Subject: virtio_ring: constify virtqueue pointer for DMA helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch constifies the virtqueue pointer for DMA helpers. Reviewed-by: Christoph Hellwig Reviewed-by: Xuan Zhuo Reviewed-by: Eugenio Pérez Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-2-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- drivers/virtio/virtio_ring.c | 25 +++++++++++++------------ include/linux/virtio.h | 12 ++++++------ 2 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index f5062061c408..103bad8cffff 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -3149,12 +3149,12 @@ EXPORT_SYMBOL_GPL(virtqueue_get_vring); * * return DMA address. Caller should check that by virtqueue_dma_mapping_error(). */ -dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, +dma_addr_t virtqueue_dma_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - struct vring_virtqueue *vq = to_vvq(_vq); + const struct vring_virtqueue *vq = to_vvq(_vq); if (!vq->use_dma_api) { kmsan_handle_dma(virt_to_page(ptr), offset_in_page(ptr), size, dir); @@ -3176,11 +3176,12 @@ EXPORT_SYMBOL_GPL(virtqueue_dma_map_single_attrs); * Unmap the address that is mapped by the virtqueue_dma_map_* APIs. * */ -void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_dma_unmap_single_attrs(const struct virtqueue *_vq, + dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - struct vring_virtqueue *vq = to_vvq(_vq); + const struct vring_virtqueue *vq = to_vvq(_vq); if (!vq->use_dma_api) return; @@ -3196,9 +3197,9 @@ EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs); * * Returns 0 means dma valid. Other means invalid dma address. */ -int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr) +int virtqueue_dma_mapping_error(const struct virtqueue *_vq, dma_addr_t addr) { - struct vring_virtqueue *vq = to_vvq(_vq); + const struct vring_virtqueue *vq = to_vvq(_vq); if (!vq->use_dma_api) return 0; @@ -3217,9 +3218,9 @@ EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error); * * return bool */ -bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr) +bool virtqueue_dma_need_sync(const struct virtqueue *_vq, dma_addr_t addr) { - struct vring_virtqueue *vq = to_vvq(_vq); + const struct vring_virtqueue *vq = to_vvq(_vq); if (!vq->use_dma_api) return false; @@ -3240,12 +3241,12 @@ EXPORT_SYMBOL_GPL(virtqueue_dma_need_sync); * the DMA address really needs to be synchronized * */ -void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, +void virtqueue_dma_sync_single_range_for_cpu(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) { - struct vring_virtqueue *vq = to_vvq(_vq); + const struct vring_virtqueue *vq = to_vvq(_vq); struct device *dev = vring_dma_dev(vq); if (!vq->use_dma_api) @@ -3266,12 +3267,12 @@ EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu); * Before calling this function, use virtqueue_dma_need_sync() to confirm that * the DMA address really needs to be synchronized */ -void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, +void virtqueue_dma_sync_single_range_for_device(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) { - struct vring_virtqueue *vq = to_vvq(_vq); + const struct vring_virtqueue *vq = to_vvq(_vq); struct device *dev = vring_dma_dev(vq); if (!vq->use_dma_api) diff --git a/include/linux/virtio.h b/include/linux/virtio.h index db31fc6f4f1f..eab71a440fba 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -262,18 +262,18 @@ void unregister_virtio_driver(struct virtio_driver *drv); module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) -dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, size_t size, +dma_addr_t virtqueue_dma_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs); -void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_dma_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); -int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr); +int virtqueue_dma_mapping_error(const struct virtqueue *_vq, dma_addr_t addr); -bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr); -void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t addr, +bool virtqueue_dma_need_sync(const struct virtqueue *_vq, dma_addr_t addr); +void virtqueue_dma_sync_single_range_for_cpu(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); -void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_dma_sync_single_range_for_device(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); -- cgit v1.2.3 From b41cb3bcf67fcb7b8297e5acc5bb3309c96c2ff2 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:35 +0800 Subject: virtio: rename dma helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following patch will introduce virtio mapping function to avoid abusing DMA API for device that doesn't do DMA. To ease the introduction, this patch rename "dma" to "map" for the current dma mapping helpers. Reviewed-by: Christoph Hellwig Reviewed-by: Xuan Zhuo Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-4-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- drivers/net/virtio_net.c | 28 +++++------ drivers/virtio/virtio_ring.c | 114 +++++++++++++++++++++---------------------- include/linux/virtio.h | 12 ++--- 3 files changed, 77 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 975bdc5dab84..31bd32bdecaf 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -962,7 +962,7 @@ static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) if (dma->need_sync && len) { offset = buf - (head + sizeof(*dma)); - virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, + virtqueue_map_sync_single_range_for_cpu(rq->vq, dma->addr, offset, len, DMA_FROM_DEVICE); } @@ -970,8 +970,8 @@ static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) if (dma->ref) return; - virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, - DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + virtqueue_unmap_single_attrs(rq->vq, dma->addr, dma->len, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); put_page(page); } @@ -1038,13 +1038,13 @@ static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) dma->len = alloc_frag->size - sizeof(*dma); - addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, - dma->len, DMA_FROM_DEVICE, 0); - if (virtqueue_dma_mapping_error(rq->vq, addr)) + addr = virtqueue_map_single_attrs(rq->vq, dma + 1, + dma->len, DMA_FROM_DEVICE, 0); + if (virtqueue_map_mapping_error(rq->vq, addr)) return NULL; dma->addr = addr; - dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); + dma->need_sync = virtqueue_map_need_sync(rq->vq, addr); /* Add a reference to dma to prevent the entire dma from * being released during error handling. This reference @@ -5952,9 +5952,9 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, if (!rq->xsk_buffs) return -ENOMEM; - hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, - DMA_TO_DEVICE, 0); - if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) { + hdr_dma = virtqueue_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, + DMA_TO_DEVICE, 0); + if (virtqueue_map_mapping_error(sq->vq, hdr_dma)) { err = -ENOMEM; goto err_free_buffs; } @@ -5983,8 +5983,8 @@ err_sq: err_rq: xsk_pool_dma_unmap(pool, 0); err_xsk_map: - virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, - DMA_TO_DEVICE, 0); + virtqueue_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, + DMA_TO_DEVICE, 0); err_free_buffs: kvfree(rq->xsk_buffs); return err; @@ -6011,8 +6011,8 @@ static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) xsk_pool_dma_unmap(pool, 0); - virtqueue_dma_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr, - vi->hdr_len, DMA_TO_DEVICE, 0); + virtqueue_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr, + vi->hdr_len, DMA_TO_DEVICE, 0); kvfree(rq->xsk_buffs); return err; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 75e5f6336c8d..482a268af851 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -166,7 +166,7 @@ struct vring_virtqueue { bool packed_ring; /* Is DMA API used? */ - bool use_dma_api; + bool use_map_api; /* Can we use weak barriers? */ bool weak_barriers; @@ -268,7 +268,7 @@ static bool virtqueue_use_indirect(const struct vring_virtqueue *vq, * unconditionally on data path. */ -static bool vring_use_dma_api(const struct virtio_device *vdev) +static bool vring_use_map_api(const struct virtio_device *vdev) { if (!virtio_has_dma_quirk(vdev)) return true; @@ -291,14 +291,14 @@ static bool vring_use_dma_api(const struct virtio_device *vdev) static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring, const struct vring_desc_extra *extra) { - return vring->use_dma_api && (extra->addr != DMA_MAPPING_ERROR); + return vring->use_map_api && (extra->addr != DMA_MAPPING_ERROR); } size_t virtio_max_dma_size(const struct virtio_device *vdev) { size_t max_segment_size = SIZE_MAX; - if (vring_use_dma_api(vdev)) + if (vring_use_map_api(vdev)) max_segment_size = dma_max_mapping_size(vdev->dev.parent); return max_segment_size; @@ -309,7 +309,7 @@ static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, dma_addr_t *dma_handle, gfp_t flag, struct device *dma_dev) { - if (vring_use_dma_api(vdev)) { + if (vring_use_map_api(vdev)) { return dma_alloc_coherent(dma_dev, size, dma_handle, flag); } else { @@ -343,7 +343,7 @@ static void vring_free_queue(struct virtio_device *vdev, size_t size, void *queue, dma_addr_t dma_handle, struct device *dma_dev) { - if (vring_use_dma_api(vdev)) + if (vring_use_map_api(vdev)) dma_free_coherent(dma_dev, size, queue, dma_handle); else free_pages_exact(queue, PAGE_ALIGN(size)); @@ -372,7 +372,7 @@ static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *len = sg->length; - if (!vq->use_dma_api) { + if (!vq->use_map_api) { /* * If DMA is not used, KMSAN doesn't know that the scatterlist * is initialized by the hardware. Explicitly check/unpoison it @@ -402,17 +402,17 @@ static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, void *cpu_addr, size_t size, enum dma_data_direction direction) { - if (!vq->use_dma_api) + if (!vq->use_map_api) return (dma_addr_t)virt_to_phys(cpu_addr); - return virtqueue_dma_map_single_attrs(&vq->vq, cpu_addr, - size, direction, 0); + return virtqueue_map_single_attrs(&vq->vq, cpu_addr, + size, direction, 0); } static int vring_mapping_error(const struct vring_virtqueue *vq, dma_addr_t addr) { - if (!vq->use_dma_api) + if (!vq->use_map_api) return 0; return dma_mapping_error(vring_dma_dev(vq), addr); @@ -449,7 +449,7 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, flags = extra->flags; if (flags & VRING_DESC_F_INDIRECT) { - if (!vq->use_dma_api) + if (!vq->use_map_api) goto out; } else if (!vring_need_unmap_buffer(vq, extra)) goto out; @@ -782,7 +782,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, extra = (struct vring_desc_extra *)&indir_desc[num]; - if (vq->use_dma_api) { + if (vq->use_map_api) { for (j = 0; j < num; j++) vring_unmap_one_split(vq, &extra[j]); } @@ -1150,7 +1150,7 @@ static struct virtqueue *__vring_new_virtqueue_split(unsigned int index, vq->broken = false; #endif vq->dma_dev = dma_dev; - vq->use_dma_api = vring_use_dma_api(vdev); + vq->use_map_api = vring_use_map_api(vdev); vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -1266,7 +1266,7 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, flags = extra->flags; if (flags & VRING_DESC_F_INDIRECT) { - if (!vq->use_dma_api) + if (!vq->use_map_api) return; } else if (!vring_need_unmap_buffer(vq, extra)) return; @@ -1351,7 +1351,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, desc[i].addr = cpu_to_le64(addr); desc[i].len = cpu_to_le32(len); - if (unlikely(vq->use_dma_api)) { + if (unlikely(vq->use_map_api)) { extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr; extra[i].len = len; extra[i].flags = n < out_sgs ? 0 : VRING_DESC_F_WRITE; @@ -1373,7 +1373,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, sizeof(struct vring_packed_desc)); vq->packed.vring.desc[head].id = cpu_to_le16(id); - if (vq->use_dma_api) { + if (vq->use_map_api) { vq->packed.desc_extra[id].addr = addr; vq->packed.desc_extra[id].len = total_sg * sizeof(struct vring_packed_desc); @@ -1515,7 +1515,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, desc[i].len = cpu_to_le32(len); desc[i].id = cpu_to_le16(id); - if (unlikely(vq->use_dma_api)) { + if (unlikely(vq->use_map_api)) { vq->packed.desc_extra[curr].addr = premapped ? DMA_MAPPING_ERROR : addr; vq->packed.desc_extra[curr].len = len; @@ -1650,7 +1650,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, vq->free_head = id; vq->vq.num_free += state->num; - if (unlikely(vq->use_dma_api)) { + if (unlikely(vq->use_map_api)) { curr = id; for (i = 0; i < state->num; i++) { vring_unmap_extra_packed(vq, @@ -1668,7 +1668,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, if (!desc) return; - if (vq->use_dma_api) { + if (vq->use_map_api) { len = vq->packed.desc_extra[id].len; num = len / sizeof(struct vring_packed_desc); @@ -2121,7 +2121,7 @@ static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index, #endif vq->packed_ring = true; vq->dma_dev = dma_dev; - vq->use_dma_api = vring_use_dma_api(vdev); + vq->use_map_api = vring_use_map_api(vdev); vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -2433,7 +2433,7 @@ struct device *virtqueue_dma_dev(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - if (vq->use_dma_api) + if (vq->use_map_api) return vring_dma_dev(vq); else return NULL; @@ -3122,7 +3122,7 @@ const struct vring *virtqueue_get_vring(const struct virtqueue *vq) EXPORT_SYMBOL_GPL(virtqueue_get_vring); /** - * virtqueue_dma_map_single_attrs - map DMA for _vq + * virtqueue_map_single_attrs - map DMA for _vq * @_vq: the struct virtqueue we're talking about. * @ptr: the pointer of the buffer to do dma * @size: the size of the buffer to do dma @@ -3132,16 +3132,16 @@ EXPORT_SYMBOL_GPL(virtqueue_get_vring); * The caller calls this to do dma mapping in advance. The DMA address can be * passed to this _vq when it is in pre-mapped mode. * - * return DMA address. Caller should check that by virtqueue_dma_mapping_error(). + * return DMA address. Caller should check that by virtqueue_mapping_error(). */ -dma_addr_t virtqueue_dma_map_single_attrs(const struct virtqueue *_vq, void *ptr, - size_t size, - enum dma_data_direction dir, - unsigned long attrs) +dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, + size_t size, + enum dma_data_direction dir, + unsigned long attrs) { const struct vring_virtqueue *vq = to_vvq(_vq); - if (!vq->use_dma_api) { + if (!vq->use_map_api) { kmsan_handle_dma(virt_to_page(ptr), offset_in_page(ptr), size, dir); return (dma_addr_t)virt_to_phys(ptr); } @@ -3154,85 +3154,85 @@ dma_addr_t virtqueue_dma_map_single_attrs(const struct virtqueue *_vq, void *ptr return dma_map_page_attrs(vring_dma_dev(vq), virt_to_page(ptr), offset_in_page(ptr), size, dir, attrs); } -EXPORT_SYMBOL_GPL(virtqueue_dma_map_single_attrs); +EXPORT_SYMBOL_GPL(virtqueue_map_single_attrs); /** - * virtqueue_dma_unmap_single_attrs - unmap DMA for _vq + * virtqueue_unmap_single_attrs - unmap map for _vq * @_vq: the struct virtqueue we're talking about. * @addr: the dma address to unmap * @size: the size of the buffer * @dir: DMA direction * @attrs: DMA Attrs * - * Unmap the address that is mapped by the virtqueue_dma_map_* APIs. + * Unmap the address that is mapped by the virtqueue_map_* APIs. * */ -void virtqueue_dma_unmap_single_attrs(const struct virtqueue *_vq, - dma_addr_t addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) +void virtqueue_unmap_single_attrs(const struct virtqueue *_vq, + dma_addr_t addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) { const struct vring_virtqueue *vq = to_vvq(_vq); - if (!vq->use_dma_api) + if (!vq->use_map_api) return; dma_unmap_page_attrs(vring_dma_dev(vq), addr, size, dir, attrs); } -EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs); +EXPORT_SYMBOL_GPL(virtqueue_unmap_single_attrs); /** - * virtqueue_dma_mapping_error - check dma address + * virtqueue_map_mapping_error - check dma address * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * * Returns 0 means dma valid. Other means invalid dma address. */ -int virtqueue_dma_mapping_error(const struct virtqueue *_vq, dma_addr_t addr) +int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr) { const struct vring_virtqueue *vq = to_vvq(_vq); - if (!vq->use_dma_api) + if (!vq->use_map_api) return 0; return dma_mapping_error(vring_dma_dev(vq), addr); } -EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error); +EXPORT_SYMBOL_GPL(virtqueue_map_mapping_error); /** - * virtqueue_dma_need_sync - check a dma address needs sync + * virtqueue_map_need_sync - check a dma address needs sync * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * - * Check if the dma address mapped by the virtqueue_dma_map_* APIs needs to be + * Check if the dma address mapped by the virtqueue_map_* APIs needs to be * synchronized * * return bool */ -bool virtqueue_dma_need_sync(const struct virtqueue *_vq, dma_addr_t addr) +bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr) { const struct vring_virtqueue *vq = to_vvq(_vq); - if (!vq->use_dma_api) + if (!vq->use_map_api) return false; return dma_need_sync(vring_dma_dev(vq), addr); } -EXPORT_SYMBOL_GPL(virtqueue_dma_need_sync); +EXPORT_SYMBOL_GPL(virtqueue_map_need_sync); /** - * virtqueue_dma_sync_single_range_for_cpu - dma sync for cpu + * virtqueue_map_sync_single_range_for_cpu - map sync for cpu * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * @offset: DMA address offset * @size: buf size for sync * @dir: DMA direction * - * Before calling this function, use virtqueue_dma_need_sync() to confirm that + * Before calling this function, use virtqueue_map_need_sync() to confirm that * the DMA address really needs to be synchronized * */ -void virtqueue_dma_sync_single_range_for_cpu(const struct virtqueue *_vq, +void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) @@ -3240,25 +3240,25 @@ void virtqueue_dma_sync_single_range_for_cpu(const struct virtqueue *_vq, const struct vring_virtqueue *vq = to_vvq(_vq); struct device *dev = vring_dma_dev(vq); - if (!vq->use_dma_api) + if (!vq->use_map_api) return; dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); } -EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu); +EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_cpu); /** - * virtqueue_dma_sync_single_range_for_device - dma sync for device + * virtqueue_map_sync_single_range_for_device - map sync for device * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * @offset: DMA address offset * @size: buf size for sync * @dir: DMA direction * - * Before calling this function, use virtqueue_dma_need_sync() to confirm that + * Before calling this function, use virtqueue_map_need_sync() to confirm that * the DMA address really needs to be synchronized */ -void virtqueue_dma_sync_single_range_for_device(const struct virtqueue *_vq, +void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) @@ -3266,12 +3266,12 @@ void virtqueue_dma_sync_single_range_for_device(const struct virtqueue *_vq, const struct vring_virtqueue *vq = to_vvq(_vq); struct device *dev = vring_dma_dev(vq); - if (!vq->use_dma_api) + if (!vq->use_map_api) return; dma_sync_single_range_for_device(dev, addr, offset, size, dir); } -EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device); +EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_device); MODULE_DESCRIPTION("Virtio ring implementation"); MODULE_LICENSE("GPL"); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index eab71a440fba..576e08bd7697 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -262,18 +262,18 @@ void unregister_virtio_driver(struct virtio_driver *drv); module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) -dma_addr_t virtqueue_dma_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, +dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs); -void virtqueue_dma_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); -int virtqueue_dma_mapping_error(const struct virtqueue *_vq, dma_addr_t addr); +int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr); -bool virtqueue_dma_need_sync(const struct virtqueue *_vq, dma_addr_t addr); -void virtqueue_dma_sync_single_range_for_cpu(const struct virtqueue *_vq, dma_addr_t addr, +bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr); +void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); -void virtqueue_dma_sync_single_range_for_device(const struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); -- cgit v1.2.3 From b16060c5c7d56455da3c3c50b4a20a83c2a30810 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:36 +0800 Subject: virtio: introduce virtio_map container union MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following patch will introduce the mapping operations for virtio device. In order to achieve this, besides the dma device, virtio core needs to support a transport or device specific mapping metadata as well. So this patch introduces a union container of a dma device. The idea is the allow the transport layer to pass device specific mapping metadata which will be used as a parameter for the virtio mapping operations. For the transport or device that is using DMA, dma device is still being used. Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-5-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- drivers/virtio/virtio_ring.c | 104 ++++++++++++++++++++++--------------------- drivers/virtio/virtio_vdpa.c | 6 ++- include/linux/virtio.h | 5 +++ include/linux/virtio_ring.h | 7 +-- 4 files changed, 66 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 482a268af851..cda9bc2121bf 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -210,8 +210,7 @@ struct vring_virtqueue { /* DMA, allocation, and size information */ bool we_own_ring; - /* Device used for doing DMA */ - struct device *dma_dev; + union virtio_map map; #ifdef DEBUG /* They're supposed to lock for us. */ @@ -307,10 +306,10 @@ EXPORT_SYMBOL_GPL(virtio_max_dma_size); static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct device *dma_dev) + union virtio_map map) { if (vring_use_map_api(vdev)) { - return dma_alloc_coherent(dma_dev, size, + return dma_alloc_coherent(map.dma_dev, size, dma_handle, flag); } else { void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); @@ -341,10 +340,10 @@ static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, static void vring_free_queue(struct virtio_device *vdev, size_t size, void *queue, dma_addr_t dma_handle, - struct device *dma_dev) + union virtio_map map) { if (vring_use_map_api(vdev)) - dma_free_coherent(dma_dev, size, queue, dma_handle); + dma_free_coherent(map.dma_dev, size, queue, dma_handle); else free_pages_exact(queue, PAGE_ALIGN(size)); } @@ -356,7 +355,7 @@ static void vring_free_queue(struct virtio_device *vdev, size_t size, */ static struct device *vring_dma_dev(const struct vring_virtqueue *vq) { - return vq->dma_dev; + return vq->map.dma_dev; } /* Map one sg entry. */ @@ -1056,12 +1055,13 @@ err_state: } static void vring_free_split(struct vring_virtqueue_split *vring_split, - struct virtio_device *vdev, struct device *dma_dev) + struct virtio_device *vdev, + union virtio_map map) { vring_free_queue(vdev, vring_split->queue_size_in_bytes, vring_split->vring.desc, vring_split->queue_dma_addr, - dma_dev); + map); kfree(vring_split->desc_state); kfree(vring_split->desc_extra); @@ -1072,7 +1072,7 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, u32 num, unsigned int vring_align, bool may_reduce_num, - struct device *dma_dev) + union virtio_map map) { void *queue = NULL; dma_addr_t dma_addr; @@ -1088,7 +1088,7 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, - dma_dev); + map); if (queue) break; if (!may_reduce_num) @@ -1102,7 +1102,7 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, /* Try to get a single page. You are my only hope! */ queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr, GFP_KERNEL | __GFP_ZERO, - dma_dev); + map); } if (!queue) return -ENOMEM; @@ -1126,7 +1126,7 @@ static struct virtqueue *__vring_new_virtqueue_split(unsigned int index, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, - struct device *dma_dev) + union virtio_map map) { struct vring_virtqueue *vq; int err; @@ -1149,7 +1149,7 @@ static struct virtqueue *__vring_new_virtqueue_split(unsigned int index, #else vq->broken = false; #endif - vq->dma_dev = dma_dev; + vq->map = map; vq->use_map_api = vring_use_map_api(vdev); vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && @@ -1187,21 +1187,21 @@ static struct virtqueue *vring_create_virtqueue_split( bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, - struct device *dma_dev) + union virtio_map map) { struct vring_virtqueue_split vring_split = {}; struct virtqueue *vq; int err; err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align, - may_reduce_num, dma_dev); + may_reduce_num, map); if (err) return NULL; vq = __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers, - context, notify, callback, name, dma_dev); + context, notify, callback, name, map); if (!vq) { - vring_free_split(&vring_split, vdev, dma_dev); + vring_free_split(&vring_split, vdev, map); return NULL; } @@ -1220,7 +1220,7 @@ static int virtqueue_resize_split(struct virtqueue *_vq, u32 num) err = vring_alloc_queue_split(&vring_split, vdev, num, vq->split.vring_align, vq->split.may_reduce_num, - vring_dma_dev(vq)); + vq->map); if (err) goto err; @@ -1238,7 +1238,7 @@ static int virtqueue_resize_split(struct virtqueue *_vq, u32 num) return 0; err_state_extra: - vring_free_split(&vring_split, vdev, vring_dma_dev(vq)); + vring_free_split(&vring_split, vdev, vq->map); err: virtqueue_reinit_split(vq); return -ENOMEM; @@ -1947,25 +1947,25 @@ static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num) static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, struct virtio_device *vdev, - struct device *dma_dev) + union virtio_map map) { if (vring_packed->vring.desc) vring_free_queue(vdev, vring_packed->ring_size_in_bytes, vring_packed->vring.desc, vring_packed->ring_dma_addr, - dma_dev); + map); if (vring_packed->vring.driver) vring_free_queue(vdev, vring_packed->event_size_in_bytes, vring_packed->vring.driver, vring_packed->driver_event_dma_addr, - dma_dev); + map); if (vring_packed->vring.device) vring_free_queue(vdev, vring_packed->event_size_in_bytes, vring_packed->vring.device, vring_packed->device_event_dma_addr, - dma_dev); + map); kfree(vring_packed->desc_state); kfree(vring_packed->desc_extra); @@ -1973,7 +1973,7 @@ static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, struct virtio_device *vdev, - u32 num, struct device *dma_dev) + u32 num, union virtio_map map) { struct vring_packed_desc *ring; struct vring_packed_desc_event *driver, *device; @@ -1985,7 +1985,7 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, ring = vring_alloc_queue(vdev, ring_size_in_bytes, &ring_dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, - dma_dev); + map); if (!ring) goto err; @@ -1998,7 +1998,7 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, driver = vring_alloc_queue(vdev, event_size_in_bytes, &driver_event_dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, - dma_dev); + map); if (!driver) goto err; @@ -2009,7 +2009,7 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, device = vring_alloc_queue(vdev, event_size_in_bytes, &device_event_dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, - dma_dev); + map); if (!device) goto err; @@ -2021,7 +2021,7 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, return 0; err: - vring_free_packed(vring_packed, vdev, dma_dev); + vring_free_packed(vring_packed, vdev, map); return -ENOMEM; } @@ -2097,7 +2097,7 @@ static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, - struct device *dma_dev) + union virtio_map map) { struct vring_virtqueue *vq; int err; @@ -2120,7 +2120,7 @@ static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index, vq->broken = false; #endif vq->packed_ring = true; - vq->dma_dev = dma_dev; + vq->map = map; vq->use_map_api = vring_use_map_api(vdev); vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && @@ -2158,18 +2158,18 @@ static struct virtqueue *vring_create_virtqueue_packed( bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, - struct device *dma_dev) + union virtio_map map) { struct vring_virtqueue_packed vring_packed = {}; struct virtqueue *vq; - if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev)) + if (vring_alloc_queue_packed(&vring_packed, vdev, num, map)) return NULL; vq = __vring_new_virtqueue_packed(index, &vring_packed, vdev, weak_barriers, - context, notify, callback, name, dma_dev); + context, notify, callback, name, map); if (!vq) { - vring_free_packed(&vring_packed, vdev, dma_dev); + vring_free_packed(&vring_packed, vdev, map); return NULL; } @@ -2185,7 +2185,7 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num) struct virtio_device *vdev = _vq->vdev; int err; - if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq))) + if (vring_alloc_queue_packed(&vring_packed, vdev, num, vq->map)) goto err_ring; err = vring_alloc_state_extra_packed(&vring_packed); @@ -2202,7 +2202,7 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num) return 0; err_state_extra: - vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq)); + vring_free_packed(&vring_packed, vdev, vq->map); err_ring: virtqueue_reinit_packed(vq); return -ENOMEM; @@ -2434,7 +2434,7 @@ struct device *virtqueue_dma_dev(struct virtqueue *_vq) struct vring_virtqueue *vq = to_vvq(_vq); if (vq->use_map_api) - return vring_dma_dev(vq); + return vq->map.dma_dev; else return NULL; } @@ -2719,19 +2719,20 @@ struct virtqueue *vring_create_virtqueue( void (*callback)(struct virtqueue *), const char *name) { + union virtio_map map = {.dma_dev = vdev->dev.parent}; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return vring_create_virtqueue_packed(index, num, vring_align, vdev, weak_barriers, may_reduce_num, - context, notify, callback, name, vdev->dev.parent); + context, notify, callback, name, map); return vring_create_virtqueue_split(index, num, vring_align, vdev, weak_barriers, may_reduce_num, - context, notify, callback, name, vdev->dev.parent); + context, notify, callback, name, map); } EXPORT_SYMBOL_GPL(vring_create_virtqueue); -struct virtqueue *vring_create_virtqueue_dma( +struct virtqueue *vring_create_virtqueue_map( unsigned int index, unsigned int num, unsigned int vring_align, @@ -2742,19 +2743,19 @@ struct virtqueue *vring_create_virtqueue_dma( bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, - struct device *dma_dev) + union virtio_map map) { if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return vring_create_virtqueue_packed(index, num, vring_align, vdev, weak_barriers, may_reduce_num, - context, notify, callback, name, dma_dev); + context, notify, callback, name, map); return vring_create_virtqueue_split(index, num, vring_align, vdev, weak_barriers, may_reduce_num, - context, notify, callback, name, dma_dev); + context, notify, callback, name, map); } -EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); +EXPORT_SYMBOL_GPL(vring_create_virtqueue_map); /** * virtqueue_resize - resize the vring of vq @@ -2865,6 +2866,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, const char *name) { struct vring_virtqueue_split vring_split = {}; + union virtio_map map = {.dma_dev = vdev->dev.parent}; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) { struct vring_virtqueue_packed vring_packed = {}; @@ -2874,13 +2876,13 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, return __vring_new_virtqueue_packed(index, &vring_packed, vdev, weak_barriers, context, notify, callback, - name, vdev->dev.parent); + name, map); } vring_init(&vring_split.vring, num, pages, vring_align); return __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers, context, notify, callback, name, - vdev->dev.parent); + map); } EXPORT_SYMBOL_GPL(vring_new_virtqueue); @@ -2894,19 +2896,19 @@ static void vring_free(struct virtqueue *_vq) vq->packed.ring_size_in_bytes, vq->packed.vring.desc, vq->packed.ring_dma_addr, - vring_dma_dev(vq)); + vq->map); vring_free_queue(vq->vq.vdev, vq->packed.event_size_in_bytes, vq->packed.vring.driver, vq->packed.driver_event_dma_addr, - vring_dma_dev(vq)); + vq->map); vring_free_queue(vq->vq.vdev, vq->packed.event_size_in_bytes, vq->packed.vring.device, vq->packed.device_event_dma_addr, - vring_dma_dev(vq)); + vq->map); kfree(vq->packed.desc_state); kfree(vq->packed.desc_extra); @@ -2915,7 +2917,7 @@ static void vring_free(struct virtqueue *_vq) vq->split.queue_size_in_bytes, vq->split.vring.desc, vq->split.queue_dma_addr, - vring_dma_dev(vq)); + vq->map); } } if (!vq->packed_ring) { diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 657b07a60788..dc557aa7c825 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -139,6 +139,7 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, struct vdpa_callback cb; struct virtqueue *vq; u64 desc_addr, driver_addr, device_addr; + union virtio_map map = {0}; /* Assume split virtqueue, switch to packed if necessary */ struct vdpa_vq_state state = {0}; u32 align, max_num, min_num = 1; @@ -185,9 +186,10 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, dma_dev = ops->get_vq_dma_dev(vdpa, index); else dma_dev = vdpa_get_dma_dev(vdpa); - vq = vring_create_virtqueue_dma(index, max_num, align, vdev, + map.dma_dev = dma_dev; + vq = vring_create_virtqueue_map(index, max_num, align, vdev, true, may_reduce_num, ctx, - notify, callback, name, dma_dev); + notify, callback, name, map); if (!vq) { err = -ENOMEM; goto error_new_virtqueue; diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 576e08bd7697..b4ba1a99e5ab 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -41,6 +41,11 @@ struct virtqueue { void *priv; }; +union virtio_map { + /* Device that performs DMA */ + struct device *dma_dev; +}; + int virtqueue_add_outbuf(struct virtqueue *vq, struct scatterlist sg[], unsigned int num, void *data, diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 9b33df741b63..c97a12c1cda3 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -3,6 +3,7 @@ #define _LINUX_VIRTIO_RING_H #include +#include #include #include @@ -79,9 +80,9 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, /* * Creates a virtqueue and allocates the descriptor ring with per - * virtqueue DMA device. + * virtqueue mapping operations. */ -struct virtqueue *vring_create_virtqueue_dma(unsigned int index, +struct virtqueue *vring_create_virtqueue_map(unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, @@ -91,7 +92,7 @@ struct virtqueue *vring_create_virtqueue_dma(unsigned int index, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), const char *name, - struct device *dma_dev); + union virtio_map map); /* * Creates a virtqueue with a standard layout but a caller-allocated -- cgit v1.2.3 From bee8c7c24b737338216dc0f87d6c47a4abaf609a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:38 +0800 Subject: virtio: introduce map ops in virtio core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces map operations for virtio device. Virtio used to use DMA API which is not necessarily the case since some devices doesn't do DMA. Instead of using tricks and abusing DMA API, let's simply abstract the current mapping logic into a virtio specific mapping operations. For the device or transport that doesn't do DMA, they can implement their own mapping logic without the need to trick DMA core. In this case the mapping metadata is opaque to the virtio core that will be passed back to the transport or device specific map operations. For other devices, DMA API will still be used, so map token will still be the dma device to minimize the changeset and performance impact. The mapping operations are abstracted as a independent structure instead of reusing virtio_config_ops. This allows the transport can simply reuse the structure for lower layers like vDPA. A set of new mapping helpers were introduced for the device that want to do mapping by themselves. Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-7-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- drivers/virtio/virtio_ring.c | 211 +++++++++++++++++++++++++++++++++--------- drivers/virtio/virtio_vdpa.c | 3 + include/linux/virtio.h | 25 +++++ include/linux/virtio_config.h | 72 ++++++++++++++ 4 files changed, 269 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 46515b017ccb..f91a432b3e53 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -297,8 +297,14 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev) { size_t max_segment_size = SIZE_MAX; - if (vring_use_map_api(vdev)) - max_segment_size = dma_max_mapping_size(vdev->dev.parent); + if (vring_use_map_api(vdev)) { + if (vdev->map) { + max_segment_size = + vdev->map->max_mapping_size(vdev->vmap); + } else + max_segment_size = + dma_max_mapping_size(vdev->dev.parent); + } return max_segment_size; } @@ -309,8 +315,8 @@ static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, union virtio_map map) { if (vring_use_map_api(vdev)) { - return dma_alloc_coherent(map.dma_dev, size, - map_handle, flag); + return virtqueue_map_alloc_coherent(vdev, map, size, + map_handle, flag); } else { void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); @@ -343,7 +349,8 @@ static void vring_free_queue(struct virtio_device *vdev, size_t size, union virtio_map map) { if (vring_use_map_api(vdev)) - dma_free_coherent(map.dma_dev, size, queue, map_handle); + virtqueue_map_free_coherent(vdev, map, size, + queue, map_handle); else free_pages_exact(queue, PAGE_ALIGN(size)); } @@ -358,6 +365,20 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq) return vq->map.dma_dev; } +static int vring_mapping_error(const struct vring_virtqueue *vq, + dma_addr_t addr) +{ + struct virtio_device *vdev = vq->vq.vdev; + + if (!vq->use_map_api) + return 0; + + if (vdev->map) + return vdev->map->mapping_error(vq->map, addr); + else + return dma_mapping_error(vring_dma_dev(vq), addr); +} + /* Map one sg entry. */ static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, enum dma_data_direction direction, dma_addr_t *addr, @@ -387,11 +408,11 @@ static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist * the way it expects (we don't guarantee that the scatterlist * will exist for the lifetime of the mapping). */ - *addr = dma_map_page(vring_dma_dev(vq), - sg_page(sg), sg->offset, sg->length, - direction); + *addr = virtqueue_map_page_attrs(&vq->vq, sg_page(sg), + sg->offset, sg->length, + direction, 0); - if (dma_mapping_error(vring_dma_dev(vq), *addr)) + if (vring_mapping_error(vq, *addr)) return -ENOMEM; return 0; @@ -408,15 +429,6 @@ static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, size, direction, 0); } -static int vring_mapping_error(const struct vring_virtqueue *vq, - dma_addr_t addr) -{ - if (!vq->use_map_api) - return 0; - - return dma_mapping_error(vring_dma_dev(vq), addr); -} - static void virtqueue_init(struct vring_virtqueue *vq, u32 num) { vq->vq.num_free = num; @@ -453,11 +465,12 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, } else if (!vring_need_unmap_buffer(vq, extra)) goto out; - dma_unmap_page(vring_dma_dev(vq), - extra->addr, - extra->len, - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); + virtqueue_unmap_page_attrs(&vq->vq, + extra->addr, + extra->len, + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE, + 0); out: return extra->next; @@ -1271,10 +1284,11 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, } else if (!vring_need_unmap_buffer(vq, extra)) return; - dma_unmap_page(vring_dma_dev(vq), - extra->addr, extra->len, - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); + virtqueue_unmap_page_attrs(&vq->vq, + extra->addr, extra->len, + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE, + 0); } static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, @@ -2433,7 +2447,7 @@ struct device *virtqueue_dma_dev(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - if (vq->use_map_api) + if (vq->use_map_api && !_vq->vdev->map) return vq->map.dma_dev; else return NULL; @@ -3123,6 +3137,107 @@ const struct vring *virtqueue_get_vring(const struct virtqueue *vq) } EXPORT_SYMBOL_GPL(virtqueue_get_vring); +/** + * virtqueue_map_alloc_coherent - alloc coherent mapping + * @vdev: the virtio device we are talking to + * @map: metadata for performing mapping + * @size: the size of the buffer + * @map_handle: the pointer to the mapped address + * @gfp: allocation flag (GFP_XXX) + * + * return virtual address or NULL on error + */ +void *virtqueue_map_alloc_coherent(struct virtio_device *vdev, + union virtio_map map, + size_t size, dma_addr_t *map_handle, + gfp_t gfp) +{ + if (vdev->map) + return vdev->map->alloc(map, size, + map_handle, gfp); + else + return dma_alloc_coherent(map.dma_dev, size, + map_handle, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_map_alloc_coherent); + +/** + * virtqueue_map_free_coherent - free coherent mapping + * @vdev: the virtio device we are talking to + * @map: metadata for performing mapping + * @size: the size of the buffer + * @map_handle: the mapped address that needs to be freed + * + */ +void virtqueue_map_free_coherent(struct virtio_device *vdev, + union virtio_map map, size_t size, void *vaddr, + dma_addr_t map_handle) +{ + if (vdev->map) + vdev->map->free(map, size, vaddr, + map_handle, 0); + else + dma_free_coherent(map.dma_dev, size, vaddr, map_handle); +} +EXPORT_SYMBOL_GPL(virtqueue_map_free_coherent); + +/** + * virtqueue_map_page_attrs - map a page to the device + * @_vq: the virtqueue we are talking to + * @page: the page that will be mapped by the device + * @offset: the offset in the page for a buffer + * @size: the buffer size + * @dir: mapping direction + * @attrs: mapping attributes + * + * Returns mapped address. Caller should check that by virtqueue_mapping_error(). + */ +dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; + + if (vdev->map) + return vdev->map->map_page(vq->map, + page, offset, size, + dir, attrs); + + return dma_map_page_attrs(vring_dma_dev(vq), + page, offset, size, + dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_map_page_attrs); + +/** + * virtqueue_unmap_page_attrs - map a page to the device + * @_vq: the virtqueue we are talking to + * @map_handle: the mapped address + * @size: the buffer size + * @dir: mapping direction + * @attrs: unmapping attributes + */ +void virtqueue_unmap_page_attrs(const struct virtqueue *_vq, + dma_addr_t map_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; + + if (vdev->map) + vdev->map->unmap_page(vq->map, + map_handle, size, dir, attrs); + else + dma_unmap_page_attrs(vring_dma_dev(vq), map_handle, + size, dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_unmap_page_attrs); + /** * virtqueue_map_single_attrs - map DMA for _vq * @_vq: the struct virtqueue we're talking about. @@ -3134,7 +3249,7 @@ EXPORT_SYMBOL_GPL(virtqueue_get_vring); * The caller calls this to do dma mapping in advance. The DMA address can be * passed to this _vq when it is in pre-mapped mode. * - * return DMA address. Caller should check that by virtqueue_mapping_error(). + * return mapped address. Caller should check that by virtqueue_mapping_error(). */ dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, @@ -3153,8 +3268,8 @@ dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, "rejecting DMA map of vmalloc memory\n")) return DMA_MAPPING_ERROR; - return dma_map_page_attrs(vring_dma_dev(vq), virt_to_page(ptr), - offset_in_page(ptr), size, dir, attrs); + return virtqueue_map_page_attrs(&vq->vq, virt_to_page(ptr), + offset_in_page(ptr), size, dir, attrs); } EXPORT_SYMBOL_GPL(virtqueue_map_single_attrs); @@ -3179,12 +3294,12 @@ void virtqueue_unmap_single_attrs(const struct virtqueue *_vq, if (!vq->use_map_api) return; - dma_unmap_page_attrs(vring_dma_dev(vq), addr, size, dir, attrs); + virtqueue_unmap_page_attrs(_vq, addr, size, dir, attrs); } EXPORT_SYMBOL_GPL(virtqueue_unmap_single_attrs); /** - * virtqueue_map_mapping_error - check dma address + * virtqueue_mapping_error - check dma address * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * @@ -3194,10 +3309,7 @@ int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr) { const struct vring_virtqueue *vq = to_vvq(_vq); - if (!vq->use_map_api) - return 0; - - return dma_mapping_error(vring_dma_dev(vq), addr); + return vring_mapping_error(vq, addr); } EXPORT_SYMBOL_GPL(virtqueue_map_mapping_error); @@ -3214,11 +3326,15 @@ EXPORT_SYMBOL_GPL(virtqueue_map_mapping_error); bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr) { const struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; if (!vq->use_map_api) return false; - return dma_need_sync(vring_dma_dev(vq), addr); + if (vdev->map) + return vdev->map->need_sync(vq->map, addr); + else + return dma_need_sync(vring_dma_dev(vq), addr); } EXPORT_SYMBOL_GPL(virtqueue_map_need_sync); @@ -3240,12 +3356,17 @@ void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq, enum dma_data_direction dir) { const struct vring_virtqueue *vq = to_vvq(_vq); - struct device *dev = vring_dma_dev(vq); + struct virtio_device *vdev = _vq->vdev; if (!vq->use_map_api) return; - dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); + if (vdev->map) + vdev->map->sync_single_for_cpu(vq->map, + addr + offset, size, dir); + else + dma_sync_single_range_for_cpu(vring_dma_dev(vq), + addr, offset, size, dir); } EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_cpu); @@ -3266,12 +3387,18 @@ void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq, enum dma_data_direction dir) { const struct vring_virtqueue *vq = to_vvq(_vq); - struct device *dev = vring_dma_dev(vq); + struct virtio_device *vdev = _vq->vdev; if (!vq->use_map_api) return; - dma_sync_single_range_for_device(dev, addr, offset, size, dir); + if (vdev->map) + vdev->map->sync_single_for_device(vq->map, + addr + offset, + size, dir); + else + dma_sync_single_range_for_device(vring_dma_dev(vq), addr, + offset, size, dir); } EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_device); diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index dc557aa7c825..d4be689e3626 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -195,6 +195,9 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, goto error_new_virtqueue; } + if (index == 0) + vdev->vmap = map; + vq->num_max = max_num; /* Setup virtqueue callback */ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index b4ba1a99e5ab..3386a4a8d06b 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -166,9 +166,11 @@ struct virtio_device { struct virtio_device_id id; const struct virtio_config_ops *config; const struct vringh_config_ops *vringh_config; + const struct virtio_map_ops *map; struct list_head vqs; VIRTIO_DECLARE_FEATURES(features); void *priv; + union virtio_map vmap; #ifdef CONFIG_VIRTIO_DEBUG struct dentry *debugfs_dir; u64 debugfs_filter_features[VIRTIO_FEATURES_DWORDS]; @@ -267,6 +269,29 @@ void unregister_virtio_driver(struct virtio_driver *drv); module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) + +void *virtqueue_map_alloc_coherent(struct virtio_device *vdev, + union virtio_map mapping_token, + size_t size, dma_addr_t *dma_handle, + gfp_t gfp); + +void virtqueue_map_free_coherent(struct virtio_device *vdev, + union virtio_map mapping_token, + size_t size, void *vaddr, + dma_addr_t dma_handle); + +dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction dir, + unsigned long attrs); + +void virtqueue_unmap_page_attrs(const struct virtqueue *_vq, + dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs); void virtqueue_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t addr, diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 7427b79d6f3d..16001e9f9b39 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -139,6 +139,78 @@ struct virtio_config_ops { int (*enable_vq_after_reset)(struct virtqueue *vq); }; +/** + * struct virtio_map_ops - operations for mapping buffer for a virtio device + * Note: For transport that has its own mapping logic it must + * implements all of the operations + * @map_page: map a buffer to the device + * map: metadata for performing mapping + * page: the page that will be mapped by the device + * offset: the offset in the page for a buffer + * size: the buffer size + * dir: mapping direction + * attrs: mapping attributes + * Returns: the mapped address + * @unmap_page: unmap a buffer from the device + * map: device specific mapping map + * map_handle: the mapped address + * size: the buffer size + * dir: mapping direction + * attrs: unmapping attributes + * @sync_single_for_cpu: sync a single buffer from device to cpu + * map: metadata for performing mapping + * map_handle: the mapping address to sync + * size: the size of the buffer + * dir: synchronization direction + * @sync_single_for_device: sync a single buffer from cpu to device + * map: metadata for performing mapping + * map_handle: the mapping address to sync + * size: the size of the buffer + * dir: synchronization direction + * @alloc: alloc a coherent buffer mapping + * map: metadata for performing mapping + * size: the size of the buffer + * map_handle: the mapping address to sync + * gfp: allocation flag (GFP_XXX) + * Returns: virtual address of the allocated buffer + * @free: free a coherent buffer mapping + * map: metadata for performing mapping + * size: the size of the buffer + * vaddr: virtual address of the buffer + * map_handle: the mapping address to sync + * attrs: unmapping attributes + * @need_sync: if the buffer needs synchronization + * map: metadata for performing mapping + * map_handle: the mapped address + * Returns: whether the buffer needs synchronization + * @mapping_error: if the mapping address is error + * map: metadata for performing mapping + * map_handle: the mapped address + * @max_mapping_size: get the maximum buffer size that can be mapped + * map: metadata for performing mapping + * Returns: the maximum buffer size that can be mapped + */ +struct virtio_map_ops { + dma_addr_t (*map_page)(union virtio_map map, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); + void (*unmap_page)(union virtio_map map, dma_addr_t map_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + void (*sync_single_for_cpu)(union virtio_map map, dma_addr_t map_handle, + size_t size, enum dma_data_direction dir); + void (*sync_single_for_device)(union virtio_map map, + dma_addr_t map_handle, size_t size, + enum dma_data_direction dir); + void *(*alloc)(union virtio_map map, size_t size, + dma_addr_t *map_handle, gfp_t gfp); + void (*free)(union virtio_map map, size_t size, void *vaddr, + dma_addr_t map_handle, unsigned long attrs); + bool (*need_sync)(union virtio_map map, dma_addr_t map_handle); + int (*mapping_error)(union virtio_map map, dma_addr_t map_handle); + size_t (*max_mapping_size)(union virtio_map map); +}; + /* If driver didn't advertise the feature, it will never appear. */ void virtio_check_driver_offered_feature(const struct virtio_device *vdev, unsigned int fbit); -- cgit v1.2.3 From 58aca3dbc7d8891a016cb17d488af3002812793b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:39 +0800 Subject: vdpa: support virtio_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Virtio core switches from DMA device to virtio_map, let's do that as well for vDPA. Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-8-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- drivers/vdpa/alibaba/eni_vdpa.c | 2 +- drivers/vdpa/ifcvf/ifcvf_main.c | 2 +- drivers/vdpa/mlx5/core/mr.c | 4 ++-- drivers/vdpa/mlx5/net/mlx5_vnet.c | 13 ++++++++----- drivers/vdpa/octeon_ep/octep_vdpa_main.c | 2 +- drivers/vdpa/pds/vdpa_dev.c | 2 +- drivers/vdpa/solidrun/snet_main.c | 4 ++-- drivers/vdpa/vdpa.c | 2 +- drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +- drivers/vdpa/vdpa_user/vduse_dev.c | 2 +- drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +- drivers/vhost/vdpa.c | 6 ++++-- drivers/virtio/virtio_vdpa.c | 11 +++++------ include/linux/vdpa.h | 15 ++++++++------- 14 files changed, 37 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c index ad7f3447fe90..54aea086d08c 100644 --- a/drivers/vdpa/alibaba/eni_vdpa.c +++ b/drivers/vdpa/alibaba/eni_vdpa.c @@ -496,7 +496,7 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); pci_set_drvdata(pdev, eni_vdpa); - eni_vdpa->vdpa.dma_dev = &pdev->dev; + eni_vdpa->vdpa.vmap.dma_dev = &pdev->dev; eni_vdpa->queues = eni_vdpa_get_num_queues(eni_vdpa); eni_vdpa->vring = devm_kcalloc(&pdev->dev, eni_vdpa->queues, diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index ccf64d7bbfaa..979d188d74ee 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -713,7 +713,7 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, ifcvf_mgmt_dev->adapter = adapter; adapter->pdev = pdev; - adapter->vdpa.dma_dev = &pdev->dev; + adapter->vdpa.vmap.dma_dev = &pdev->dev; adapter->vdpa.mdev = mdev; adapter->vf = vf; vdpa_dev = &adapter->vdpa; diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index c7a20278bc3c..8870a7169267 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -378,7 +378,7 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr u64 pa, offset; u64 paend; struct scatterlist *sg; - struct device *dma = mvdev->vdev.dma_dev; + struct device *dma = mvdev->vdev.vmap.dma_dev; for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) { @@ -432,7 +432,7 @@ err_map: static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) { - struct device *dma = mvdev->vdev.dma_dev; + struct device *dma = mvdev->vdev.vmap.dma_dev; destroy_direct_mr(mvdev, mr); dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 0ed2fc28e1ce..a7e76f175914 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -3395,14 +3395,17 @@ static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid) return err; } -static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) +static union virtio_map mlx5_get_vq_map(struct vdpa_device *vdev, u16 idx) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + union virtio_map map; if (is_ctrl_vq_idx(mvdev, idx)) - return &vdev->dev; + map.dma_dev = &vdev->dev; + else + map.dma_dev = mvdev->vdev.vmap.dma_dev; - return mvdev->vdev.dma_dev; + return map; } static void free_irqs(struct mlx5_vdpa_net *ndev) @@ -3686,7 +3689,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .set_map = mlx5_vdpa_set_map, .reset_map = mlx5_vdpa_reset_map, .set_group_asid = mlx5_set_group_asid, - .get_vq_dma_dev = mlx5_get_vq_dma_dev, + .get_vq_map = mlx5_get_vq_map, .free = mlx5_vdpa_free, .suspend = mlx5_vdpa_suspend, .resume = mlx5_vdpa_resume, /* Op disabled if not supported. */ @@ -3965,7 +3968,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, } ndev->mvdev.mlx_features = device_features; - mvdev->vdev.dma_dev = &mdev->pdev->dev; + mvdev->vdev.vmap.dma_dev = &mdev->pdev->dev; err = mlx5_vdpa_alloc_resources(&ndev->mvdev); if (err) goto err_alloc; diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_main.c b/drivers/vdpa/octeon_ep/octep_vdpa_main.c index 9b49efd24391..5818dae133a3 100644 --- a/drivers/vdpa/octeon_ep/octep_vdpa_main.c +++ b/drivers/vdpa/octeon_ep/octep_vdpa_main.c @@ -516,7 +516,7 @@ static int octep_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, } oct_vdpa->pdev = pdev; - oct_vdpa->vdpa.dma_dev = &pdev->dev; + oct_vdpa->vdpa.vmap.dma_dev = &pdev->dev; oct_vdpa->vdpa.mdev = mdev; oct_vdpa->oct_hw = oct_hw; vdpa_dev = &oct_vdpa->vdpa; diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index 301d95e08596..63d82263fb52 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -643,7 +643,7 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, pdev = vdpa_aux->padev->vf_pdev; dma_dev = &pdev->dev; - pdsv->vdpa_dev.dma_dev = dma_dev; + pdsv->vdpa_dev.vmap.dma_dev = dma_dev; status = pds_vdpa_get_status(&pdsv->vdpa_dev); if (status == 0xff) { diff --git a/drivers/vdpa/solidrun/snet_main.c b/drivers/vdpa/solidrun/snet_main.c index 55ec51c17ab3..39050aab147f 100644 --- a/drivers/vdpa/solidrun/snet_main.c +++ b/drivers/vdpa/solidrun/snet_main.c @@ -1052,8 +1052,8 @@ static int snet_vdpa_probe_vf(struct pci_dev *pdev) */ snet_reserve_irq_idx(pf_irqs ? pdev_pf : pdev, snet); - /*set DMA device*/ - snet->vdpa.dma_dev = &pdev->dev; + /* set map metadata */ + snet->vdpa.vmap.dma_dev = &pdev->dev; /* Register VDPA device */ ret = vdpa_register_device(&snet->vdpa, snet->cfg->vq_num); diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 8a372b51c21a..c71debeb8471 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -151,7 +151,7 @@ static void vdpa_release_dev(struct device *d) * Driver should use vdpa_alloc_device() wrapper macro instead of * using this directly. * - * Return: Returns an error when parent/config/dma_dev is not set or fail to get + * Return: Returns an error when parent/config/map is not set or fail to get * ida. */ struct vdpa_device *__vdpa_alloc_device(struct device *parent, diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index c204fc8e471a..22ee53538444 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -272,7 +272,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0], &vdpasim->iommu_lock); - vdpasim->vdpa.dma_dev = dev; + vdpasim->vdpa.vmap.dma_dev = dev; return vdpasim; diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 04620bb77203..f68ed569394c 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -2022,7 +2022,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) return ret; } set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops); - vdev->vdpa.dma_dev = &vdev->vdpa.dev; + vdev->vdpa.vmap.dma_dev = &vdev->vdpa.dev; vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev; return 0; diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index 8787407f75b0..242641c0f2bd 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -520,7 +520,7 @@ static int vp_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, vp_vdpa_mgtdev->vp_vdpa = vp_vdpa; - vp_vdpa->vdpa.dma_dev = &pdev->dev; + vp_vdpa->vdpa.vmap.dma_dev = &pdev->dev; vp_vdpa->queues = vp_modern_get_num_queues(mdev); vp_vdpa->mdev = mdev; diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index af1e1fdfd9ed..05a481e4c385 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1318,7 +1318,8 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; - struct device *dma_dev = vdpa_get_dma_dev(vdpa); + union virtio_map map = vdpa_get_map(vdpa); + struct device *dma_dev = map.dma_dev; int ret; /* Device want to do DMA by itself */ @@ -1353,7 +1354,8 @@ err_attach: static void vhost_vdpa_free_domain(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; - struct device *dma_dev = vdpa_get_dma_dev(vdpa); + union virtio_map map = vdpa_get_map(vdpa); + struct device *dma_dev = map.dma_dev; if (v->domain) { iommu_detach_device(v->domain, dma_dev); diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index d4be689e3626..8b27c6e8eebb 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -133,7 +133,6 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, const char *name, bool ctx) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - struct device *dma_dev; const struct vdpa_config_ops *ops = vdpa->config; bool (*notify)(struct virtqueue *vq) = virtio_vdpa_notify; struct vdpa_callback cb; @@ -182,11 +181,11 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, /* Create the vring */ align = ops->get_vq_align(vdpa); - if (ops->get_vq_dma_dev) - dma_dev = ops->get_vq_dma_dev(vdpa, index); + if (ops->get_vq_map) + map = ops->get_vq_map(vdpa, index); else - dma_dev = vdpa_get_dma_dev(vdpa); - map.dma_dev = dma_dev; + map = vdpa_get_map(vdpa); + vq = vring_create_virtqueue_map(index, max_num, align, vdev, true, may_reduce_num, ctx, notify, callback, name, map); @@ -467,7 +466,7 @@ static int virtio_vdpa_probe(struct vdpa_device *vdpa) if (!vd_dev) return -ENOMEM; - vd_dev->vdev.dev.parent = vdpa_get_dma_dev(vdpa); + vd_dev->vdev.dev.parent = vdpa_get_map(vdpa).dma_dev; vd_dev->vdev.dev.release = virtio_vdpa_release_dev; vd_dev->vdev.config = &virtio_vdpa_config_ops; vd_dev->vdpa = vdpa; diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 2e7a30fe6b92..ae0451945851 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -70,7 +71,7 @@ struct vdpa_mgmt_dev; /** * struct vdpa_device - representation of a vDPA device * @dev: underlying device - * @dma_dev: the actual device that is performing DMA + * @vmap: the metadata passed to upper layer to be used for mapping * @driver_override: driver name to force a match; do not set directly, * because core frees it; use driver_set_override() to * set or clear it. @@ -87,7 +88,7 @@ struct vdpa_mgmt_dev; */ struct vdpa_device { struct device dev; - struct device *dma_dev; + union virtio_map vmap; const char *driver_override; const struct vdpa_config_ops *config; struct rw_semaphore cf_lock; /* Protects get/set config */ @@ -352,11 +353,11 @@ struct vdpa_map_file { * @vdev: vdpa device * @asid: address space identifier * Returns integer: success (0) or error (< 0) - * @get_vq_dma_dev: Get the dma device for a specific + * @get_vq_map: Get the map metadata for a specific * virtqueue (optional) * @vdev: vdpa device * @idx: virtqueue index - * Returns pointer to structure device or error (NULL) + * Returns map token union error (NULL) * @bind_mm: Bind the device to a specific address space * so the vDPA framework can use VA when this * callback is implemented. (optional) @@ -436,7 +437,7 @@ struct vdpa_config_ops { int (*reset_map)(struct vdpa_device *vdev, unsigned int asid); int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, unsigned int asid); - struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); + union virtio_map (*get_vq_map)(struct vdpa_device *vdev, u16 idx); int (*bind_mm)(struct vdpa_device *vdev, struct mm_struct *mm); void (*unbind_mm)(struct vdpa_device *vdev); @@ -520,9 +521,9 @@ static inline void vdpa_set_drvdata(struct vdpa_device *vdev, void *data) dev_set_drvdata(&vdev->dev, data); } -static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) +static inline union virtio_map vdpa_get_map(struct vdpa_device *vdev) { - return vdev->dma_dev; + return vdev->vmap; } static inline int vdpa_reset(struct vdpa_device *vdev, u32 flags) -- cgit v1.2.3 From 0d16cc439f36355d04b17ac45c3001d90969aa44 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 24 Sep 2025 15:00:44 +0800 Subject: vdpa: introduce map ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Virtio core allows the transport to provide device or transport specific mapping functions. This patch adds this support to vDPA. We can simply do this by allowing the vDPA parent to register a virtio_map_ops. Reviewed-by: Christoph Hellwig Signed-off-by: Jason Wang Message-Id: <20250924070045.10361-2-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Eugenio Pérez --- drivers/vdpa/alibaba/eni_vdpa.c | 3 ++- drivers/vdpa/ifcvf/ifcvf_main.c | 3 ++- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- drivers/vdpa/octeon_ep/octep_vdpa_main.c | 4 ++-- drivers/vdpa/pds/vdpa_dev.c | 3 ++- drivers/vdpa/solidrun/snet_main.c | 4 ++-- drivers/vdpa/vdpa.c | 3 +++ drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +- drivers/vdpa/vdpa_user/vduse_dev.c | 3 ++- drivers/vdpa/virtio_pci/vp_vdpa.c | 3 ++- drivers/virtio/virtio_vdpa.c | 4 +++- include/linux/vdpa.h | 10 +++++++--- 12 files changed, 29 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c index 54aea086d08c..e476504db0c8 100644 --- a/drivers/vdpa/alibaba/eni_vdpa.c +++ b/drivers/vdpa/alibaba/eni_vdpa.c @@ -478,7 +478,8 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; eni_vdpa = vdpa_alloc_device(struct eni_vdpa, vdpa, - dev, &eni_vdpa_ops, 1, 1, NULL, false); + dev, &eni_vdpa_ops, NULL, + 1, 1, NULL, false); if (IS_ERR(eni_vdpa)) { ENI_ERR(pdev, "failed to allocate vDPA structure\n"); return PTR_ERR(eni_vdpa); diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 979d188d74ee..6658dc74d915 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -705,7 +705,8 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, vf = &ifcvf_mgmt_dev->vf; pdev = vf->pdev; adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, - &pdev->dev, &ifc_vdpa_ops, 1, 1, NULL, false); + &pdev->dev, &ifc_vdpa_ops, + NULL, 1, 1, NULL, false); if (IS_ERR(adapter)) { IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); return PTR_ERR(adapter); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index a7e76f175914..82034efb74fc 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -3882,7 +3882,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, } ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops, - MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false); + NULL, MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false); if (IS_ERR(ndev)) return PTR_ERR(ndev); diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_main.c b/drivers/vdpa/octeon_ep/octep_vdpa_main.c index 5818dae133a3..9e8d07078606 100644 --- a/drivers/vdpa/octeon_ep/octep_vdpa_main.c +++ b/drivers/vdpa/octeon_ep/octep_vdpa_main.c @@ -508,8 +508,8 @@ static int octep_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, u64 device_features; int ret; - oct_vdpa = vdpa_alloc_device(struct octep_vdpa, vdpa, &pdev->dev, &octep_vdpa_ops, 1, 1, - NULL, false); + oct_vdpa = vdpa_alloc_device(struct octep_vdpa, vdpa, &pdev->dev, &octep_vdpa_ops, + NULL, 1, 1, NULL, false); if (IS_ERR(oct_vdpa)) { dev_err(&pdev->dev, "Failed to allocate vDPA structure for octep vdpa device"); return PTR_ERR(oct_vdpa); diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index 63d82263fb52..36f61cc96e21 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -632,7 +632,8 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, } pdsv = vdpa_alloc_device(struct pds_vdpa_device, vdpa_dev, - dev, &pds_vdpa_ops, 1, 1, name, false); + dev, &pds_vdpa_ops, NULL, + 1, 1, name, false); if (IS_ERR(pdsv)) { dev_err(dev, "Failed to allocate vDPA structure: %pe\n", pdsv); return PTR_ERR(pdsv); diff --git a/drivers/vdpa/solidrun/snet_main.c b/drivers/vdpa/solidrun/snet_main.c index 39050aab147f..4588211d57eb 100644 --- a/drivers/vdpa/solidrun/snet_main.c +++ b/drivers/vdpa/solidrun/snet_main.c @@ -1008,8 +1008,8 @@ static int snet_vdpa_probe_vf(struct pci_dev *pdev) } /* Allocate vdpa device */ - snet = vdpa_alloc_device(struct snet, vdpa, &pdev->dev, &snet_config_ops, 1, 1, NULL, - false); + snet = vdpa_alloc_device(struct snet, vdpa, &pdev->dev, &snet_config_ops, + NULL, 1, 1, NULL, false); if (!snet) { SNET_ERR(pdev, "Failed to allocate a vdpa device\n"); ret = -ENOMEM; diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index c71debeb8471..34874beb0152 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -142,6 +142,7 @@ static void vdpa_release_dev(struct device *d) * initialized but before registered. * @parent: the parent device * @config: the bus operations that is supported by this device + * @map: the map operations that is supported by this device * @ngroups: number of groups supported by this device * @nas: number of address spaces supported by this device * @size: size of the parent structure that contains private data @@ -156,6 +157,7 @@ static void vdpa_release_dev(struct device *d) */ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, + const struct virtio_map_ops *map, unsigned int ngroups, unsigned int nas, size_t size, const char *name, bool use_va) @@ -187,6 +189,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, vdev->dev.release = vdpa_release_dev; vdev->index = err; vdev->config = config; + vdev->map = map; vdev->features_valid = false; vdev->use_va = use_va; vdev->ngroups = ngroups; diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 22ee53538444..c1c6431950e1 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -215,7 +215,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, else ops = &vdpasim_config_ops; - vdpa = __vdpa_alloc_device(NULL, ops, + vdpa = __vdpa_alloc_device(NULL, ops, NULL, dev_attr->ngroups, dev_attr->nas, dev_attr->alloc_size, dev_attr->name, use_va); diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index f68ed569394c..ad782d20a8ed 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -2009,7 +2009,8 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) return -EEXIST; vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev, - &vduse_vdpa_config_ops, 1, 1, name, true); + &vduse_vdpa_config_ops, NULL, + 1, 1, name, true); if (IS_ERR(vdev)) return PTR_ERR(vdev); diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index 242641c0f2bd..17a19a728c9c 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -511,7 +511,8 @@ static int vp_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, int ret, i; vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa, - dev, &vp_vdpa_ops, 1, 1, name, false); + dev, &vp_vdpa_ops, NULL, + 1, 1, name, false); if (IS_ERR(vp_vdpa)) { dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n"); diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 8b27c6e8eebb..2464fa655f09 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -466,9 +466,11 @@ static int virtio_vdpa_probe(struct vdpa_device *vdpa) if (!vd_dev) return -ENOMEM; - vd_dev->vdev.dev.parent = vdpa_get_map(vdpa).dma_dev; + vd_dev->vdev.dev.parent = vdpa->map ? &vdpa->dev : + vdpa_get_map(vdpa).dma_dev; vd_dev->vdev.dev.release = virtio_vdpa_release_dev; vd_dev->vdev.config = &virtio_vdpa_config_ops; + vd_dev->vdev.map = vdpa->map; vd_dev->vdpa = vdpa; vd_dev->vdev.id.device = ops->get_device_id(vdpa); diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index ae0451945851..4cf21d6e9cfd 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -76,6 +76,7 @@ struct vdpa_mgmt_dev; * because core frees it; use driver_set_override() to * set or clear it. * @config: the configuration ops for this device. + * @map: the map ops for this device * @cf_lock: Protects get and set access to configuration layout. * @index: device index * @features_valid: were features initialized? for legacy guests @@ -91,6 +92,7 @@ struct vdpa_device { union virtio_map vmap; const char *driver_override; const struct vdpa_config_ops *config; + const struct virtio_map_ops *map; struct rw_semaphore cf_lock; /* Protects get/set config */ unsigned int index; bool features_valid; @@ -447,6 +449,7 @@ struct vdpa_config_ops { struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, + const struct virtio_map_ops *map, unsigned int ngroups, unsigned int nas, size_t size, const char *name, bool use_va); @@ -458,6 +461,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, * @member: the name of struct vdpa_device within the @dev_struct * @parent: the parent device * @config: the bus operations that is supported by this device + * @map: the map operations that is supported by this device * @ngroups: the number of virtqueue groups supported by this device * @nas: the number of address spaces * @name: name of the vdpa device @@ -465,10 +469,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, * * Return allocated data structure or ERR_PTR upon error */ -#define vdpa_alloc_device(dev_struct, member, parent, config, ngroups, nas, \ - name, use_va) \ +#define vdpa_alloc_device(dev_struct, member, parent, config, map, \ + ngroups, nas, name, use_va) \ container_of((__vdpa_alloc_device( \ - parent, config, ngroups, nas, \ + parent, config, map, ngroups, nas, \ (sizeof(dev_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ dev_struct, member))), name, use_va)), \ -- cgit v1.2.3 From 1c14b0e4ba988381e362ad8a9651eff0b21bd47f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 24 Sep 2025 15:00:45 +0800 Subject: vduse: switch to use virtio map API instead of DMA API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lacking the support of device specific mapping supported in virtio, VDUSE must trick the DMA API in order to make virtio-vdpa transport work. This is done by advertising vDPA device as dma device with a VDUSE specific dma_ops even if it doesn't do DMA at all. This will be fixed by this patch. Thanks to the new mapping operations support by virtio and vDPA. VDUSE can simply switch to advertise its specific mappings operations to virtio via virtio-vdpa then DMA API is not needed for VDUSE any more and iova domain could be used as the mapping token instead. Signed-off-by: Jason Wang Message-Id: <20250924070045.10361-3-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Eugenio Pérez --- drivers/vdpa/Kconfig | 8 +--- drivers/vdpa/vdpa_user/iova_domain.c | 2 +- drivers/vdpa/vdpa_user/iova_domain.h | 2 +- drivers/vdpa/vdpa_user/vduse_dev.c | 78 ++++++++++++++++++------------------ include/linux/virtio.h | 4 ++ 5 files changed, 46 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 559fb9d3271f..857cf288c876 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -34,13 +34,7 @@ config VDPA_SIM_BLOCK config VDPA_USER tristate "VDUSE (vDPA Device in Userspace) support" - depends on EVENTFD && MMU && HAS_DMA - # - # This driver incorrectly tries to override the dma_ops. It should - # never have done that, but for now keep it working on architectures - # that use dma ops - # - depends on ARCH_HAS_DMA_OPS + depends on EVENTFD && MMU select VHOST_IOTLB select IOMMU_IOVA help diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c index 58116f89d8da..ccaed24b7ef8 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.c +++ b/drivers/vdpa/vdpa_user/iova_domain.c @@ -447,7 +447,7 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain, void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain, size_t size, dma_addr_t *dma_addr, - gfp_t flag, unsigned long attrs) + gfp_t flag) { struct iova_domain *iovad = &domain->consistent_iovad; unsigned long limit = domain->iova_limit; diff --git a/drivers/vdpa/vdpa_user/iova_domain.h b/drivers/vdpa/vdpa_user/iova_domain.h index 7f3f0928ec78..1f3c30be272a 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.h +++ b/drivers/vdpa/vdpa_user/iova_domain.h @@ -64,7 +64,7 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain, void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain, size_t size, dma_addr_t *dma_addr, - gfp_t flag, unsigned long attrs); + gfp_t flag); void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size, void *vaddr, dma_addr_t dma_addr, diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index ad782d20a8ed..e7bced0b5542 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -814,59 +814,53 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = { .free = vduse_vdpa_free, }; -static void vduse_dev_sync_single_for_device(struct device *dev, +static void vduse_dev_sync_single_for_device(union virtio_map token, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; vduse_domain_sync_single_for_device(domain, dma_addr, size, dir); } -static void vduse_dev_sync_single_for_cpu(struct device *dev, +static void vduse_dev_sync_single_for_cpu(union virtio_map token, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir); } -static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page, +static dma_addr_t vduse_dev_map_page(union virtio_map token, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; return vduse_domain_map_page(domain, page, offset, size, dir, attrs); } -static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) +static void vduse_dev_unmap_page(union virtio_map token, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs); } -static void *vduse_dev_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs) +static void *vduse_dev_alloc_coherent(union virtio_map token, size_t size, + dma_addr_t *dma_addr, gfp_t flag) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; unsigned long iova; void *addr; *dma_addr = DMA_MAPPING_ERROR; addr = vduse_domain_alloc_coherent(domain, size, - (dma_addr_t *)&iova, flag, attrs); + (dma_addr_t *)&iova, flag); if (!addr) return NULL; @@ -875,31 +869,45 @@ static void *vduse_dev_alloc_coherent(struct device *dev, size_t size, return addr; } -static void vduse_dev_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs) +static void vduse_dev_free_coherent(union virtio_map token, size_t size, + void *vaddr, dma_addr_t dma_addr, + unsigned long attrs) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs); } -static size_t vduse_dev_max_mapping_size(struct device *dev) +static bool vduse_dev_need_sync(union virtio_map token, dma_addr_t dma_addr) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; + + return dma_addr < domain->bounce_size; +} + +static int vduse_dev_mapping_error(union virtio_map token, dma_addr_t dma_addr) +{ + if (unlikely(dma_addr == DMA_MAPPING_ERROR)) + return -ENOMEM; + return 0; +} + +static size_t vduse_dev_max_mapping_size(union virtio_map token) +{ + struct vduse_iova_domain *domain = token.iova_domain; return domain->bounce_size; } -static const struct dma_map_ops vduse_dev_dma_ops = { +static const struct virtio_map_ops vduse_map_ops = { .sync_single_for_device = vduse_dev_sync_single_for_device, .sync_single_for_cpu = vduse_dev_sync_single_for_cpu, .map_page = vduse_dev_map_page, .unmap_page = vduse_dev_unmap_page, .alloc = vduse_dev_alloc_coherent, .free = vduse_dev_free_coherent, + .need_sync = vduse_dev_need_sync, + .mapping_error = vduse_dev_mapping_error, .max_mapping_size = vduse_dev_max_mapping_size, }; @@ -2003,27 +2011,18 @@ static struct vduse_mgmt_dev *vduse_mgmt; static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) { struct vduse_vdpa *vdev; - int ret; if (dev->vdev) return -EEXIST; vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev, - &vduse_vdpa_config_ops, NULL, + &vduse_vdpa_config_ops, &vduse_map_ops, 1, 1, name, true); if (IS_ERR(vdev)) return PTR_ERR(vdev); dev->vdev = vdev; vdev->dev = dev; - vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask; - ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64)); - if (ret) { - put_device(&vdev->vdpa.dev); - return ret; - } - set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops); - vdev->vdpa.vmap.dma_dev = &vdev->vdpa.dev; vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev; return 0; @@ -2056,6 +2055,7 @@ static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, return -ENOMEM; } + dev->vdev->vdpa.vmap.iova_domain = dev->domain; ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num); if (ret) { put_device(&dev->vdev->vdpa.dev); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 3386a4a8d06b..96c66126c074 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -41,9 +41,13 @@ struct virtqueue { void *priv; }; +struct vduse_iova_domain; + union virtio_map { /* Device that performs DMA */ struct device *dma_dev; + /* VDUSE specific mapping data */ + struct vduse_iova_domain *iova_domain; }; int virtqueue_add_outbuf(struct virtqueue *vq, -- cgit v1.2.3 From f97aef092e199c10a3da96ae79b571edd5362faa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 12:12:37 +0200 Subject: cpufreq: Make drivers using CPUFREQ_ETERNAL specify transition latency Commit a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us") caused platforms where cpuinfo.transition_latency is CPUFREQ_ETERNAL to get a very large transition latency whereas previously it had been capped at 10 ms (and later at 2 ms). This led to a user-observable regression between 6.6 and 6.12 as described by Shawn: "The dbs sampling_rate was 10000 us on 6.6 and suddently becomes 6442450 us (4294967295 / 1000 * 1.5) on 6.12 for these platforms because the default transition delay was dropped [...]. It slows down dbs governor's reacting to CPU loading change dramatically. Also, as transition_delay_us is used by schedutil governor as rate_limit_us, it shows a negative impact on device idle power consumption, because the device gets slightly less time in the lowest OPP." Evidently, the expectation of the drivers using CPUFREQ_ETERNAL as cpuinfo.transition_latency was that it would be capped by the core, but they may as well return a default transition latency value instead of CPUFREQ_ETERNAL and the core need not do anything with it. Accordingly, introduce CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS and make all of the drivers in question use it instead of CPUFREQ_ETERNAL. Also update the related Rust binding. Fixes: a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us") Closes: https://lore.kernel.org/linux-pm/20250922125929.453444-1-shawnguo2@yeah.net/ Reported-by: Shawn Guo Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Jie Zhan Acked-by: Viresh Kumar Cc: 6.6+ # 6.6+ Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2264949.irdbgypaU6@rafael.j.wysocki [ rjw: Fix typo in new symbol name, drop redundant type cast from Rust binding ] Tested-by: Shawn Guo # with cpufreq-dt driver Reviewed-by: Qais Yousef Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 2 +- drivers/cpufreq/imx6q-cpufreq.c | 2 +- drivers/cpufreq/mediatek-cpufreq-hw.c | 2 +- drivers/cpufreq/rcpufreq_dt.rs | 2 +- drivers/cpufreq/scmi-cpufreq.c | 2 +- drivers/cpufreq/scpi-cpufreq.c | 2 +- drivers/cpufreq/spear-cpufreq.c | 2 +- include/linux/cpufreq.h | 3 +++ rust/kernel/cpufreq.rs | 7 ++++--- 9 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 506437489b4d..7d5079fd1688 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -104,7 +104,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); if (!transition_latency) - transition_latency = CPUFREQ_ETERNAL; + transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; cpumask_copy(policy->cpus, priv->cpus); policy->driver_data = priv; diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index db1c88e9d3f9..e93697d3edfd 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -442,7 +442,7 @@ soc_opp_out: } if (of_property_read_u32(np, "clock-latency", &transition_latency)) - transition_latency = CPUFREQ_ETERNAL; + transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; /* * Calculate the ramp time for max voltage change in the diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index fce5aa5ceea0..ae4500ab4891 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -309,7 +309,7 @@ static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) latency = readl_relaxed(data->reg_bases[REG_FREQ_LATENCY]) * 1000; if (!latency) - latency = CPUFREQ_ETERNAL; + latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; policy->cpuinfo.transition_latency = latency; policy->fast_switch_possible = true; diff --git a/drivers/cpufreq/rcpufreq_dt.rs b/drivers/cpufreq/rcpufreq_dt.rs index 7e1fbf9a091f..3909022e1c74 100644 --- a/drivers/cpufreq/rcpufreq_dt.rs +++ b/drivers/cpufreq/rcpufreq_dt.rs @@ -123,7 +123,7 @@ impl cpufreq::Driver for CPUFreqDTDriver { let mut transition_latency = opp_table.max_transition_latency_ns() as u32; if transition_latency == 0 { - transition_latency = cpufreq::ETERNAL_LATENCY_NS; + transition_latency = cpufreq::DEFAULT_TRANSITION_LATENCY_NS; } policy diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 38c165d526d1..d2a110079f5f 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -294,7 +294,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) latency = perf_ops->transition_latency_get(ph, domain); if (!latency) - latency = CPUFREQ_ETERNAL; + latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; policy->cpuinfo.transition_latency = latency; diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index dcbb0ae7dd47..e530345baddf 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -157,7 +157,7 @@ static int scpi_cpufreq_init(struct cpufreq_policy *policy) latency = scpi_ops->get_transition_latency(cpu_dev); if (!latency) - latency = CPUFREQ_ETERNAL; + latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; policy->cpuinfo.transition_latency = latency; diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c index 707c71090cc3..2a1550e1aa21 100644 --- a/drivers/cpufreq/spear-cpufreq.c +++ b/drivers/cpufreq/spear-cpufreq.c @@ -182,7 +182,7 @@ static int spear_cpufreq_probe(struct platform_device *pdev) if (of_property_read_u32(np, "clock-latency", &spear_cpufreq.transition_latency)) - spear_cpufreq.transition_latency = CPUFREQ_ETERNAL; + spear_cpufreq.transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; cnt = of_property_count_u32_elems(np, "cpufreq_tbl"); if (cnt <= 0) { diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 40966512ea18..bc8c083bc16a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -32,6 +32,9 @@ */ #define CPUFREQ_ETERNAL (-1) + +#define CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS NSEC_PER_MSEC + #define CPUFREQ_NAME_LEN 16 /* Print length for names. Extra 1 space for accommodating '\n' in prints */ #define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1) diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index eea57ba95f24..2ea735700ae7 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -39,7 +39,8 @@ use macros::vtable; const CPUFREQ_NAME_LEN: usize = bindings::CPUFREQ_NAME_LEN as usize; /// Default transition latency value in nanoseconds. -pub const ETERNAL_LATENCY_NS: u32 = bindings::CPUFREQ_ETERNAL as u32; +pub const DEFAULT_TRANSITION_LATENCY_NS: u32 = + bindings::CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; /// CPU frequency driver flags. pub mod flags { @@ -400,13 +401,13 @@ impl TableBuilder { /// The following example demonstrates how to create a CPU frequency table. /// /// ``` -/// use kernel::cpufreq::{ETERNAL_LATENCY_NS, Policy}; +/// use kernel::cpufreq::{DEFAULT_TRANSITION_LATENCY_NS, Policy}; /// /// fn update_policy(policy: &mut Policy) { /// policy /// .set_dvfs_possible_from_any_cpu(true) /// .set_fast_switch_possible(true) -/// .set_transition_latency_ns(ETERNAL_LATENCY_NS); +/// .set_transition_latency_ns(DEFAULT_TRANSITION_LATENCY_NS); /// /// pr_info!("The policy details are: {:?}\n", (policy.cpu(), policy.cur())); /// } -- cgit v1.2.3 From 950c6451a5c38d375993c3b9da427e2e69b01c30 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 12:31:47 +0200 Subject: cpufreq: Drop unused symbol CPUFREQ_ETERNAL Drop CPUFREQ_ETERNAL that has no users any more along with all references to it in the documentation. No functional impact. Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Jie Zhan Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Reviewed-by: Qais Yousef --- Documentation/admin-guide/pm/cpufreq.rst | 4 ---- Documentation/cpu-freq/cpu-drivers.rst | 3 +-- Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst | 3 +-- Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst | 3 +-- include/linux/cpufreq.h | 5 ----- 5 files changed, 3 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index cacb9f0307dd..738d7b4dc33a 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -274,10 +274,6 @@ are the following: The time it takes to switch the CPUs belonging to this policy from one P-state to another, in nanoseconds. - If unknown or if known to be so high that the scaling driver does not - work with the `ondemand`_ governor, -1 (:c:macro:`CPUFREQ_ETERNAL`) - will be returned by reads from this attribute. - ``related_cpus`` List of all (online and offline) CPUs belonging to this policy. diff --git a/Documentation/cpu-freq/cpu-drivers.rst b/Documentation/cpu-freq/cpu-drivers.rst index d84ededb66f9..c5635ac3de54 100644 --- a/Documentation/cpu-freq/cpu-drivers.rst +++ b/Documentation/cpu-freq/cpu-drivers.rst @@ -109,8 +109,7 @@ Then, the driver must fill in the following values: +-----------------------------------+--------------------------------------+ |policy->cpuinfo.transition_latency | the time it takes on this CPU to | | | switch between two frequencies in | -| | nanoseconds (if appropriate, else | -| | specify CPUFREQ_ETERNAL) | +| | nanoseconds | +-----------------------------------+--------------------------------------+ |policy->cur | The current operating frequency of | | | this CPU (if appropriate) | diff --git a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst index 2ca92042767b..8238f4c6e4f5 100644 --- a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst +++ b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst @@ -112,8 +112,7 @@ CPUfreq核心层注册一个cpufreq_driver结构体。 | | | +-----------------------------------+--------------------------------------+ |policy->cpuinfo.transition_latency | CPU在两个频率之间切换所需的时间,以 | -| | 纳秒为单位(如不适用,设定为 | -| | CPUFREQ_ETERNAL) | +| | 纳秒为单位 | | | | +-----------------------------------+--------------------------------------+ |policy->cur | 该CPU当前的工作频率(如适用) | diff --git a/Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst b/Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst index add3de2d4523..5435c3928d4b 100644 --- a/Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst +++ b/Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst @@ -112,8 +112,7 @@ CPUfreq核心層註冊一個cpufreq_driver結構體。 | | | +-----------------------------------+--------------------------------------+ |policy->cpuinfo.transition_latency | CPU在兩個頻率之間切換所需的時間,以 | -| | 納秒爲單位(如不適用,設定爲 | -| | CPUFREQ_ETERNAL) | +| | 納秒爲單位 | | | | +-----------------------------------+--------------------------------------+ |policy->cur | 該CPU當前的工作頻率(如適用) | diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index bc8c083bc16a..0465d1e6f72a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -26,13 +26,8 @@ *********************************************************************/ /* * Frequency values here are CPU kHz - * - * Maximum transition latency is in nanoseconds - if it's unknown, - * CPUFREQ_ETERNAL shall be used. */ -#define CPUFREQ_ETERNAL (-1) - #define CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS NSEC_PER_MSEC #define CPUFREQ_NAME_LEN 16 -- cgit v1.2.3 From 4e66293bb141df33d5eb1f922e16fe05913bf296 Mon Sep 17 00:00:00 2001 From: Bhanu Seshu Kumar Valluri Date: Wed, 1 Oct 2025 14:27:16 +0530 Subject: of: doc: Fix typo in doc comments. synthetized => synthesized definied => defined sucess => success Signed-off-by: Bhanu Seshu Kumar Valluri Signed-off-by: Rob Herring (Arm) --- drivers/of/irq.c | 2 +- drivers/of/overlay.c | 2 +- include/linux/of.h | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/irq.c b/drivers/of/irq.c index e7c12abd10ab..1ca66b6e267c 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -163,7 +163,7 @@ const __be32 *of_irq_parse_imap_parent(const __be32 *imap, int len, struct of_ph * @out_irq: structure of_phandle_args updated by this function * * This function is a low-level interrupt tree walking function. It - * can be used to do a partial walk with synthetized reg and interrupts + * can be used to do a partial walk with synthesized reg and interrupts * properties, for example when resolving PCI interrupts when no device * node exist for the parent. It takes an interrupt specifier structure as * input, walks the tree looking for any interrupt-map properties, translates diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 1af6f52d0708..255e8362f600 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -135,7 +135,7 @@ static BLOCKING_NOTIFIER_HEAD(overlay_notify_chain); * @nb: Notifier block to register * * Register for notification on overlay operations on device tree nodes. The - * reported actions definied by @of_reconfig_change. The notifier callback + * reported actions defined by @of_reconfig_change. The notifier callback * furthermore receives a pointer to the affected device tree node. * * Note that a notifier callback is not supposed to store pointers to a device diff --git a/include/linux/of.h b/include/linux/of.h index 5e2c6ed9370a..121a288ca92d 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1134,7 +1134,7 @@ static inline bool of_phandle_args_equal(const struct of_phandle_args *a1, * Search for a property in a device node and count the number of u8 elements * in it. * - * Return: The number of elements on sucess, -EINVAL if the property does + * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u8 and -ENODATA if the * property does not have a value. */ @@ -1153,7 +1153,7 @@ static inline int of_property_count_u8_elems(const struct device_node *np, * Search for a property in a device node and count the number of u16 elements * in it. * - * Return: The number of elements on sucess, -EINVAL if the property does + * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u16 and -ENODATA if the * property does not have a value. */ @@ -1172,7 +1172,7 @@ static inline int of_property_count_u16_elems(const struct device_node *np, * Search for a property in a device node and count the number of u32 elements * in it. * - * Return: The number of elements on sucess, -EINVAL if the property does + * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u32 and -ENODATA if the * property does not have a value. */ @@ -1191,7 +1191,7 @@ static inline int of_property_count_u32_elems(const struct device_node *np, * Search for a property in a device node and count the number of u64 elements * in it. * - * Return: The number of elements on sucess, -EINVAL if the property does + * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u64 and -ENODATA if the * property does not have a value. */ -- cgit v1.2.3 From 510d76646a6a7beaa49fc0da7282e285a3dfce97 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Tue, 23 Sep 2025 19:21:36 +0800 Subject: block: Update a comment of disk statistics >From commit 074a7aca7afa ("block: move stats from disk to part0"), we know that: * {disk|all}_stat_*() are gone. * disk_stat_lock/unlock() are renamed to part_stat_lock/unlock(). Therefore, outdated comments should be updated accordingly. Fixes: 074a7aca7afa ("block: move stats from disk to part0") Signed-off-by: Tang Yizhou Signed-off-by: Jens Axboe --- include/linux/part_stat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index eeeff2a04529..729415e91215 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -17,8 +17,8 @@ struct disk_stats { /* * Macros to operate on percpu disk statistics: * - * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters and should - * be called between disk_stat_lock() and disk_stat_unlock(). + * part_stat_{add|sub|inc|dec}() modify the stat counters and should + * be called between part_stat_lock() and part_stat_unlock(). * * part_stat_read() can be called at any time. */ -- cgit v1.2.3 From 1b54b0756f051c11f5a5d0fbc1581e0b9a18e2bc Mon Sep 17 00:00:00 2001 From: Bhanu Seshu Kumar Valluri Date: Wed, 1 Oct 2025 16:27:15 +0530 Subject: net: doc: Fix typos in docs Fix typos in doc comments. Signed-off-by: Bhanu Seshu Kumar Valluri Link: https://patch.msgid.link/20251001105715.50462-1-bhanuseshukumar@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 4 ++-- net/tipc/crypto.c | 2 +- net/tipc/topsrv.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7a54a8b4d277..3c7634482356 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -297,7 +297,7 @@ static inline const char *phy_modes(phy_interface_t interface) * * Description: maps RGMII supported link speeds into the clock rates. * This can also be used for MII, GMII, and RMII interface modes as the - * clock rates are indentical, but the caller must be aware that errors + * clock rates are identical, but the caller must be aware that errors * for unsupported clock rates will not be signalled. * * Returns: clock rate or negative errno @@ -519,7 +519,7 @@ enum phy_state { * struct phy_c45_device_ids - 802.3-c45 Device Identifiers * @devices_in_package: IEEE 802.3 devices in package register value. * @mmds_present: bit vector of MMDs present. - * @device_ids: The device identifer for each present device. + * @device_ids: The device identifier for each present device. */ struct phy_c45_device_ids { u32 devices_in_package; diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index ea5bb131ebd0..751904f10aab 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1797,7 +1797,7 @@ exit: * @b: bearer where the message has been received * * If the decryption is successful, the decrypted skb is returned directly or - * as the callback, the encryption header and auth tag will be trimed out + * as the callback, the encryption header and auth tag will be trimmed out * before forwarding to tipc_rcv() via the tipc_crypto_rcv_complete(). * Otherwise, the skb will be freed! * Note: RX key(s) can be re-aligned, or in case of no key suitable, TX diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index ffe577bf6b51..aad7f96b6009 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -57,7 +57,7 @@ * @conn_idr: identifier set of connection * @idr_lock: protect the connection identifier set * @idr_in_use: amount of allocated identifier entry - * @net: network namspace instance + * @net: network namespace instance * @awork: accept work item * @rcv_wq: receive workqueue * @send_wq: send workqueue @@ -83,7 +83,7 @@ struct tipc_topsrv { * @sock: socket handler associated with connection * @flags: indicates connection state * @server: pointer to connected server - * @sub_list: lsit to all pertaing subscriptions + * @sub_list: list to all pertaining subscriptions * @sub_lock: lock protecting the subscription list * @rwork: receive work item * @outqueue: pointer to first outbound message in queue -- cgit v1.2.3 From 384b52ce32110db974d3b61d463af48347eb73fb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 1 Oct 2025 19:34:29 +0200 Subject: PM: runtime: Introduce one more usage counter guard Follow previous commit 9a0abc39450a ("PM: runtime: Add auto-cleanup macros for "resume and get" operations") and define a runtime PM usage counter guard in which pm_runtime_get_noresume() and pm_runtime_put_noidle() will be used for incrementing and decrementing it, respectively. Signed-off-by: Rafael J. Wysocki Reviewed-by: Jonathan Cameron --- include/linux/pm_runtime.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index edb8aed5ef62..a3f44f6c2da1 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -614,6 +614,9 @@ static inline int pm_runtime_put_autosuspend(struct device *dev) return __pm_runtime_put_autosuspend(dev); } +DEFINE_GUARD(pm_runtime_noresume, struct device *, + pm_runtime_get_noresume(_T), pm_runtime_put_noidle(_T)); + DEFINE_GUARD(pm_runtime_active, struct device *, pm_runtime_get_sync(_T), pm_runtime_put(_T)); DEFINE_GUARD(pm_runtime_active_auto, struct device *, -- cgit v1.2.3 From b8179af120943e2fc099ea87caa234039a709a66 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 29 Jul 2025 08:46:35 +0200 Subject: mm/memory_hotplug: activate node before adding new memory blocks The sysfs attributes for memory blocks require the node ID to be set and initialized, so move the node activation before adding new memory blocks. This also has the nice side effect that the BUG_ON() can be converted into a WARN_ON() as we now can handle registration errors. Link: https://lkml.kernel.org/r/20250729064637.51662-3-hare@kernel.org Fixes: b9ff036082cd ("mm/memory_hotplug.c: make add_memory_resource use __try_online_node") Signed-off-by: Hannes Reinecke Acked-by: David Hildenbrand Acked-by: Oscar Salvador Reviewed-by: Donet Tom Signed-off-by: Andrew Morton --- drivers/base/memory.c | 4 ++-- include/linux/memory.h | 2 +- mm/memory_hotplug.c | 32 +++++++++++++++++--------------- 3 files changed, 20 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 894d3891292b..fb212a889e65 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -879,7 +879,7 @@ static void remove_memory_block(struct memory_block *memory) * Called under device_hotplug_lock. */ int create_memory_block_devices(unsigned long start, unsigned long size, - struct vmem_altmap *altmap, + int nid, struct vmem_altmap *altmap, struct memory_group *group) { const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); @@ -893,7 +893,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size, return -EINVAL; for (block_id = start_block_id; block_id != end_block_id; block_id++) { - ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_OFFLINE, altmap, group); + ret = add_memory_block(block_id, nid, MEM_OFFLINE, altmap, group); if (ret) break; } diff --git a/include/linux/memory.h b/include/linux/memory.h index 40eb70ccb09d..4a29153e372e 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -159,7 +159,7 @@ static inline unsigned long memory_block_advised_max_size(void) extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); int create_memory_block_devices(unsigned long start, unsigned long size, - struct vmem_altmap *altmap, + int nid, struct vmem_altmap *altmap, struct memory_group *group); void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e9f14de4a9c9..0be83039c3b5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1477,7 +1477,7 @@ static int create_altmaps_and_memory_blocks(int nid, struct memory_group *group, } /* create memory block devices after memory was added */ - ret = create_memory_block_devices(cur_start, memblock_size, + ret = create_memory_block_devices(cur_start, memblock_size, nid, params.altmap, group); if (ret) { arch_remove_memory(cur_start, memblock_size, NULL); @@ -1539,8 +1539,16 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) ret = __try_online_node(nid, false); if (ret < 0) - goto error; - new_node = ret; + goto error_memblock_remove; + if (ret) { + node_set_online(nid); + ret = register_one_node(nid); + if (WARN_ON(ret)) { + node_set_offline(nid); + goto error_memblock_remove; + } + new_node = true; + } /* * Self hosted memmap array @@ -1556,24 +1564,13 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) goto error; /* create memory block devices after memory was added */ - ret = create_memory_block_devices(start, size, NULL, group); + ret = create_memory_block_devices(start, size, nid, NULL, group); if (ret) { arch_remove_memory(start, size, params.altmap); goto error; } } - if (new_node) { - /* If sysfs file of new node can't be created, cpu on the node - * can't be hot-added. There is no rollback way now. - * So, check by BUG_ON() to catch it reluctantly.. - * We online node here. We can't roll back from here. - */ - node_set_online(nid); - ret = register_one_node(nid); - BUG_ON(ret); - } - register_memory_blocks_under_node_hotplug(nid, PFN_DOWN(start), PFN_UP(start + size - 1)); @@ -1597,6 +1594,11 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) return ret; error: + if (new_node) { + node_set_offline(nid); + unregister_one_node(nid); + } +error_memblock_remove: if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) memblock_remove(start, size); error_mem_hotplug_end: -- cgit v1.2.3 From 0a947c14e48cbf9de222836170282e0167a9e096 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 29 Jul 2025 08:46:36 +0200 Subject: drivers/base: move memory_block_add_nid() into the caller Now the node id only needs to be set for early memory, so move memory_block_add_nid() into the caller and rename it into memory_block_add_nid_early(). This allows us to further simplify the code by dropping the 'context' argument to do_register_memory_block_under_node(). Link: https://lkml.kernel.org/r/20250729064637.51662-4-hare@kernel.org Suggested-by: David Hildenbrand Signed-off-by: Hannes Reinecke Acked-by: David Hildenbrand Acked-by: Oscar Salvador Reviewed-by: Donet Tom Signed-off-by: Andrew Morton --- drivers/base/memory.c | 36 ++++++++++++++++++------------------ drivers/base/node.c | 10 ++++------ include/linux/memory.h | 3 +-- 3 files changed, 23 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index fb212a889e65..6d84a02cfa5d 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -769,21 +769,22 @@ static struct zone *early_node_zone_for_memory_block(struct memory_block *mem, #ifdef CONFIG_NUMA /** - * memory_block_add_nid() - Indicate that system RAM falling into this memory - * block device (partially) belongs to the given node. + * memory_block_add_nid_early() - Indicate that early system RAM falling into + * this memory block device (partially) belongs + * to the given node. * @mem: The memory block device. * @nid: The node id. - * @context: The memory initialization context. * - * Indicate that system RAM falling into this memory block (partially) belongs - * to the given node. If the context indicates ("early") that we are adding the - * node during node device subsystem initialization, this will also properly - * set/adjust mem->zone based on the zone ranges of the given node. + * Indicate that early system RAM falling into this memory block (partially) + * belongs to the given node. This will also properly set/adjust mem->zone based + * on the zone ranges of the given node. + * + * Memory hotplug handles this on memory block creation, where we can only have + * a single nid span a memory block. */ -void memory_block_add_nid(struct memory_block *mem, int nid, - enum meminit_context context) +void memory_block_add_nid_early(struct memory_block *mem, int nid) { - if (context == MEMINIT_EARLY && mem->nid != nid) { + if (mem->nid != nid) { /* * For early memory we have to determine the zone when setting * the node id and handle multiple nodes spanning a single @@ -797,15 +798,14 @@ void memory_block_add_nid(struct memory_block *mem, int nid, mem->zone = early_node_zone_for_memory_block(mem, nid); else mem->zone = NULL; + /* + * If this memory block spans multiple nodes, we only indicate + * the last processed node. If we span multiple nodes (not applicable + * to hotplugged memory), zone == NULL will prohibit memory offlining + * and consequently unplug. + */ + mem->nid = nid; } - - /* - * If this memory block spans multiple nodes, we only indicate - * the last processed node. If we span multiple nodes (not applicable - * to hotplugged memory), zone == NULL will prohibit memory offlining - * and consequently unplug. - */ - mem->nid = nid; } #endif diff --git a/drivers/base/node.c b/drivers/base/node.c index 67b01d579737..6b6e55a98b79 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -781,13 +781,10 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) #ifdef CONFIG_MEMORY_HOTPLUG static void do_register_memory_block_under_node(int nid, - struct memory_block *mem_blk, - enum meminit_context context) + struct memory_block *mem_blk) { int ret; - memory_block_add_nid(mem_blk, nid, context); - ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, &mem_blk->dev.kobj, kobject_name(&mem_blk->dev.kobj)); @@ -815,7 +812,7 @@ static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk, { int nid = *(int *)arg; - do_register_memory_block_under_node(nid, mem_blk, MEMINIT_HOTPLUG); + do_register_memory_block_under_node(nid, mem_blk); return 0; } @@ -855,7 +852,8 @@ static void register_memory_blocks_under_nodes(void) if (!mem) continue; - do_register_memory_block_under_node(nid, mem, MEMINIT_EARLY); + memory_block_add_nid_early(mem, nid); + do_register_memory_block_under_node(nid, mem); put_device(&mem->dev); } diff --git a/include/linux/memory.h b/include/linux/memory.h index 4a29153e372e..43d378038ce2 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -202,8 +202,7 @@ static inline unsigned long phys_to_block_id(unsigned long phys) } #ifdef CONFIG_NUMA -void memory_block_add_nid(struct memory_block *mem, int nid, - enum meminit_context context); +void memory_block_add_nid_early(struct memory_block *mem, int nid); #endif /* CONFIG_NUMA */ int memory_block_advise_max_size(unsigned long size); unsigned long memory_block_advised_max_size(void); -- cgit v1.2.3 From b71a6e2a1b710ea31c363a72c75f510ef35d8e69 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Thu, 2 Oct 2025 17:12:35 +0900 Subject: i2c: rename wait_for_completion callback to wait_for_completion_cb Functionally no change. Remove the ambiguity of 'wait_for_completion'. It helps development of the DEPT dependency tracker, but seems favorable in any case. Signed-off-by: Byungchul Park [wsa: reworded commit message] Signed-off-by: Wolfram Sang --- drivers/i2c/algos/i2c-algo-pca.c | 2 +- drivers/i2c/busses/i2c-pca-isa.c | 2 +- drivers/i2c/busses/i2c-pca-platform.c | 2 +- include/linux/i2c-algo-pca.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c index 74b66aec33d4..ee86df4cff4b 100644 --- a/drivers/i2c/algos/i2c-algo-pca.c +++ b/drivers/i2c/algos/i2c-algo-pca.c @@ -30,7 +30,7 @@ static int i2c_debug; #define pca_clock(adap) adap->i2c_clock #define pca_set_con(adap, val) pca_outw(adap, I2C_PCA_CON, val) #define pca_get_con(adap) pca_inw(adap, I2C_PCA_CON) -#define pca_wait(adap) adap->wait_for_completion(adap->data) +#define pca_wait(adap) adap->wait_for_completion_cb(adap->data) static void pca_reset(struct i2c_algo_pca_data *adap) { diff --git a/drivers/i2c/busses/i2c-pca-isa.c b/drivers/i2c/busses/i2c-pca-isa.c index 85e8cf58e8bf..0cbf2f509527 100644 --- a/drivers/i2c/busses/i2c-pca-isa.c +++ b/drivers/i2c/busses/i2c-pca-isa.c @@ -95,7 +95,7 @@ static struct i2c_algo_pca_data pca_isa_data = { /* .data intentionally left NULL, not needed with ISA */ .write_byte = pca_isa_writebyte, .read_byte = pca_isa_readbyte, - .wait_for_completion = pca_isa_waitforcompletion, + .wait_for_completion_cb = pca_isa_waitforcompletion, .reset_chip = pca_isa_resetchip, }; diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c index 87da8241b927..c0f35ebbe37d 100644 --- a/drivers/i2c/busses/i2c-pca-platform.c +++ b/drivers/i2c/busses/i2c-pca-platform.c @@ -180,7 +180,7 @@ static int i2c_pca_pf_probe(struct platform_device *pdev) } i2c->algo_data.data = i2c; - i2c->algo_data.wait_for_completion = i2c_pca_pf_waitforcompletion; + i2c->algo_data.wait_for_completion_cb = i2c_pca_pf_waitforcompletion; if (i2c->gpio) i2c->algo_data.reset_chip = i2c_pca_pf_resetchip; else diff --git a/include/linux/i2c-algo-pca.h b/include/linux/i2c-algo-pca.h index 7c522fdd9ea7..e305bf32e40a 100644 --- a/include/linux/i2c-algo-pca.h +++ b/include/linux/i2c-algo-pca.h @@ -71,7 +71,7 @@ struct i2c_algo_pca_data { void *data; /* private low level data */ void (*write_byte) (void *data, int reg, int val); int (*read_byte) (void *data, int reg); - int (*wait_for_completion) (void *data); + int (*wait_for_completion_cb) (void *data); void (*reset_chip) (void *data); /* For PCA9564, use one of the predefined frequencies: * 330000, 288000, 217000, 146000, 88000, 59000, 44000, 36000 -- cgit v1.2.3 From 929bf010e0599ddef6b640cd314f1de65dd1ca3e Mon Sep 17 00:00:00 2001 From: Li Zhe Date: Thu, 14 Aug 2025 14:47:10 +0800 Subject: mm: introduce num_pages_contiguous() Let's add a simple helper for determining the number of contiguous pages that represent contiguous PFNs. In an ideal world, this helper would be simpler or not even required. Unfortunately, on some configs we still have to maintain (SPARSEMEM without VMEMMAP), the memmap is allocated per memory section, and we might run into weird corner cases of false positives when blindly testing for contiguous pages only. One example of such false positives would be a memory section-sized hole that does not have a memmap. The surrounding memory sections might get "struct pages" that are contiguous, but the PFNs are actually not. This helper will, for example, be useful for determining contiguous PFNs in a GUP result, to batch further operations across returned "struct page"s. VFIO will utilize this interface to accelerate the VFIO DMA map process. Implementation based on Linus' suggestions to avoid new usage of nth_page() where avoidable. Suggested-by: Linus Torvalds Suggested-by: Jason Gunthorpe Signed-off-by: Li Zhe Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20250814064714.56485-2-lizhe.67@bytedance.com Signed-off-by: Alex Williamson --- include/linux/mm.h | 7 ++++++- include/linux/mm_inline.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 06978b4dbeb8..f092ce3530bb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1833,7 +1833,12 @@ static inline unsigned long memdesc_section(memdesc_flags_t mdf) { return (mdf.f >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } -#endif +#else /* !SECTION_IN_PAGE_FLAGS */ +static inline unsigned long memdesc_section(memdesc_flags_t mdf) +{ + return 0; +} +#endif /* SECTION_IN_PAGE_FLAGS */ /** * folio_pfn - Return the Page Frame Number of a folio. diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index d6c1011b38f2..f6a2b2d20016 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -617,4 +617,40 @@ static inline bool vma_has_recency(const struct vm_area_struct *vma) return true; } +/** + * num_pages_contiguous() - determine the number of contiguous pages + * that represent contiguous PFNs + * @pages: an array of page pointers + * @nr_pages: length of the array, at least 1 + * + * Determine the number of contiguous pages that represent contiguous PFNs + * in @pages, starting from the first page. + * + * In some kernel configs contiguous PFNs will not have contiguous struct + * pages. In these configurations num_pages_contiguous() will return a num + * smaller than ideal number. The caller should continue to check for pfn + * contiguity after each call to num_pages_contiguous(). + * + * Returns the number of contiguous pages. + */ +static inline size_t num_pages_contiguous(struct page **pages, size_t nr_pages) +{ + struct page *cur_page = pages[0]; + unsigned long section = memdesc_section(cur_page->flags); + size_t i; + + for (i = 1; i < nr_pages; i++) { + if (++cur_page != pages[i]) + break; + /* + * In unproblematic kernel configs, page_to_section() == 0 and + * the whole check will get optimized out. + */ + if (memdesc_section(cur_page->flags) != section) + break; + } + + return i; +} + #endif -- cgit v1.2.3 From 95920c2ed02bde551ab654e9749c2ca7bc3100e0 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 30 Sep 2025 13:43:29 +0200 Subject: page_pool: Fix PP_MAGIC_MASK to avoid crashing on some 32-bit arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Helge reported that the introduction of PP_MAGIC_MASK let to crashes on boot on his 32-bit parisc machine. The cause of this is the mask is set too wide, so the page_pool_page_is_pp() incurs false positives which crashes the machine. Just disabling the check in page_pool_is_pp() will lead to the page_pool code itself malfunctioning; so instead of doing this, this patch changes the define for PP_DMA_INDEX_BITS to avoid mistaking arbitrary kernel pointers for page_pool-tagged pages. The fix relies on the kernel pointers that alias with the pp_magic field always being above PAGE_OFFSET. With this assumption, we can use the lowest bit of the value of PAGE_OFFSET as the upper bound of the PP_DMA_INDEX_MASK, which should avoid the false positives. Because we cannot rely on PAGE_OFFSET always being a compile-time constant, nor on it always being >0, we fall back to disabling the dma_index storage when there are not enough bits available. This leaves us in the situation we were in before the patch in the Fixes tag, but only on a subset of architecture configurations. This seems to be the best we can do until the transition to page types in complete for page_pool pages. v2: - Make sure there's at least 8 bits available and that the PAGE_OFFSET bit calculation doesn't wrap Link: https://lore.kernel.org/all/aMNJMFa5fDalFmtn@p100/ Fixes: ee62ce7a1d90 ("page_pool: Track DMA-mapped pages and unmap them when destroying the pool") Cc: stable@vger.kernel.org # 6.15+ Tested-by: Helge Deller Signed-off-by: Toke Høiland-Jørgensen Reviewed-by: Mina Almasry Tested-by: Helge Deller Link: https://patch.msgid.link/20250930114331.675412-1-toke@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/mm.h | 22 ++++++++------- net/core/page_pool.c | 76 ++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 66 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1ae97a0b8ec7..0905eb6b55ec 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4159,14 +4159,13 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); * since this value becomes part of PP_SIGNATURE; meaning we can just use the * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is - * 0, we make sure that we leave the two topmost bits empty, as that guarantees - * we won't mistake a valid kernel pointer for a value we set, regardless of the - * VMSPLIT setting. + * 0, we use the lowest bit of PAGE_OFFSET as the boundary if that value is + * known at compile-time. * - * Altogether, this means that the number of bits available is constrained by - * the size of an unsigned long (at the upper end, subtracting two bits per the - * above), and the definition of PP_SIGNATURE (with or without - * POISON_POINTER_DELTA). + * If the value of PAGE_OFFSET is not known at compile time, or if it is too + * small to leave at least 8 bits available above PP_SIGNATURE, we define the + * number of bits to be 0, which turns off the DMA index tracking altogether + * (see page_pool_register_dma_index()). */ #define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA)) #if POISON_POINTER_DELTA > 0 @@ -4175,8 +4174,13 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); */ #define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT) #else -/* Always leave out the topmost two; see above. */ -#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2) +/* Use the lowest bit of PAGE_OFFSET if there's at least 8 bits available; see above */ +#define PP_DMA_INDEX_MIN_OFFSET (1 << (PP_DMA_INDEX_SHIFT + 8)) +#define PP_DMA_INDEX_BITS ((__builtin_constant_p(PAGE_OFFSET) && \ + PAGE_OFFSET >= PP_DMA_INDEX_MIN_OFFSET && \ + !(PAGE_OFFSET & (PP_DMA_INDEX_MIN_OFFSET - 1))) ? \ + MIN(32, __ffs(PAGE_OFFSET) - PP_DMA_INDEX_SHIFT) : 0) + #endif #define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \ diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 492728f9e021..1a5edec485f1 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -468,11 +468,60 @@ page_pool_dma_sync_for_device(const struct page_pool *pool, } } +static int page_pool_register_dma_index(struct page_pool *pool, + netmem_ref netmem, gfp_t gfp) +{ + int err = 0; + u32 id; + + if (unlikely(!PP_DMA_INDEX_BITS)) + goto out; + + if (in_softirq()) + err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem), + PP_DMA_INDEX_LIMIT, gfp); + else + err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem), + PP_DMA_INDEX_LIMIT, gfp); + if (err) { + WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@"); + goto out; + } + + netmem_set_dma_index(netmem, id); +out: + return err; +} + +static int page_pool_release_dma_index(struct page_pool *pool, + netmem_ref netmem) +{ + struct page *old, *page = netmem_to_page(netmem); + unsigned long id; + + if (unlikely(!PP_DMA_INDEX_BITS)) + return 0; + + id = netmem_get_dma_index(netmem); + if (!id) + return -1; + + if (in_softirq()) + old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0); + else + old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0); + if (old != page) + return -1; + + netmem_set_dma_index(netmem, 0); + + return 0; +} + static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp) { dma_addr_t dma; int err; - u32 id; /* Setup DMA mapping: use 'struct page' area for storing DMA-addr * since dma_addr_t can be either 32 or 64 bits and does not always fit @@ -491,18 +540,10 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t g goto unmap_failed; } - if (in_softirq()) - err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem), - PP_DMA_INDEX_LIMIT, gfp); - else - err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem), - PP_DMA_INDEX_LIMIT, gfp); - if (err) { - WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@"); + err = page_pool_register_dma_index(pool, netmem, gfp); + if (err) goto unset_failed; - } - netmem_set_dma_index(netmem, id); page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len); return true; @@ -680,8 +721,6 @@ void page_pool_clear_pp_info(netmem_ref netmem) static __always_inline void __page_pool_release_netmem_dma(struct page_pool *pool, netmem_ref netmem) { - struct page *old, *page = netmem_to_page(netmem); - unsigned long id; dma_addr_t dma; if (!pool->dma_map) @@ -690,15 +729,7 @@ static __always_inline void __page_pool_release_netmem_dma(struct page_pool *poo */ return; - id = netmem_get_dma_index(netmem); - if (!id) - return; - - if (in_softirq()) - old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0); - else - old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0); - if (old != page) + if (page_pool_release_dma_index(pool, netmem)) return; dma = page_pool_get_dma_addr_netmem(netmem); @@ -708,7 +739,6 @@ static __always_inline void __page_pool_release_netmem_dma(struct page_pool *poo PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); page_pool_set_dma_addr_netmem(netmem, 0); - netmem_set_dma_index(netmem, 0); } /* Disconnects a page (from a page_pool). API users can have a need -- cgit v1.2.3 From 1ed06c83506ecaaf1836ddeb7c65772ff86d8d53 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Oct 2025 11:06:25 +0200 Subject: block: remove bio_iov_iter_get_pages Switch the only caller to bio_iov_iter_get_pages, and explain why it does not have any alignment requirements. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Reviewed-by: Keith Busch Reviewed-by: Qu Wenruo Signed-off-by: Jens Axboe --- block/blk-map.c | 6 +++++- include/linux/bio.h | 5 ----- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/block/blk-map.c b/block/blk-map.c index 165f2234f00f..6cce652c7fa6 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -283,7 +283,11 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask); if (!bio) return -ENOMEM; - ret = bio_iov_iter_get_pages(bio, iter); + /* + * No alignment requirements on our part to support arbitrary + * passthrough commands. + */ + ret = bio_iov_iter_get_pages_aligned(bio, iter, 0); if (ret) goto out_put; ret = blk_rq_append_bio(rq, bio); diff --git a/include/linux/bio.h b/include/linux/bio.h index a64a30131031..b01dae9506de 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -449,11 +449,6 @@ int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, int bio_iov_iter_get_pages_aligned(struct bio *bio, struct iov_iter *iter, unsigned len_align_mask); -static inline int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) -{ - return bio_iov_iter_get_pages_aligned(bio, iter, 0); -} - void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); -- cgit v1.2.3 From 82dd5d763c9b718e2d655b9565e0a06a91bb83dc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Oct 2025 11:06:26 +0200 Subject: block: rename bio_iov_iter_get_pages_aligned to bio_iov_iter_get_pages Now that the bio_iov_iter_get_pages is free again, use it instead of the more complicated now. Also drop the unused export. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Reviewed-by: Keith Busch Reviewed-by: Qu Wenruo Signed-off-by: Jens Axboe --- block/bio.c | 5 ++--- block/blk-map.c | 2 +- include/linux/bio.h | 2 +- include/linux/blkdev.h | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 3a1a848940dd..b3a79285c278 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1316,7 +1316,7 @@ static int bio_iov_iter_align_down(struct bio *bio, struct iov_iter *iter, } /** - * bio_iov_iter_get_pages_aligned - add user or kernel pages to a bio + * bio_iov_iter_get_pages - add user or kernel pages to a bio * @bio: bio to add pages to * @iter: iov iterator describing the region to be added * @len_align_mask: the mask to align the total size to, 0 for any length @@ -1336,7 +1336,7 @@ static int bio_iov_iter_align_down(struct bio *bio, struct iov_iter *iter, * MM encounters an error pinning the requested pages, it stops. Error * is returned only if 0 pages could be pinned. */ -int bio_iov_iter_get_pages_aligned(struct bio *bio, struct iov_iter *iter, +int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, unsigned len_align_mask) { int ret = 0; @@ -1360,7 +1360,6 @@ int bio_iov_iter_get_pages_aligned(struct bio *bio, struct iov_iter *iter, return bio_iov_iter_align_down(bio, iter, len_align_mask); return ret; } -EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages_aligned); static void submit_bio_wait_endio(struct bio *bio) { diff --git a/block/blk-map.c b/block/blk-map.c index 6cce652c7fa6..60faf036fb6e 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -287,7 +287,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, * No alignment requirements on our part to support arbitrary * passthrough commands. */ - ret = bio_iov_iter_get_pages_aligned(bio, iter, 0); + ret = bio_iov_iter_get_pages(bio, iter, 0); if (ret) goto out_put; ret = blk_rq_append_bio(rq, bio); diff --git a/include/linux/bio.h b/include/linux/bio.h index b01dae9506de..16c1c85613b7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -446,7 +446,7 @@ int submit_bio_wait(struct bio *bio); int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, size_t len, enum req_op op); -int bio_iov_iter_get_pages_aligned(struct bio *bio, struct iov_iter *iter, +int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, unsigned len_align_mask); void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 066e5309bd45..c97e8b0e67b6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1876,7 +1876,7 @@ static inline int bio_split_rw_at(struct bio *bio, static inline int bio_iov_iter_get_bdev_pages(struct bio *bio, struct iov_iter *iter, struct block_device *bdev) { - return bio_iov_iter_get_pages_aligned(bio, iter, + return bio_iov_iter_get_pages(bio, iter, bdev_logical_block_size(bdev) - 1); } -- cgit v1.2.3 From 506aa235f6e0baa00bf792df82a5e9f618b7a5d8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Oct 2025 11:06:28 +0200 Subject: block: move bio_iov_iter_get_bdev_pages to block/fops.c Keep bio_iov_iter_get_bdev_pages local with the callers, as blindly looking at the bdev logical block size is often not the best idea unless on a block device. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Reviewed-by: Keith Busch Reviewed-by: Qu Wenruo Signed-off-by: Jens Axboe --- block/fops.c | 13 ++++++++++--- include/linux/blkdev.h | 7 ------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/block/fops.c b/block/fops.c index c2c0396ea9ee..5e3db9fead77 100644 --- a/block/fops.c +++ b/block/fops.c @@ -43,6 +43,13 @@ static bool blkdev_dio_invalid(struct block_device *bdev, struct kiocb *iocb, (bdev_logical_block_size(bdev) - 1); } +static inline int blkdev_iov_iter_get_pages(struct bio *bio, + struct iov_iter *iter, struct block_device *bdev) +{ + return bio_iov_iter_get_pages(bio, iter, + bdev_logical_block_size(bdev) - 1); +} + #define DIO_INLINE_BIO_VECS 4 static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, @@ -78,7 +85,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, if (iocb->ki_flags & IOCB_ATOMIC) bio.bi_opf |= REQ_ATOMIC; - ret = bio_iov_iter_get_bdev_pages(&bio, iter, bdev); + ret = blkdev_iov_iter_get_pages(&bio, iter, bdev); if (unlikely(ret)) goto out; ret = bio.bi_iter.bi_size; @@ -212,7 +219,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, bio->bi_end_io = blkdev_bio_end_io; bio->bi_ioprio = iocb->ki_ioprio; - ret = bio_iov_iter_get_bdev_pages(bio, iter, bdev); + ret = blkdev_iov_iter_get_pages(bio, iter, bdev); if (unlikely(ret)) { bio->bi_status = BLK_STS_IOERR; bio_endio(bio); @@ -348,7 +355,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, */ bio_iov_bvec_set(bio, iter); } else { - ret = bio_iov_iter_get_bdev_pages(bio, iter, bdev); + ret = blkdev_iov_iter_get_pages(bio, iter, bdev); if (unlikely(ret)) goto out_bio_put; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c97e8b0e67b6..d4db5039836d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1873,13 +1873,6 @@ static inline int bio_split_rw_at(struct bio *bio, return bio_split_io_at(bio, lim, segs, max_bytes, lim->dma_alignment); } -static inline int bio_iov_iter_get_bdev_pages(struct bio *bio, - struct iov_iter *iter, struct block_device *bdev) -{ - return bio_iov_iter_get_pages(bio, iter, - bdev_logical_block_size(bdev) - 1); -} - #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From 8375b76517cb52bac0903071feedc218c45d74d2 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 21 Sep 2025 08:44:56 +0300 Subject: kho: replace kho_preserve_phys() with kho_preserve_pages() to make it clear that KHO operates on pages rather than on a random physical address. The kho_preserve_pages() will be also used in upcoming support for vmalloc preservation. Link: https://lkml.kernel.org/r/20250921054458.4043761-3-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Reviewed-by: Jason Gunthorpe Cc: Alexander Graf Cc: Baoquan He Cc: Changyuan Lyu Cc: Chris Li Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- include/linux/kexec_handover.h | 5 +++-- kernel/kexec_handover.c | 25 +++++++++++-------------- mm/memblock.c | 4 +++- 3 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 559d13a3bc44..cec663b39861 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -18,6 +18,7 @@ enum kho_event { struct folio; struct notifier_block; +struct page; #define DECLARE_KHOSER_PTR(name, type) \ union { \ @@ -43,7 +44,7 @@ bool kho_is_enabled(void); bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); -int kho_preserve_phys(phys_addr_t phys, size_t size); +int kho_preserve_pages(struct page *page, unsigned int nr_pages); struct folio *kho_restore_folio(phys_addr_t phys); int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); int kho_retrieve_subtree(const char *name, phys_addr_t *phys); @@ -71,7 +72,7 @@ static inline int kho_preserve_folio(struct folio *folio) return -EOPNOTSUPP; } -static inline int kho_preserve_phys(phys_addr_t phys, size_t size) +static inline int kho_preserve_pages(struct page *page, unsigned int nr_pages) { return -EOPNOTSUPP; } diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index b8d0d63f6145..1c44a55f758e 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -725,26 +725,23 @@ int kho_preserve_folio(struct folio *folio) EXPORT_SYMBOL_GPL(kho_preserve_folio); /** - * kho_preserve_phys - preserve a physically contiguous range across kexec. - * @phys: physical address of the range. - * @size: size of the range. + * kho_preserve_pages - preserve contiguous pages across kexec + * @page: first page in the list. + * @nr_pages: number of pages. * - * Instructs KHO to preserve the memory range from @phys to @phys + @size - * across kexec. + * Preserve a contiguous list of order 0 pages. Must be restored using + * kho_restore_pages() to ensure the pages are restored properly as order 0. * * Return: 0 on success, error code on failure */ -int kho_preserve_phys(phys_addr_t phys, size_t size) +int kho_preserve_pages(struct page *page, unsigned int nr_pages) { - unsigned long pfn = PHYS_PFN(phys); + struct kho_mem_track *track = &kho_out.ser.track; + const unsigned long start_pfn = page_to_pfn(page); + const unsigned long end_pfn = start_pfn + nr_pages; + unsigned long pfn = start_pfn; unsigned long failed_pfn = 0; - const unsigned long start_pfn = pfn; - const unsigned long end_pfn = PHYS_PFN(phys + size); int err = 0; - struct kho_mem_track *track = &kho_out.ser.track; - - if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size)) - return -EINVAL; while (pfn < end_pfn) { const unsigned int order = @@ -764,7 +761,7 @@ int kho_preserve_phys(phys_addr_t phys, size_t size) return err; } -EXPORT_SYMBOL_GPL(kho_preserve_phys); +EXPORT_SYMBOL_GPL(kho_preserve_pages); /* Handling for debug/kho/out */ diff --git a/mm/memblock.c b/mm/memblock.c index 120a501a887a..e23e16618e9b 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2452,8 +2452,10 @@ static int reserve_mem_kho_finalize(struct kho_serialization *ser) for (i = 0; i < reserved_mem_count; i++) { struct reserve_mem_table *map = &reserved_mem_table[i]; + struct page *page = phys_to_page(map->start); + unsigned int nr_pages = map->size >> PAGE_SHIFT; - err |= kho_preserve_phys(map->start, map->size); + err |= kho_preserve_pages(page, nr_pages); } err |= kho_preserve_folio(page_folio(kho_fdt)); -- cgit v1.2.3 From a667300bd53f272a3055238bcefe108f88836270 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 21 Sep 2025 08:44:57 +0300 Subject: kho: add support for preserving vmalloc allocations A vmalloc allocation is preserved using binary structure similar to global KHO memory tracker. It's a linked list of pages where each page is an array of physical address of pages in vmalloc area. kho_preserve_vmalloc() hands out the physical address of the head page to the caller. This address is used as the argument to kho_vmalloc_restore() to restore the mapping in the vmalloc address space and populate it with the preserved pages. [pasha.tatashin@soleen.com: free chunks using free_page() not kfree()] Link: https://lkml.kernel.org/r/mafs0a52idbeg.fsf@kernel.org [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20250921054458.4043761-4-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Baoquan He Cc: Changyuan Lyu Cc: Chris Li Cc: Jason Gunthorpe Signed-off-by: Andrew Morton --- include/linux/kexec_handover.h | 28 ++++ kernel/kexec_handover.c | 281 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 309 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index cec663b39861..25042c1d8d54 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -39,13 +39,24 @@ struct page; struct kho_serialization; +struct kho_vmalloc_chunk; +struct kho_vmalloc { + DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *); + unsigned int total_pages; + unsigned short flags; + unsigned short order; +}; + #ifdef CONFIG_KEXEC_HANDOVER bool kho_is_enabled(void); bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); int kho_preserve_pages(struct page *page, unsigned int nr_pages); +int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation); struct folio *kho_restore_folio(phys_addr_t phys); +struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages); +void *kho_restore_vmalloc(const struct kho_vmalloc *preservation); int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); int kho_retrieve_subtree(const char *name, phys_addr_t *phys); @@ -77,11 +88,28 @@ static inline int kho_preserve_pages(struct page *page, unsigned int nr_pages) return -EOPNOTSUPP; } +static inline int kho_preserve_vmalloc(void *ptr, + struct kho_vmalloc *preservation) +{ + return -EOPNOTSUPP; +} + static inline struct folio *kho_restore_folio(phys_addr_t phys) { return NULL; } +static inline struct page *kho_restore_pages(phys_addr_t phys, + unsigned int nr_pages) +{ + return NULL; +} + +static inline void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) +{ + return NULL; +} + static inline int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt) { diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index 1c44a55f758e..76f0940fb485 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -274,6 +275,37 @@ struct folio *kho_restore_folio(phys_addr_t phys) } EXPORT_SYMBOL_GPL(kho_restore_folio); +/** + * kho_restore_pages - restore list of contiguous order 0 pages. + * @phys: physical address of the first page. + * @nr_pages: number of pages. + * + * Restore a contiguous list of order 0 pages that was preserved with + * kho_preserve_pages(). + * + * Return: 0 on success, error code on failure + */ +struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages) +{ + const unsigned long start_pfn = PHYS_PFN(phys); + const unsigned long end_pfn = start_pfn + nr_pages; + unsigned long pfn = start_pfn; + + while (pfn < end_pfn) { + const unsigned int order = + min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn)); + struct page *page = kho_restore_page(PFN_PHYS(pfn)); + + if (!page) + return NULL; + split_page(page, order); + pfn += 1 << order; + } + + return pfn_to_page(start_pfn); +} +EXPORT_SYMBOL_GPL(kho_restore_pages); + /* Serialize and deserialize struct kho_mem_phys across kexec * * Record all the bitmaps in a linked list of pages for the next kernel to @@ -763,6 +795,255 @@ int kho_preserve_pages(struct page *page, unsigned int nr_pages) } EXPORT_SYMBOL_GPL(kho_preserve_pages); +struct kho_vmalloc_hdr { + DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *); +}; + +#define KHO_VMALLOC_SIZE \ + ((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / \ + sizeof(phys_addr_t)) + +struct kho_vmalloc_chunk { + struct kho_vmalloc_hdr hdr; + phys_addr_t phys[KHO_VMALLOC_SIZE]; +}; + +static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE); + +/* vmalloc flags KHO supports */ +#define KHO_VMALLOC_SUPPORTED_FLAGS (VM_ALLOC | VM_ALLOW_HUGE_VMAP) + +/* KHO internal flags for vmalloc preservations */ +#define KHO_VMALLOC_ALLOC 0x0001 +#define KHO_VMALLOC_HUGE_VMAP 0x0002 + +static unsigned short vmalloc_flags_to_kho(unsigned int vm_flags) +{ + unsigned short kho_flags = 0; + + if (vm_flags & VM_ALLOC) + kho_flags |= KHO_VMALLOC_ALLOC; + if (vm_flags & VM_ALLOW_HUGE_VMAP) + kho_flags |= KHO_VMALLOC_HUGE_VMAP; + + return kho_flags; +} + +static unsigned int kho_flags_to_vmalloc(unsigned short kho_flags) +{ + unsigned int vm_flags = 0; + + if (kho_flags & KHO_VMALLOC_ALLOC) + vm_flags |= VM_ALLOC; + if (kho_flags & KHO_VMALLOC_HUGE_VMAP) + vm_flags |= VM_ALLOW_HUGE_VMAP; + + return vm_flags; +} + +static struct kho_vmalloc_chunk *new_vmalloc_chunk(struct kho_vmalloc_chunk *cur) +{ + struct kho_vmalloc_chunk *chunk; + int err; + + chunk = (struct kho_vmalloc_chunk *)get_zeroed_page(GFP_KERNEL); + if (!chunk) + return NULL; + + err = kho_preserve_pages(virt_to_page(chunk), 1); + if (err) + goto err_free; + if (cur) + KHOSER_STORE_PTR(cur->hdr.next, chunk); + return chunk; + +err_free: + free_page((unsigned long)chunk); + return NULL; +} + +static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk) +{ + struct kho_mem_track *track = &kho_out.ser.track; + unsigned long pfn = PHYS_PFN(virt_to_phys(chunk)); + + __kho_unpreserve(track, pfn, pfn + 1); + + for (int i = 0; chunk->phys[i]; i++) { + pfn = PHYS_PFN(chunk->phys[i]); + __kho_unpreserve(track, pfn, pfn + 1); + } +} + +static void kho_vmalloc_free_chunks(struct kho_vmalloc *kho_vmalloc) +{ + struct kho_vmalloc_chunk *chunk = KHOSER_LOAD_PTR(kho_vmalloc->first); + + while (chunk) { + struct kho_vmalloc_chunk *tmp = chunk; + + kho_vmalloc_unpreserve_chunk(chunk); + + chunk = KHOSER_LOAD_PTR(chunk->hdr.next); + free_page((unsigned long)tmp); + } +} + +/** + * kho_preserve_vmalloc - preserve memory allocated with vmalloc() across kexec + * @ptr: pointer to the area in vmalloc address space + * @preservation: placeholder for preservation metadata + * + * Instructs KHO to preserve the area in vmalloc address space at @ptr. The + * physical pages mapped at @ptr will be preserved and on successful return + * @preservation will hold the physical address of a structure that describes + * the preservation. + * + * NOTE: The memory allocated with vmalloc_node() variants cannot be reliably + * restored on the same node + * + * Return: 0 on success, error code on failure + */ +int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation) +{ + struct kho_vmalloc_chunk *chunk; + struct vm_struct *vm = find_vm_area(ptr); + unsigned int order, flags, nr_contig_pages; + unsigned int idx = 0; + int err; + + if (!vm) + return -EINVAL; + + if (vm->flags & ~KHO_VMALLOC_SUPPORTED_FLAGS) + return -EOPNOTSUPP; + + flags = vmalloc_flags_to_kho(vm->flags); + order = get_vm_area_page_order(vm); + + chunk = new_vmalloc_chunk(NULL); + if (!chunk) + return -ENOMEM; + KHOSER_STORE_PTR(preservation->first, chunk); + + nr_contig_pages = (1 << order); + for (int i = 0; i < vm->nr_pages; i += nr_contig_pages) { + phys_addr_t phys = page_to_phys(vm->pages[i]); + + err = kho_preserve_pages(vm->pages[i], nr_contig_pages); + if (err) + goto err_free; + + chunk->phys[idx++] = phys; + if (idx == ARRAY_SIZE(chunk->phys)) { + chunk = new_vmalloc_chunk(chunk); + if (!chunk) + goto err_free; + idx = 0; + } + } + + preservation->total_pages = vm->nr_pages; + preservation->flags = flags; + preservation->order = order; + + return 0; + +err_free: + kho_vmalloc_free_chunks(preservation); + return err; +} +EXPORT_SYMBOL_GPL(kho_preserve_vmalloc); + +/** + * kho_restore_vmalloc - recreates and populates an area in vmalloc address + * space from the preserved memory. + * @preservation: preservation metadata. + * + * Recreates an area in vmalloc address space and populates it with memory that + * was preserved using kho_preserve_vmalloc(). + * + * Return: pointer to the area in the vmalloc address space, NULL on failure. + */ +void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) +{ + struct kho_vmalloc_chunk *chunk = KHOSER_LOAD_PTR(preservation->first); + unsigned int align, order, shift, vm_flags; + unsigned long total_pages, contig_pages; + unsigned long addr, size; + struct vm_struct *area; + struct page **pages; + unsigned int idx = 0; + int err; + + vm_flags = kho_flags_to_vmalloc(preservation->flags); + if (vm_flags & ~KHO_VMALLOC_SUPPORTED_FLAGS) + return NULL; + + total_pages = preservation->total_pages; + pages = kvmalloc_array(total_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return NULL; + order = preservation->order; + contig_pages = (1 << order); + shift = PAGE_SHIFT + order; + align = 1 << shift; + + while (chunk) { + struct page *page; + + for (int i = 0; chunk->phys[i]; i++) { + phys_addr_t phys = chunk->phys[i]; + + if (idx + contig_pages > total_pages) + goto err_free_pages_array; + + page = kho_restore_pages(phys, contig_pages); + if (!page) + goto err_free_pages_array; + + for (int j = 0; j < contig_pages; j++) + pages[idx++] = page; + + phys += contig_pages * PAGE_SIZE; + } + + page = kho_restore_pages(virt_to_phys(chunk), 1); + if (!page) + goto err_free_pages_array; + chunk = KHOSER_LOAD_PTR(chunk->hdr.next); + __free_page(page); + } + + if (idx != total_pages) + goto err_free_pages_array; + + area = __get_vm_area_node(total_pages * PAGE_SIZE, align, shift, + vm_flags, VMALLOC_START, VMALLOC_END, + NUMA_NO_NODE, GFP_KERNEL, + __builtin_return_address(0)); + if (!area) + goto err_free_pages_array; + + addr = (unsigned long)area->addr; + size = get_vm_area_size(area); + err = vmap_pages_range(addr, addr + size, PAGE_KERNEL, pages, shift); + if (err) + goto err_free_vm_area; + + area->nr_pages = total_pages; + area->pages = pages; + + return area->addr; + +err_free_vm_area: + free_vm_area(area); +err_free_pages_array: + kvfree(pages); + return NULL; +} +EXPORT_SYMBOL_GPL(kho_restore_vmalloc); + /* Handling for debug/kho/out */ static struct dentry *debugfs_root; -- cgit v1.2.3 From fcc0669c5aa681994c507b50f1c706c969d99730 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 22 Sep 2025 15:02:03 -0700 Subject: memcg: skip cgroup_file_notify if spinning is not allowed Generally memcg charging is allowed from all the contexts including NMI where even spinning on spinlock can cause locking issues. However one call chain was missed during the addition of memcg charging from any context support. That is try_charge_memcg() -> memcg_memory_event() -> cgroup_file_notify(). The possible function call tree under cgroup_file_notify() can acquire many different spin locks in spinning mode. Some of them are cgroup_file_kn_lock, kernfs_notify_lock, pool_workqeue's lock. So, let's just skip cgroup_file_notify() from memcg charging if the context does not allow spinning. Alternative approach was also explored where instead of skipping cgroup_file_notify(), we defer the memcg event processing to irq_work [1]. However it adds complexity and it was decided to keep things simple until we need more memcg events with !allow_spinning requirement. Link: https://lore.kernel.org/all/5qi2llyzf7gklncflo6gxoozljbm4h3tpnuv4u4ej4ztysvi6f@x44v7nz2wdzd/ [1] Link: https://lkml.kernel.org/r/20250922220203.261714-1-shakeel.butt@linux.dev Fixes: 3ac4638a734a ("memcg: make memcg_rstat_updated nmi safe") Signed-off-by: Shakeel Butt Acked-by: Michal Hocko Closes: https://lore.kernel.org/all/20250905061919.439648-1-yepeilin@google.com/ Cc: Alexei Starovoitov Cc: Johannes Weiner Cc: Kumar Kartikeya Dwivedi Cc: Muchun Song Cc: Peilin Ye Cc: Roman Gushchin Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 26 +++++++++++++++++++------- mm/memcontrol.c | 7 ++++--- 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 16fe0306e50e..873e510d6f8d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1001,22 +1001,28 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, count_memcg_events_mm(mm, idx, 1); } -static inline void memcg_memory_event(struct mem_cgroup *memcg, - enum memcg_memory_event event) +static inline void __memcg_memory_event(struct mem_cgroup *memcg, + enum memcg_memory_event event, + bool allow_spinning) { bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || event == MEMCG_SWAP_FAIL; + /* For now only MEMCG_MAX can happen with !allow_spinning context. */ + VM_WARN_ON_ONCE(!allow_spinning && event != MEMCG_MAX); + atomic_long_inc(&memcg->memory_events_local[event]); - if (!swap_event) + if (!swap_event && allow_spinning) cgroup_file_notify(&memcg->events_local_file); do { atomic_long_inc(&memcg->memory_events[event]); - if (swap_event) - cgroup_file_notify(&memcg->swap_events_file); - else - cgroup_file_notify(&memcg->events_file); + if (allow_spinning) { + if (swap_event) + cgroup_file_notify(&memcg->swap_events_file); + else + cgroup_file_notify(&memcg->events_file); + } if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; @@ -1026,6 +1032,12 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg, !mem_cgroup_is_root(memcg)); } +static inline void memcg_memory_event(struct mem_cgroup *memcg, + enum memcg_memory_event event) +{ + __memcg_memory_event(memcg, event, true); +} + static inline void memcg_memory_event_mm(struct mm_struct *mm, enum memcg_memory_event event) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e090f29eb03b..4deda33625f4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2307,12 +2307,13 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask, bool drained = false; bool raised_max_event = false; unsigned long pflags; + bool allow_spinning = gfpflags_allow_spinning(gfp_mask); retry: if (consume_stock(memcg, nr_pages)) return 0; - if (!gfpflags_allow_spinning(gfp_mask)) + if (!allow_spinning) /* Avoid the refill and flush of the older stock */ batch = nr_pages; @@ -2348,7 +2349,7 @@ retry: if (!gfpflags_allow_blocking(gfp_mask)) goto nomem; - memcg_memory_event(mem_over_limit, MEMCG_MAX); + __memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning); raised_max_event = true; psi_memstall_enter(&pflags); @@ -2415,7 +2416,7 @@ force: * a MEMCG_MAX event. */ if (!raised_max_event) - memcg_memory_event(mem_over_limit, MEMCG_MAX); + __memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning); /* * The allocation either can't fail or will lead to more memory -- cgit v1.2.3 From f04aad36a07cc17b7a5d5b9a2d386ce6fae63e93 Mon Sep 17 00:00:00 2001 From: Jakub Acs Date: Wed, 1 Oct 2025 09:03:52 +0000 Subject: mm/ksm: fix flag-dropping behavior in ksm_madvise syzkaller discovered the following crash: (kernel BUG) [ 44.607039] ------------[ cut here ]------------ [ 44.607422] kernel BUG at mm/userfaultfd.c:2067! [ 44.608148] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI [ 44.608814] CPU: 1 UID: 0 PID: 2475 Comm: reproducer Not tainted 6.16.0-rc6 #1 PREEMPT(none) [ 44.609635] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [ 44.610695] RIP: 0010:userfaultfd_release_all+0x3a8/0x460 [ 44.617726] Call Trace: [ 44.617926] [ 44.619284] userfaultfd_release+0xef/0x1b0 [ 44.620976] __fput+0x3f9/0xb60 [ 44.621240] fput_close_sync+0x110/0x210 [ 44.622222] __x64_sys_close+0x8f/0x120 [ 44.622530] do_syscall_64+0x5b/0x2f0 [ 44.622840] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 44.623244] RIP: 0033:0x7f365bb3f227 Kernel panics because it detects UFFD inconsistency during userfaultfd_release_all(). Specifically, a VMA which has a valid pointer to vma->vm_userfaultfd_ctx, but no UFFD flags in vma->vm_flags. The inconsistency is caused in ksm_madvise(): when user calls madvise() with MADV_UNMEARGEABLE on a VMA that is registered for UFFD in MINOR mode, it accidentally clears all flags stored in the upper 32 bits of vma->vm_flags. Assuming x86_64 kernel build, unsigned long is 64-bit and unsigned int and int are 32-bit wide. This setup causes the following mishap during the &= ~VM_MERGEABLE assignment. VM_MERGEABLE is a 32-bit constant of type unsigned int, 0x8000'0000. After ~ is applied, it becomes 0x7fff'ffff unsigned int, which is then promoted to unsigned long before the & operation. This promotion fills upper 32 bits with leading 0s, as we're doing unsigned conversion (and even for a signed conversion, this wouldn't help as the leading bit is 0). & operation thus ends up AND-ing vm_flags with 0x0000'0000'7fff'ffff instead of intended 0xffff'ffff'7fff'ffff and hence accidentally clears the upper 32-bits of its value. Fix it by changing `VM_MERGEABLE` constant to unsigned long, using the BIT() macro. Note: other VM_* flags are not affected: This only happens to the VM_MERGEABLE flag, as the other VM_* flags are all constants of type int and after ~ operation, they end up with leading 1 and are thus converted to unsigned long with leading 1s. Note 2: After commit 31defc3b01d9 ("userfaultfd: remove (VM_)BUG_ON()s"), this is no longer a kernel BUG, but a WARNING at the same place: [ 45.595973] WARNING: CPU: 1 PID: 2474 at mm/userfaultfd.c:2067 but the root-cause (flag-drop) remains the same. [akpm@linux-foundation.org: rust bindgen wasn't able to handle BIT(), from Miguel] Link: https://lore.kernel.org/oe-kbuild-all/202510030449.VfSaAjvd-lkp@intel.com/ Link: https://lkml.kernel.org/r/20251001090353.57523-2-acsjakub@amazon.de Fixes: 7677f7fd8be7 ("userfaultfd: add minor fault registration mode") Signed-off-by: Jakub Acs Signed-off-by: Miguel Ojeda Acked-by: David Hildenbrand Acked-by: SeongJae Park Tested-by: Alice Ryhl Tested-by: Miguel Ojeda Cc: Xu Xin Cc: Chengming Zhou Cc: Peter Xu Cc: Axel Rasmussen Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- rust/bindings/bindings_helper.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 06978b4dbeb8..70a2a76007d4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -323,7 +323,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ -#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ +#define VM_MERGEABLE BIT(31) /* KSM may merge identical pages */ #ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS #define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 04b75d4d01c3..2e43c66635a2 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -108,6 +108,7 @@ const xa_mark_t RUST_CONST_HELPER_XA_PRESENT = XA_PRESENT; const gfp_t RUST_CONST_HELPER_XA_FLAGS_ALLOC = XA_FLAGS_ALLOC; const gfp_t RUST_CONST_HELPER_XA_FLAGS_ALLOC1 = XA_FLAGS_ALLOC1; +const vm_flags_t RUST_CONST_HELPER_VM_MERGEABLE = VM_MERGEABLE; #if IS_ENABLED(CONFIG_ANDROID_BINDER_IPC_RUST) #include "../../drivers/android/binder/rust_binder.h" -- cgit v1.2.3 From 27c0a7b05d13a0dc54ed0b95fc12218210fdea1a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Jul 2025 12:02:27 -0700 Subject: libceph: Use HMAC-SHA256 library instead of crypto_shash Use the HMAC-SHA256 library functions instead of crypto_shash. This is simpler and faster. Signed-off-by: Eric Biggers Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 4 ++- net/ceph/Kconfig | 3 +- net/ceph/messenger_v2.c | 77 ++++++++++++------------------------------ 3 files changed, 26 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 1717cc57cdac..4b49592a738f 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -2,6 +2,7 @@ #ifndef __FS_CEPH_MESSENGER_H #define __FS_CEPH_MESSENGER_H +#include #include #include #include @@ -412,7 +413,8 @@ struct ceph_connection_v2_info { struct ceph_msg_data_cursor in_cursor; struct ceph_msg_data_cursor out_cursor; - struct crypto_shash *hmac_tfm; /* post-auth signature */ + struct hmac_sha256_key hmac_key; /* post-auth signature */ + bool hmac_key_set; struct crypto_aead *gcm_tfm; /* on-wire encryption */ struct aead_request *gcm_req; struct crypto_wait gcm_wait; diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index 0aa21fcbf6ec..ea60e3ef0834 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -6,8 +6,7 @@ config CEPH_LIB select CRYPTO_AES select CRYPTO_CBC select CRYPTO_GCM - select CRYPTO_HMAC - select CRYPTO_SHA256 + select CRYPTO_LIB_SHA256 select CRYPTO select KEYS default n diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index 5483b4eed94e..c54c8b5a6526 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -709,7 +709,7 @@ static int setup_crypto(struct ceph_connection *con, dout("%s con %p con_mode %d session_key_len %d con_secret_len %d\n", __func__, con, con->v2.con_mode, session_key_len, con_secret_len); - WARN_ON(con->v2.hmac_tfm || con->v2.gcm_tfm || con->v2.gcm_req); + WARN_ON(con->v2.hmac_key_set || con->v2.gcm_tfm || con->v2.gcm_req); if (con->v2.con_mode != CEPH_CON_MODE_CRC && con->v2.con_mode != CEPH_CON_MODE_SECURE) { @@ -723,22 +723,8 @@ static int setup_crypto(struct ceph_connection *con, return 0; /* auth_none */ } - noio_flag = memalloc_noio_save(); - con->v2.hmac_tfm = crypto_alloc_shash("hmac(sha256)", 0, 0); - memalloc_noio_restore(noio_flag); - if (IS_ERR(con->v2.hmac_tfm)) { - ret = PTR_ERR(con->v2.hmac_tfm); - con->v2.hmac_tfm = NULL; - pr_err("failed to allocate hmac tfm context: %d\n", ret); - return ret; - } - - ret = crypto_shash_setkey(con->v2.hmac_tfm, session_key, - session_key_len); - if (ret) { - pr_err("failed to set hmac key: %d\n", ret); - return ret; - } + hmac_sha256_preparekey(&con->v2.hmac_key, session_key, session_key_len); + con->v2.hmac_key_set = true; if (con->v2.con_mode == CEPH_CON_MODE_CRC) { WARN_ON(con_secret_len); @@ -793,38 +779,26 @@ static int setup_crypto(struct ceph_connection *con, return 0; /* auth_x, secure mode */ } -static int ceph_hmac_sha256(struct ceph_connection *con, - const struct kvec *kvecs, int kvec_cnt, u8 *hmac) +static void ceph_hmac_sha256(struct ceph_connection *con, + const struct kvec *kvecs, int kvec_cnt, + u8 hmac[SHA256_DIGEST_SIZE]) { - SHASH_DESC_ON_STACK(desc, con->v2.hmac_tfm); /* tfm arg is ignored */ - int ret; + struct hmac_sha256_ctx ctx; int i; - dout("%s con %p hmac_tfm %p kvec_cnt %d\n", __func__, con, - con->v2.hmac_tfm, kvec_cnt); + dout("%s con %p hmac_key_set %d kvec_cnt %d\n", __func__, con, + con->v2.hmac_key_set, kvec_cnt); - if (!con->v2.hmac_tfm) { + if (!con->v2.hmac_key_set) { memset(hmac, 0, SHA256_DIGEST_SIZE); - return 0; /* auth_none */ + return; /* auth_none */ } - desc->tfm = con->v2.hmac_tfm; - ret = crypto_shash_init(desc); - if (ret) - goto out; - - for (i = 0; i < kvec_cnt; i++) { - ret = crypto_shash_update(desc, kvecs[i].iov_base, - kvecs[i].iov_len); - if (ret) - goto out; - } - - ret = crypto_shash_final(desc, hmac); - -out: - shash_desc_zero(desc); - return ret; /* auth_x, both plain and secure modes */ + /* auth_x, both plain and secure modes */ + hmac_sha256_init(&ctx, &con->v2.hmac_key); + for (i = 0; i < kvec_cnt; i++) + hmac_sha256_update(&ctx, kvecs[i].iov_base, kvecs[i].iov_len); + hmac_sha256_final(&ctx, hmac); } static void gcm_inc_nonce(struct ceph_gcm_nonce *nonce) @@ -1455,17 +1429,14 @@ static int prepare_auth_request_more(struct ceph_connection *con, static int prepare_auth_signature(struct ceph_connection *con) { void *buf; - int ret; buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE, con_secure(con))); if (!buf) return -ENOMEM; - ret = ceph_hmac_sha256(con, con->v2.in_sign_kvecs, - con->v2.in_sign_kvec_cnt, CTRL_BODY(buf)); - if (ret) - return ret; + ceph_hmac_sha256(con, con->v2.in_sign_kvecs, con->v2.in_sign_kvec_cnt, + CTRL_BODY(buf)); return prepare_control(con, FRAME_TAG_AUTH_SIGNATURE, buf, SHA256_DIGEST_SIZE); @@ -2460,10 +2431,8 @@ static int process_auth_signature(struct ceph_connection *con, return -EINVAL; } - ret = ceph_hmac_sha256(con, con->v2.out_sign_kvecs, - con->v2.out_sign_kvec_cnt, hmac); - if (ret) - return ret; + ceph_hmac_sha256(con, con->v2.out_sign_kvecs, con->v2.out_sign_kvec_cnt, + hmac); ceph_decode_need(&p, end, SHA256_DIGEST_SIZE, bad); if (crypto_memneq(p, hmac, SHA256_DIGEST_SIZE)) { @@ -3814,10 +3783,8 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con) memzero_explicit(&con->v2.in_gcm_nonce, CEPH_GCM_IV_LEN); memzero_explicit(&con->v2.out_gcm_nonce, CEPH_GCM_IV_LEN); - if (con->v2.hmac_tfm) { - crypto_free_shash(con->v2.hmac_tfm); - con->v2.hmac_tfm = NULL; - } + memzero_explicit(&con->v2.hmac_key, sizeof(con->v2.hmac_key)); + con->v2.hmac_key_set = false; if (con->v2.gcm_req) { aead_request_free(con->v2.gcm_req); con->v2.gcm_req = NULL; -- cgit v1.2.3 From 59699a5a7114f09f890e86c09a6b32afb5eaa64c Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 6 Aug 2025 11:48:53 +0200 Subject: libceph: make ceph_con_get_out_msg() return the message pointer The caller in messenger_v1.c loads it anyway, so let's keep the pointer in the register instead of reloading it from memory. This eliminates a tiny bit of unnecessary overhead. Signed-off-by: Max Kellermann Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 2 +- net/ceph/messenger.c | 4 ++-- net/ceph/messenger_v1.c | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 4b49592a738f..9ebcac2981fd 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -550,7 +550,7 @@ void ceph_addr_set_port(struct ceph_entity_addr *addr, int p); void ceph_con_process_message(struct ceph_connection *con); int ceph_con_in_msg_alloc(struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); -void ceph_con_get_out_msg(struct ceph_connection *con); +struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con); /* messenger_v1.c */ int ceph_con_v1_try_read(struct ceph_connection *con); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 9f6d860411cb..b6c7bfc03503 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2110,7 +2110,7 @@ int ceph_con_in_msg_alloc(struct ceph_connection *con, return ret; } -void ceph_con_get_out_msg(struct ceph_connection *con) +struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con) { struct ceph_msg *msg; @@ -2141,7 +2141,7 @@ void ceph_con_get_out_msg(struct ceph_connection *con) * message or in case of a fault. */ WARN_ON(con->out_msg); - con->out_msg = ceph_msg_get(msg); + return con->out_msg = ceph_msg_get(msg); } /* diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index 0cb61c76b9b8..eebe4e19d75a 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -210,8 +210,7 @@ static void prepare_write_message(struct ceph_connection *con) &con->v1.out_temp_ack); } - ceph_con_get_out_msg(con); - m = con->out_msg; + m = ceph_con_get_out_msg(con); dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", m, con->out_seq, le16_to_cpu(m->hdr.type), -- cgit v1.2.3 From 7399212dcf64d90a6ab239bdd98bd325d922fc7e Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 6 Aug 2025 11:48:54 +0200 Subject: libceph: pass the message pointer instead of loading con->out_msg This pointer is in a register anyway, so let's use that instead of reloading from memory everywhere. [ idryomov: formatting ] Signed-off-by: Max Kellermann Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 4 +- net/ceph/messenger.c | 4 +- net/ceph/messenger_v1.c | 45 ++++++----- net/ceph/messenger_v2.c | 168 +++++++++++++++++++++-------------------- 4 files changed, 114 insertions(+), 107 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 9ebcac2981fd..6aa4c6478c9f 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -555,7 +555,7 @@ struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con); /* messenger_v1.c */ int ceph_con_v1_try_read(struct ceph_connection *con); int ceph_con_v1_try_write(struct ceph_connection *con); -void ceph_con_v1_revoke(struct ceph_connection *con); +void ceph_con_v1_revoke(struct ceph_connection *con, struct ceph_msg *msg); void ceph_con_v1_revoke_incoming(struct ceph_connection *con); bool ceph_con_v1_opened(struct ceph_connection *con); void ceph_con_v1_reset_session(struct ceph_connection *con); @@ -564,7 +564,7 @@ void ceph_con_v1_reset_protocol(struct ceph_connection *con); /* messenger_v2.c */ int ceph_con_v2_try_read(struct ceph_connection *con); int ceph_con_v2_try_write(struct ceph_connection *con); -void ceph_con_v2_revoke(struct ceph_connection *con); +void ceph_con_v2_revoke(struct ceph_connection *con, struct ceph_msg *msg); void ceph_con_v2_revoke_incoming(struct ceph_connection *con); bool ceph_con_v2_opened(struct ceph_connection *con); void ceph_con_v2_reset_session(struct ceph_connection *con); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b6c7bfc03503..08a6a083609f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1793,9 +1793,9 @@ void ceph_msg_revoke(struct ceph_msg *msg) WARN_ON(con->state != CEPH_CON_S_OPEN); dout("%s con %p msg %p was sending\n", __func__, con, msg); if (ceph_msgr2(from_msgr(con->msgr))) - ceph_con_v2_revoke(con); + ceph_con_v2_revoke(con, msg); else - ceph_con_v1_revoke(con); + ceph_con_v1_revoke(con, msg); ceph_msg_put(con->out_msg); con->out_msg = NULL; } else { diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index eebe4e19d75a..cc4a36ef8462 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -169,10 +169,9 @@ static void prepare_message_data(struct ceph_msg *msg, u32 data_len) * Prepare footer for currently outgoing message, and finish things * off. Assumes out_kvec* are already valid.. we just add on to the end. */ -static void prepare_write_message_footer(struct ceph_connection *con) +static void prepare_write_message_footer(struct ceph_connection *con, + struct ceph_msg *m) { - struct ceph_msg *m = con->out_msg; - m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; dout("prepare_write_message_footer %p\n", con); @@ -230,31 +229,31 @@ static void prepare_write_message(struct ceph_connection *con) /* fill in hdr crc and finalize hdr */ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); - con->out_msg->hdr.crc = cpu_to_le32(crc); - memcpy(&con->v1.out_hdr, &con->out_msg->hdr, sizeof(con->v1.out_hdr)); + m->hdr.crc = cpu_to_le32(crc); + memcpy(&con->v1.out_hdr, &m->hdr, sizeof(con->v1.out_hdr)); /* fill in front and middle crc, footer */ crc = crc32c(0, m->front.iov_base, m->front.iov_len); - con->out_msg->footer.front_crc = cpu_to_le32(crc); + m->footer.front_crc = cpu_to_le32(crc); if (m->middle) { crc = crc32c(0, m->middle->vec.iov_base, m->middle->vec.iov_len); - con->out_msg->footer.middle_crc = cpu_to_le32(crc); + m->footer.middle_crc = cpu_to_le32(crc); } else - con->out_msg->footer.middle_crc = 0; + m->footer.middle_crc = 0; dout("%s front_crc %u middle_crc %u\n", __func__, - le32_to_cpu(con->out_msg->footer.front_crc), - le32_to_cpu(con->out_msg->footer.middle_crc)); - con->out_msg->footer.flags = 0; + le32_to_cpu(m->footer.front_crc), + le32_to_cpu(m->footer.middle_crc)); + m->footer.flags = 0; /* is there a data payload? */ - con->out_msg->footer.data_crc = 0; + m->footer.data_crc = 0; if (m->data_length) { - prepare_message_data(con->out_msg, m->data_length); + prepare_message_data(m, m->data_length); con->v1.out_more = 1; /* data + footer will follow */ } else { /* no, queue up footer too and be done */ - prepare_write_message_footer(con); + prepare_write_message_footer(con, m); } ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); @@ -461,9 +460,9 @@ out: * 0 -> socket full, but more to do * <0 -> error */ -static int write_partial_message_data(struct ceph_connection *con) +static int write_partial_message_data(struct ceph_connection *con, + struct ceph_msg *msg) { - struct ceph_msg *msg = con->out_msg; struct ceph_msg_data_cursor *cursor = &msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); u32 crc; @@ -515,7 +514,7 @@ static int write_partial_message_data(struct ceph_connection *con) else msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; con_out_kvec_reset(con); - prepare_write_message_footer(con); + prepare_write_message_footer(con, msg); return 1; /* must return > 0 to indicate success */ } @@ -1471,6 +1470,7 @@ bad_tag: */ int ceph_con_v1_try_write(struct ceph_connection *con) { + struct ceph_msg *msg; int ret = 1; dout("try_write start %p state %d\n", con, con->state); @@ -1517,14 +1517,15 @@ more: } /* msg pages? */ - if (con->out_msg) { + msg = con->out_msg; + if (msg) { if (con->v1.out_msg_done) { - ceph_msg_put(con->out_msg); + ceph_msg_put(msg); con->out_msg = NULL; /* we're done with this one */ goto do_next; } - ret = write_partial_message_data(con); + ret = write_partial_message_data(con, msg); if (ret == 1) goto more; /* we need to send the footer, too! */ if (ret == 0) @@ -1563,10 +1564,8 @@ out: return ret; } -void ceph_con_v1_revoke(struct ceph_connection *con) +void ceph_con_v1_revoke(struct ceph_connection *con, struct ceph_msg *msg) { - struct ceph_msg *msg = con->out_msg; - WARN_ON(con->v1.out_skip); /* footer */ if (con->v1.out_msg_done) { diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index c54c8b5a6526..b44e936f3865 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -1560,10 +1560,11 @@ static int prepare_ack(struct ceph_connection *con) return prepare_control(con, FRAME_TAG_ACK, con->v2.out_buf, 8); } -static void prepare_epilogue_plain(struct ceph_connection *con, bool aborted) +static void prepare_epilogue_plain(struct ceph_connection *con, + struct ceph_msg *msg, bool aborted) { dout("%s con %p msg %p aborted %d crcs %u %u %u\n", __func__, con, - con->out_msg, aborted, con->v2.out_epil.front_crc, + msg, aborted, con->v2.out_epil.front_crc, con->v2.out_epil.middle_crc, con->v2.out_epil.data_crc); encode_epilogue_plain(con, aborted); @@ -1574,10 +1575,9 @@ static void prepare_epilogue_plain(struct ceph_connection *con, bool aborted) * For "used" empty segments, crc is -1. For unused (trailing) * segments, crc is 0. */ -static void prepare_message_plain(struct ceph_connection *con) +static void prepare_message_plain(struct ceph_connection *con, + struct ceph_msg *msg) { - struct ceph_msg *msg = con->out_msg; - prepare_head_plain(con, con->v2.out_buf, sizeof(struct ceph_msg_header2), NULL, 0, false); @@ -1618,7 +1618,7 @@ static void prepare_message_plain(struct ceph_connection *con) con->v2.out_state = OUT_S_QUEUE_DATA; } else { con->v2.out_epil.data_crc = 0; - prepare_epilogue_plain(con, false); + prepare_epilogue_plain(con, msg, false); con->v2.out_state = OUT_S_FINISH_MESSAGE; } } @@ -1630,7 +1630,8 @@ static void prepare_message_plain(struct ceph_connection *con) * allocate pages for the entire tail of the message (currently up * to ~32M) and two sgs arrays (up to ~256K each)... */ -static int prepare_message_secure(struct ceph_connection *con) +static int prepare_message_secure(struct ceph_connection *con, + struct ceph_msg *msg) { void *zerop = page_address(ceph_zero_page); struct sg_table enc_sgt = {}; @@ -1645,7 +1646,7 @@ static int prepare_message_secure(struct ceph_connection *con) if (ret) return ret; - tail_len = tail_onwire_len(con->out_msg, true); + tail_len = tail_onwire_len(msg, true); if (!tail_len) { /* * Empty message: once the head is written, @@ -1656,7 +1657,7 @@ static int prepare_message_secure(struct ceph_connection *con) } encode_epilogue_secure(con, false); - ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop, + ret = setup_message_sgs(&sgt, msg, zerop, zerop, zerop, &con->v2.out_epil, NULL, 0, false); if (ret) goto out; @@ -1685,7 +1686,7 @@ static int prepare_message_secure(struct ceph_connection *con) goto out; dout("%s con %p msg %p sg_cnt %d enc_page_cnt %d\n", __func__, con, - con->out_msg, sgt.orig_nents, enc_page_cnt); + msg, sgt.orig_nents, enc_page_cnt); con->v2.out_state = OUT_S_QUEUE_ENC_PAGE; out: @@ -1694,19 +1695,19 @@ out: return ret; } -static int prepare_message(struct ceph_connection *con) +static int prepare_message(struct ceph_connection *con, struct ceph_msg *msg) { int lens[] = { sizeof(struct ceph_msg_header2), - front_len(con->out_msg), - middle_len(con->out_msg), - data_len(con->out_msg) + front_len(msg), + middle_len(msg), + data_len(msg) }; struct ceph_frame_desc desc; int ret; dout("%s con %p msg %p logical %d+%d+%d+%d\n", __func__, con, - con->out_msg, lens[0], lens[1], lens[2], lens[3]); + msg, lens[0], lens[1], lens[2], lens[3]); if (con->in_seq > con->in_seq_acked) { dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con, @@ -1717,15 +1718,15 @@ static int prepare_message(struct ceph_connection *con) reset_out_kvecs(con); init_frame_desc(&desc, FRAME_TAG_MESSAGE, lens, 4); encode_preamble(&desc, con->v2.out_buf); - fill_header2(CTRL_BODY(con->v2.out_buf), &con->out_msg->hdr, + fill_header2(CTRL_BODY(con->v2.out_buf), &msg->hdr, con->in_seq_acked); if (con_secure(con)) { - ret = prepare_message_secure(con); + ret = prepare_message_secure(con, msg); if (ret) return ret; } else { - prepare_message_plain(con); + prepare_message_plain(con, msg); } ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); @@ -3153,20 +3154,20 @@ int ceph_con_v2_try_read(struct ceph_connection *con) } } -static void queue_data(struct ceph_connection *con) +static void queue_data(struct ceph_connection *con, struct ceph_msg *msg) { struct bio_vec bv; con->v2.out_epil.data_crc = -1; - ceph_msg_data_cursor_init(&con->v2.out_cursor, con->out_msg, - data_len(con->out_msg)); + ceph_msg_data_cursor_init(&con->v2.out_cursor, msg, + data_len(msg)); get_bvec_at(&con->v2.out_cursor, &bv); set_out_bvec(con, &bv, true); con->v2.out_state = OUT_S_QUEUE_DATA_CONT; } -static void queue_data_cont(struct ceph_connection *con) +static void queue_data_cont(struct ceph_connection *con, struct ceph_msg *msg) { struct bio_vec bv; @@ -3187,7 +3188,7 @@ static void queue_data_cont(struct ceph_connection *con) * we are done. */ reset_out_kvecs(con); - prepare_epilogue_plain(con, false); + prepare_epilogue_plain(con, msg, false); con->v2.out_state = OUT_S_FINISH_MESSAGE; } @@ -3219,7 +3220,7 @@ static void queue_enc_page(struct ceph_connection *con) con->v2.out_state = OUT_S_FINISH_MESSAGE; } -static void queue_zeros(struct ceph_connection *con) +static void queue_zeros(struct ceph_connection *con, struct ceph_msg *msg) { dout("%s con %p out_zero %d\n", __func__, con, con->v2.out_zero); @@ -3236,7 +3237,7 @@ static void queue_zeros(struct ceph_connection *con) * Once it's written, we are done patching up for the revoke. */ reset_out_kvecs(con); - prepare_epilogue_plain(con, true); + prepare_epilogue_plain(con, msg, true); con->v2.out_state = OUT_S_FINISH_MESSAGE; } @@ -3263,6 +3264,7 @@ static void finish_message(struct ceph_connection *con) static int populate_out_iter(struct ceph_connection *con) { + struct ceph_msg *msg; int ret; dout("%s con %p state %d out_state %d\n", __func__, con, con->state, @@ -3278,18 +3280,18 @@ static int populate_out_iter(struct ceph_connection *con) switch (con->v2.out_state) { case OUT_S_QUEUE_DATA: WARN_ON(!con->out_msg); - queue_data(con); + queue_data(con, con->out_msg); goto populated; case OUT_S_QUEUE_DATA_CONT: WARN_ON(!con->out_msg); - queue_data_cont(con); + queue_data_cont(con, con->out_msg); goto populated; case OUT_S_QUEUE_ENC_PAGE: queue_enc_page(con); goto populated; case OUT_S_QUEUE_ZEROS: WARN_ON(con->out_msg); /* revoked */ - queue_zeros(con); + queue_zeros(con, con->out_msg); goto populated; case OUT_S_FINISH_MESSAGE: finish_message(con); @@ -3309,8 +3311,8 @@ static int populate_out_iter(struct ceph_connection *con) return ret; } } else if (!list_empty(&con->out_queue)) { - ceph_con_get_out_msg(con); - ret = prepare_message(con); + msg = ceph_con_get_out_msg(con); + ret = prepare_message(con, msg); if (ret) { pr_err("prepare_message failed: %d\n", ret); return ret; @@ -3422,17 +3424,18 @@ static u32 crc32c_zeros(u32 crc, int zero_len) return crc; } -static void prepare_zero_front(struct ceph_connection *con, int resid) +static void prepare_zero_front(struct ceph_connection *con, + struct ceph_msg *msg, int resid) { int sent; - WARN_ON(!resid || resid > front_len(con->out_msg)); - sent = front_len(con->out_msg) - resid; + WARN_ON(!resid || resid > front_len(msg)); + sent = front_len(msg) - resid; dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid); if (sent) { con->v2.out_epil.front_crc = - crc32c(-1, con->out_msg->front.iov_base, sent); + crc32c(-1, msg->front.iov_base, sent); con->v2.out_epil.front_crc = crc32c_zeros(con->v2.out_epil.front_crc, resid); } else { @@ -3443,17 +3446,18 @@ static void prepare_zero_front(struct ceph_connection *con, int resid) out_zero_add(con, resid); } -static void prepare_zero_middle(struct ceph_connection *con, int resid) +static void prepare_zero_middle(struct ceph_connection *con, + struct ceph_msg *msg, int resid) { int sent; - WARN_ON(!resid || resid > middle_len(con->out_msg)); - sent = middle_len(con->out_msg) - resid; + WARN_ON(!resid || resid > middle_len(msg)); + sent = middle_len(msg) - resid; dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid); if (sent) { con->v2.out_epil.middle_crc = - crc32c(-1, con->out_msg->middle->vec.iov_base, sent); + crc32c(-1, msg->middle->vec.iov_base, sent); con->v2.out_epil.middle_crc = crc32c_zeros(con->v2.out_epil.middle_crc, resid); } else { @@ -3464,61 +3468,64 @@ static void prepare_zero_middle(struct ceph_connection *con, int resid) out_zero_add(con, resid); } -static void prepare_zero_data(struct ceph_connection *con) +static void prepare_zero_data(struct ceph_connection *con, + struct ceph_msg *msg) { dout("%s con %p\n", __func__, con); - con->v2.out_epil.data_crc = crc32c_zeros(-1, data_len(con->out_msg)); - out_zero_add(con, data_len(con->out_msg)); + con->v2.out_epil.data_crc = crc32c_zeros(-1, data_len(msg)); + out_zero_add(con, data_len(msg)); } -static void revoke_at_queue_data(struct ceph_connection *con) +static void revoke_at_queue_data(struct ceph_connection *con, + struct ceph_msg *msg) { int boundary; int resid; - WARN_ON(!data_len(con->out_msg)); + WARN_ON(!data_len(msg)); WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter)); resid = iov_iter_count(&con->v2.out_iter); - boundary = front_len(con->out_msg) + middle_len(con->out_msg); + boundary = front_len(msg) + middle_len(msg); if (resid > boundary) { resid -= boundary; WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN); dout("%s con %p was sending head\n", __func__, con); - if (front_len(con->out_msg)) - prepare_zero_front(con, front_len(con->out_msg)); - if (middle_len(con->out_msg)) - prepare_zero_middle(con, middle_len(con->out_msg)); - prepare_zero_data(con); + if (front_len(msg)) + prepare_zero_front(con, msg, front_len(msg)); + if (middle_len(msg)) + prepare_zero_middle(con, msg, middle_len(msg)); + prepare_zero_data(con, msg); WARN_ON(iov_iter_count(&con->v2.out_iter) != resid); con->v2.out_state = OUT_S_QUEUE_ZEROS; return; } - boundary = middle_len(con->out_msg); + boundary = middle_len(msg); if (resid > boundary) { resid -= boundary; dout("%s con %p was sending front\n", __func__, con); - prepare_zero_front(con, resid); - if (middle_len(con->out_msg)) - prepare_zero_middle(con, middle_len(con->out_msg)); - prepare_zero_data(con); - queue_zeros(con); + prepare_zero_front(con, msg, resid); + if (middle_len(msg)) + prepare_zero_middle(con, msg, middle_len(msg)); + prepare_zero_data(con, msg); + queue_zeros(con, msg); return; } WARN_ON(!resid); dout("%s con %p was sending middle\n", __func__, con); - prepare_zero_middle(con, resid); - prepare_zero_data(con); - queue_zeros(con); + prepare_zero_middle(con, msg, resid); + prepare_zero_data(con, msg); + queue_zeros(con, msg); } -static void revoke_at_queue_data_cont(struct ceph_connection *con) +static void revoke_at_queue_data_cont(struct ceph_connection *con, + struct ceph_msg *msg) { int sent, resid; /* current piece of data */ - WARN_ON(!data_len(con->out_msg)); + WARN_ON(!data_len(msg)); WARN_ON(!iov_iter_is_bvec(&con->v2.out_iter)); resid = iov_iter_count(&con->v2.out_iter); WARN_ON(!resid || resid > con->v2.out_bvec.bv_len); @@ -3537,10 +3544,11 @@ static void revoke_at_queue_data_cont(struct ceph_connection *con) con->v2.out_iter.count -= resid; out_zero_add(con, con->v2.out_cursor.total_resid); - queue_zeros(con); + queue_zeros(con, msg); } -static void revoke_at_finish_message(struct ceph_connection *con) +static void revoke_at_finish_message(struct ceph_connection *con, + struct ceph_msg *msg) { int boundary; int resid; @@ -3548,39 +3556,39 @@ static void revoke_at_finish_message(struct ceph_connection *con) WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter)); resid = iov_iter_count(&con->v2.out_iter); - if (!front_len(con->out_msg) && !middle_len(con->out_msg) && - !data_len(con->out_msg)) { + if (!front_len(msg) && !middle_len(msg) && + !data_len(msg)) { WARN_ON(!resid || resid > MESSAGE_HEAD_PLAIN_LEN); dout("%s con %p was sending head (empty message) - noop\n", __func__, con); return; } - boundary = front_len(con->out_msg) + middle_len(con->out_msg) + + boundary = front_len(msg) + middle_len(msg) + CEPH_EPILOGUE_PLAIN_LEN; if (resid > boundary) { resid -= boundary; WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN); dout("%s con %p was sending head\n", __func__, con); - if (front_len(con->out_msg)) - prepare_zero_front(con, front_len(con->out_msg)); - if (middle_len(con->out_msg)) - prepare_zero_middle(con, middle_len(con->out_msg)); + if (front_len(msg)) + prepare_zero_front(con, msg, front_len(msg)); + if (middle_len(msg)) + prepare_zero_middle(con, msg, middle_len(msg)); con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN; WARN_ON(iov_iter_count(&con->v2.out_iter) != resid); con->v2.out_state = OUT_S_QUEUE_ZEROS; return; } - boundary = middle_len(con->out_msg) + CEPH_EPILOGUE_PLAIN_LEN; + boundary = middle_len(msg) + CEPH_EPILOGUE_PLAIN_LEN; if (resid > boundary) { resid -= boundary; dout("%s con %p was sending front\n", __func__, con); - prepare_zero_front(con, resid); - if (middle_len(con->out_msg)) - prepare_zero_middle(con, middle_len(con->out_msg)); + prepare_zero_front(con, msg, resid); + if (middle_len(msg)) + prepare_zero_middle(con, msg, middle_len(msg)); con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN; - queue_zeros(con); + queue_zeros(con, msg); return; } @@ -3588,9 +3596,9 @@ static void revoke_at_finish_message(struct ceph_connection *con) if (resid > boundary) { resid -= boundary; dout("%s con %p was sending middle\n", __func__, con); - prepare_zero_middle(con, resid); + prepare_zero_middle(con, msg, resid); con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN; - queue_zeros(con); + queue_zeros(con, msg); return; } @@ -3598,7 +3606,7 @@ static void revoke_at_finish_message(struct ceph_connection *con) dout("%s con %p was sending epilogue - noop\n", __func__, con); } -void ceph_con_v2_revoke(struct ceph_connection *con) +void ceph_con_v2_revoke(struct ceph_connection *con, struct ceph_msg *msg) { WARN_ON(con->v2.out_zero); @@ -3611,13 +3619,13 @@ void ceph_con_v2_revoke(struct ceph_connection *con) switch (con->v2.out_state) { case OUT_S_QUEUE_DATA: - revoke_at_queue_data(con); + revoke_at_queue_data(con, msg); break; case OUT_S_QUEUE_DATA_CONT: - revoke_at_queue_data_cont(con); + revoke_at_queue_data_cont(con, msg); break; case OUT_S_FINISH_MESSAGE: - revoke_at_finish_message(con); + revoke_at_finish_message(con, msg); break; default: WARN(1, "bad out_state %d", con->v2.out_state); -- cgit v1.2.3 From 207696b17f38e869e59889b44d395ab24bb678d3 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Thu, 18 Sep 2025 22:30:18 +0300 Subject: tpm: use a map for tpm2_calc_ordinal_duration() The current shenanigans for duration calculation introduce too much complexity for a trivial problem, and further the code is hard to patch and maintain. Address these issues with a flat look-up table, which is easy to understand and patch. If leaf driver specific patching is required in future, it is easy enough to make a copy of this table during driver initialization and add the chip parameter back. 'chip->duration' is retained for TPM 1.x. As the first entry for this new behavior address TCG spec update mentioned in this issue: https://github.com/raspberrypi/linux/issues/7054 Therefore, for TPM_SelfTest the duration is set to 3000 ms. This does not categorize a as bug, given that this is introduced to the spec after the feature was originally made. Reviewed-by: Serge Hallyn Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm-interface.c | 2 +- drivers/char/tpm/tpm.h | 2 +- drivers/char/tpm/tpm2-cmd.c | 127 ++++++++++----------------------------- include/linux/tpm.h | 5 +- 4 files changed, 37 insertions(+), 99 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index b71725827743..c9f173001d0e 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -52,7 +52,7 @@ MODULE_PARM_DESC(suspend_pcr, unsigned long tpm_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal) { if (chip->flags & TPM_CHIP_FLAG_TPM2) - return tpm2_calc_ordinal_duration(chip, ordinal); + return tpm2_calc_ordinal_duration(ordinal); else return tpm1_calc_ordinal_duration(chip, ordinal); } diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 7bb87fa5f7a1..2726bd38e5ac 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -299,7 +299,7 @@ ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id, ssize_t tpm2_get_pcr_allocation(struct tpm_chip *chip); int tpm2_auto_startup(struct tpm_chip *chip); void tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type); -unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal); +unsigned long tpm2_calc_ordinal_duration(u32 ordinal); int tpm2_probe(struct tpm_chip *chip); int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip); int tpm2_find_cc(struct tpm_chip *chip, u32 cc); diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index 524d802ede26..7d77f6fbc152 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -28,120 +28,57 @@ static struct tpm2_hash tpm2_hash_map[] = { int tpm2_get_timeouts(struct tpm_chip *chip) { - /* Fixed timeouts for TPM2 */ chip->timeout_a = msecs_to_jiffies(TPM2_TIMEOUT_A); chip->timeout_b = msecs_to_jiffies(TPM2_TIMEOUT_B); chip->timeout_c = msecs_to_jiffies(TPM2_TIMEOUT_C); chip->timeout_d = msecs_to_jiffies(TPM2_TIMEOUT_D); - - /* PTP spec timeouts */ - chip->duration[TPM_SHORT] = msecs_to_jiffies(TPM2_DURATION_SHORT); - chip->duration[TPM_MEDIUM] = msecs_to_jiffies(TPM2_DURATION_MEDIUM); - chip->duration[TPM_LONG] = msecs_to_jiffies(TPM2_DURATION_LONG); - - /* Key creation commands long timeouts */ - chip->duration[TPM_LONG_LONG] = - msecs_to_jiffies(TPM2_DURATION_LONG_LONG); - chip->flags |= TPM_CHIP_FLAG_HAVE_TIMEOUTS; - return 0; } -/** - * tpm2_ordinal_duration_index() - returns an index to the chip duration table - * @ordinal: TPM command ordinal. - * - * The function returns an index to the chip duration table - * (enum tpm_duration), that describes the maximum amount of - * time the chip could take to return the result for a particular ordinal. - * - * The values of the MEDIUM, and LONG durations are taken - * from the PC Client Profile (PTP) specification (750, 2000 msec) - * - * LONG_LONG is for commands that generates keys which empirically takes - * a longer time on some systems. - * - * Return: - * * TPM_MEDIUM - * * TPM_LONG - * * TPM_LONG_LONG - * * TPM_UNDEFINED +/* + * Contains the maximum durations in milliseconds for TPM2 commands. */ -static u8 tpm2_ordinal_duration_index(u32 ordinal) -{ - switch (ordinal) { - /* Startup */ - case TPM2_CC_STARTUP: /* 144 */ - return TPM_MEDIUM; - - case TPM2_CC_SELF_TEST: /* 143 */ - return TPM_LONG; - - case TPM2_CC_GET_RANDOM: /* 17B */ - return TPM_LONG; - - case TPM2_CC_SEQUENCE_UPDATE: /* 15C */ - return TPM_MEDIUM; - case TPM2_CC_SEQUENCE_COMPLETE: /* 13E */ - return TPM_MEDIUM; - case TPM2_CC_EVENT_SEQUENCE_COMPLETE: /* 185 */ - return TPM_MEDIUM; - case TPM2_CC_HASH_SEQUENCE_START: /* 186 */ - return TPM_MEDIUM; - - case TPM2_CC_VERIFY_SIGNATURE: /* 177 */ - return TPM_LONG_LONG; - - case TPM2_CC_PCR_EXTEND: /* 182 */ - return TPM_MEDIUM; - - case TPM2_CC_HIERARCHY_CONTROL: /* 121 */ - return TPM_LONG; - case TPM2_CC_HIERARCHY_CHANGE_AUTH: /* 129 */ - return TPM_LONG; - - case TPM2_CC_GET_CAPABILITY: /* 17A */ - return TPM_MEDIUM; - - case TPM2_CC_NV_READ: /* 14E */ - return TPM_LONG; - - case TPM2_CC_CREATE_PRIMARY: /* 131 */ - return TPM_LONG_LONG; - case TPM2_CC_CREATE: /* 153 */ - return TPM_LONG_LONG; - case TPM2_CC_CREATE_LOADED: /* 191 */ - return TPM_LONG_LONG; - - default: - return TPM_UNDEFINED; - } -} +static const struct { + unsigned long ordinal; + unsigned long duration; +} tpm2_ordinal_duration_map[] = { + {TPM2_CC_STARTUP, 750}, + {TPM2_CC_SELF_TEST, 3000}, + {TPM2_CC_GET_RANDOM, 2000}, + {TPM2_CC_SEQUENCE_UPDATE, 750}, + {TPM2_CC_SEQUENCE_COMPLETE, 750}, + {TPM2_CC_EVENT_SEQUENCE_COMPLETE, 750}, + {TPM2_CC_HASH_SEQUENCE_START, 750}, + {TPM2_CC_VERIFY_SIGNATURE, 30000}, + {TPM2_CC_PCR_EXTEND, 750}, + {TPM2_CC_HIERARCHY_CONTROL, 2000}, + {TPM2_CC_HIERARCHY_CHANGE_AUTH, 2000}, + {TPM2_CC_GET_CAPABILITY, 750}, + {TPM2_CC_NV_READ, 2000}, + {TPM2_CC_CREATE_PRIMARY, 30000}, + {TPM2_CC_CREATE, 30000}, + {TPM2_CC_CREATE_LOADED, 30000}, +}; /** - * tpm2_calc_ordinal_duration() - calculate the maximum command duration - * @chip: TPM chip to use. + * tpm2_calc_ordinal_duration() - Calculate the maximum command duration * @ordinal: TPM command ordinal. * - * The function returns the maximum amount of time the chip could take - * to return the result for a particular ordinal in jiffies. - * - * Return: A maximal duration time for an ordinal in jiffies. + * Returns the maximum amount of time the chip is expected by kernel to + * take in jiffies. */ -unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal) +unsigned long tpm2_calc_ordinal_duration(u32 ordinal) { - unsigned int index; + int i; - index = tpm2_ordinal_duration_index(ordinal); + for (i = 0; i < ARRAY_SIZE(tpm2_ordinal_duration_map); i++) + if (ordinal == tpm2_ordinal_duration_map[i].ordinal) + return msecs_to_jiffies(tpm2_ordinal_duration_map[i].duration); - if (index != TPM_UNDEFINED) - return chip->duration[index]; - else - return msecs_to_jiffies(TPM2_DURATION_DEFAULT); + return msecs_to_jiffies(TPM2_DURATION_DEFAULT); } - struct tpm2_pcr_read_out { __be32 update_cnt; __be32 pcr_selects_cnt; diff --git a/include/linux/tpm.h b/include/linux/tpm.h index b0e9eb5ef022..dc0338a783f3 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -228,10 +228,11 @@ enum tpm2_timeouts { TPM2_TIMEOUT_B = 4000, TPM2_TIMEOUT_C = 200, TPM2_TIMEOUT_D = 30, +}; + +enum tpm2_durations { TPM2_DURATION_SHORT = 20, - TPM2_DURATION_MEDIUM = 750, TPM2_DURATION_LONG = 2000, - TPM2_DURATION_LONG_LONG = 300000, TPM2_DURATION_DEFAULT = 120000, }; -- cgit v1.2.3