From f781f661e8c99b0cb34129f2e374234d61864e77 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 13 Jun 2023 10:09:20 +0200 Subject: dma-buf: keep the signaling time of merged fences v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Android CTS is testing if the signaling time keeps consistent during merges. v2: use the current time if the fence is still in the signaling path and the timestamp not yet available. v3: improve comment, fix one more case to use the correct timestamp Signed-off-by: Christian König Reviewed-by: Luben Tuikov Link: https://patchwork.freedesktop.org/patch/msgid/20230630120041.109216-1-christian.koenig@amd.com --- include/linux/dma-fence.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index d54b595a0fe0..0d678e9a7b24 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -606,7 +606,7 @@ static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr) void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline); struct dma_fence *dma_fence_get_stub(void); -struct dma_fence *dma_fence_allocate_private_stub(void); +struct dma_fence *dma_fence_allocate_private_stub(ktime_t timestamp); u64 dma_fence_context_alloc(unsigned num); extern const struct dma_fence_ops dma_fence_array_ops; -- cgit v1.2.3 From 2fb48d88e77f29bf9d278f25bcfe82cf59a0e09b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 9 Jun 2023 23:11:39 -0700 Subject: blk-crypto: use dynamic lock class for blk_crypto_profile::lock When a device-mapper device is passing through the inline encryption support of an underlying device, calls to blk_crypto_evict_key() take the blk_crypto_profile::lock of the device-mapper device, then take the blk_crypto_profile::lock of the underlying device (nested). This isn't a real deadlock, but it causes a lockdep report because there is only one lock class for all instances of this lock. Lockdep subclasses don't really work here because the hierarchy of block devices is dynamic and could have more than 2 levels. Instead, register a dynamic lock class for each blk_crypto_profile, and associate that with the lock. This avoids false-positive lockdep reports like the following: ============================================ WARNING: possible recursive locking detected 6.4.0-rc5 #2 Not tainted -------------------------------------------- fscryptctl/1421 is trying to acquire lock: ffffff80829ca418 (&profile->lock){++++}-{3:3}, at: __blk_crypto_evict_key+0x44/0x1c0 but task is already holding lock: ffffff8086b68ca8 (&profile->lock){++++}-{3:3}, at: __blk_crypto_evict_key+0xc8/0x1c0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&profile->lock); lock(&profile->lock); *** DEADLOCK *** May be due to missing lock nesting notation Fixes: 1b2628397058 ("block: Keyslot Manager for Inline Encryption") Reported-by: Bart Van Assche Signed-off-by: Eric Biggers Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20230610061139.212085-1-ebiggers@kernel.org Signed-off-by: Jens Axboe --- block/blk-crypto-profile.c | 12 ++++++++++-- include/linux/blk-crypto-profile.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c index 2a67d3fb63e5..7fabc883e39f 100644 --- a/block/blk-crypto-profile.c +++ b/block/blk-crypto-profile.c @@ -79,7 +79,14 @@ int blk_crypto_profile_init(struct blk_crypto_profile *profile, unsigned int slot_hashtable_size; memset(profile, 0, sizeof(*profile)); - init_rwsem(&profile->lock); + + /* + * profile->lock of an underlying device can nest inside profile->lock + * of a device-mapper device, so use a dynamic lock class to avoid + * false-positive lockdep reports. + */ + lockdep_register_key(&profile->lockdep_key); + __init_rwsem(&profile->lock, "&profile->lock", &profile->lockdep_key); if (num_slots == 0) return 0; @@ -89,7 +96,7 @@ int blk_crypto_profile_init(struct blk_crypto_profile *profile, profile->slots = kvcalloc(num_slots, sizeof(profile->slots[0]), GFP_KERNEL); if (!profile->slots) - return -ENOMEM; + goto err_destroy; profile->num_slots = num_slots; @@ -435,6 +442,7 @@ void blk_crypto_profile_destroy(struct blk_crypto_profile *profile) { if (!profile) return; + lockdep_unregister_key(&profile->lockdep_key); kvfree(profile->slot_hashtable); kvfree_sensitive(profile->slots, sizeof(profile->slots[0]) * profile->num_slots); diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h index e6802b69cdd6..90ab33cb5d0e 100644 --- a/include/linux/blk-crypto-profile.h +++ b/include/linux/blk-crypto-profile.h @@ -111,6 +111,7 @@ struct blk_crypto_profile { * keyslots while ensuring that they can't be changed concurrently. */ struct rw_semaphore lock; + struct lock_class_key lockdep_key; /* List of idle slots, with least recently used slot at front */ wait_queue_head_t idle_slots_wait_queue; -- cgit v1.2.3 From 6b5c13b591d753c6022fbd12f8c0c0a9a07fc065 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 7 Jul 2023 12:56:20 +0200 Subject: s390/ism: Fix locking for forwarding of IRQs and events to clients The clients array references all registered clients and is protected by the clients_lock. Besides its use as general list of clients the clients array is accessed in ism_handle_irq() to forward ISM device events to clients. While the clients_lock is taken in the IRQ handler when calling handle_event() it is however incorrectly not held during the client->handle_irq() call and for the preceding clients[] access leaving it unprotected against concurrent client (un-)registration. Furthermore the accesses to ism->sba_client_arr[] in ism_register_dmb() and ism_unregister_dmb() are not protected by any lock. This is especially problematic as the client ID from the ism->sba_client_arr[] is not checked against NO_CLIENT and neither is the client pointer checked. Instead of expanding the use of the clients_lock further add a separate array in struct ism_dev which references clients subscribed to the device's events and IRQs. This array is protected by ism->lock which is already taken in ism_handle_irq() and can be taken outside the IRQ handler when adding/removing subscribers or the accessing ism->sba_client_arr[]. This also means that the clients_lock is no longer taken in IRQ context. Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration") Signed-off-by: Niklas Schnelle Reviewed-by: Alexandra Winter Signed-off-by: David S. Miller --- drivers/s390/net/ism_drv.c | 44 ++++++++++++++++++++++++++++++++++++-------- include/linux/ism.h | 1 + 2 files changed, 37 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 9b5fccdbc7d6..b664e4a08645 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -47,6 +47,15 @@ static struct ism_dev_list ism_dev_list = { .mutex = __MUTEX_INITIALIZER(ism_dev_list.mutex), }; +static void ism_setup_forwarding(struct ism_client *client, struct ism_dev *ism) +{ + unsigned long flags; + + spin_lock_irqsave(&ism->lock, flags); + ism->subs[client->id] = client; + spin_unlock_irqrestore(&ism->lock, flags); +} + int ism_register_client(struct ism_client *client) { struct ism_dev *ism; @@ -71,6 +80,7 @@ int ism_register_client(struct ism_client *client) list_for_each_entry(ism, &ism_dev_list.list, list) { ism->priv[i] = NULL; client->add(ism); + ism_setup_forwarding(client, ism); } } mutex_unlock(&ism_dev_list.mutex); @@ -92,6 +102,9 @@ int ism_unregister_client(struct ism_client *client) max_client--; spin_unlock_irqrestore(&clients_lock, flags); list_for_each_entry(ism, &ism_dev_list.list, list) { + spin_lock_irqsave(&ism->lock, flags); + /* Stop forwarding IRQs and events */ + ism->subs[client->id] = NULL; for (int i = 0; i < ISM_NR_DMBS; ++i) { if (ism->sba_client_arr[i] == client->id) { pr_err("%s: attempt to unregister client '%s'" @@ -101,6 +114,7 @@ int ism_unregister_client(struct ism_client *client) goto out; } } + spin_unlock_irqrestore(&ism->lock, flags); } out: mutex_unlock(&ism_dev_list.mutex); @@ -328,6 +342,7 @@ int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb, struct ism_client *client) { union ism_reg_dmb cmd; + unsigned long flags; int ret; ret = ism_alloc_dmb(ism, dmb); @@ -351,7 +366,9 @@ int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb, goto out; } dmb->dmb_tok = cmd.response.dmb_tok; + spin_lock_irqsave(&ism->lock, flags); ism->sba_client_arr[dmb->sba_idx - ISM_DMB_BIT_OFFSET] = client->id; + spin_unlock_irqrestore(&ism->lock, flags); out: return ret; } @@ -360,6 +377,7 @@ EXPORT_SYMBOL_GPL(ism_register_dmb); int ism_unregister_dmb(struct ism_dev *ism, struct ism_dmb *dmb) { union ism_unreg_dmb cmd; + unsigned long flags; int ret; memset(&cmd, 0, sizeof(cmd)); @@ -368,7 +386,9 @@ int ism_unregister_dmb(struct ism_dev *ism, struct ism_dmb *dmb) cmd.request.dmb_tok = dmb->dmb_tok; + spin_lock_irqsave(&ism->lock, flags); ism->sba_client_arr[dmb->sba_idx - ISM_DMB_BIT_OFFSET] = NO_CLIENT; + spin_unlock_irqrestore(&ism->lock, flags); ret = ism_cmd(ism, &cmd); if (ret && ret != ISM_ERROR) @@ -491,6 +511,7 @@ static u16 ism_get_chid(struct ism_dev *ism) static void ism_handle_event(struct ism_dev *ism) { struct ism_event *entry; + struct ism_client *clt; int i; while ((ism->ieq_idx + 1) != READ_ONCE(ism->ieq->header.idx)) { @@ -499,21 +520,21 @@ static void ism_handle_event(struct ism_dev *ism) entry = &ism->ieq->entry[ism->ieq_idx]; debug_event(ism_debug_info, 2, entry, sizeof(*entry)); - spin_lock(&clients_lock); - for (i = 0; i < max_client; ++i) - if (clients[i]) - clients[i]->handle_event(ism, entry); - spin_unlock(&clients_lock); + for (i = 0; i < max_client; ++i) { + clt = ism->subs[i]; + if (clt) + clt->handle_event(ism, entry); + } } } static irqreturn_t ism_handle_irq(int irq, void *data) { struct ism_dev *ism = data; - struct ism_client *clt; unsigned long bit, end; unsigned long *bv; u16 dmbemask; + u8 client_id; bv = (void *) &ism->sba->dmb_bits[ISM_DMB_WORD_OFFSET]; end = sizeof(ism->sba->dmb_bits) * BITS_PER_BYTE - ISM_DMB_BIT_OFFSET; @@ -530,8 +551,10 @@ static irqreturn_t ism_handle_irq(int irq, void *data) dmbemask = ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET]; ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0; barrier(); - clt = clients[ism->sba_client_arr[bit]]; - clt->handle_irq(ism, bit + ISM_DMB_BIT_OFFSET, dmbemask); + client_id = ism->sba_client_arr[bit]; + if (unlikely(client_id == NO_CLIENT || !ism->subs[client_id])) + continue; + ism->subs[client_id]->handle_irq(ism, bit + ISM_DMB_BIT_OFFSET, dmbemask); } if (ism->sba->e) { @@ -554,6 +577,7 @@ static void ism_dev_add_work_func(struct work_struct *work) add_work); client->add(client->tgt_ism); + ism_setup_forwarding(client, client->tgt_ism); atomic_dec(&client->tgt_ism->add_dev_cnt); wake_up(&client->tgt_ism->waitq); } @@ -691,7 +715,11 @@ static void ism_dev_remove_work_func(struct work_struct *work) { struct ism_client *client = container_of(work, struct ism_client, remove_work); + unsigned long flags; + spin_lock_irqsave(&client->tgt_ism->lock, flags); + client->tgt_ism->subs[client->id] = NULL; + spin_unlock_irqrestore(&client->tgt_ism->lock, flags); client->remove(client->tgt_ism); atomic_dec(&client->tgt_ism->free_clients_cnt); wake_up(&client->tgt_ism->waitq); diff --git a/include/linux/ism.h b/include/linux/ism.h index ea2bcdae7401..5160d47e5ea9 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -44,6 +44,7 @@ struct ism_dev { u64 local_gid; int ieq_idx; + struct ism_client *subs[MAX_CLIENTS]; atomic_t free_clients_cnt; atomic_t add_dev_cnt; wait_queue_head_t waitq; -- cgit v1.2.3 From 76631ffa2fd2d45bae5ad717eef716b94144e0e7 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 7 Jul 2023 12:56:21 +0200 Subject: s390/ism: Fix and simplify add()/remove() callback handling Previously the clients_lock was protecting the clients array against concurrent addition/removal of clients but was also accessed from IRQ context. This meant that it had to be a spinlock and that the add() and remove() callbacks in which clients need to do allocation and take mutexes can't be called under the clients_lock. To work around this these callbacks were moved to workqueues. This not only introduced significant complexity but is also subtly broken in at least one way. In ism_dev_init() and ism_dev_exit() clients[i]->tgt_ism is used to communicate the added/removed ISM device to the work function. While write access to client[i]->tgt_ism is protected by the clients_lock and the code waits that there is no pending add/remove work before and after setting clients[i]->tgt_ism this is not enough. The problem is that the wait happens based on per ISM device counters. Thus a concurrent ism_dev_init()/ism_dev_exit() for a different ISM device may overwrite a clients[i]->tgt_ism between unlocking the clients_lock and the subsequent wait for the work to finnish. Thankfully with the clients_lock no longer held in IRQ context it can be turned into a mutex which can be held during the calls to add()/remove() completely removing the need for the workqueues and the associated broken housekeeping including the per ISM device counters and the clients[i]->tgt_ism. Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration") Signed-off-by: Niklas Schnelle Signed-off-by: David S. Miller --- drivers/s390/net/ism_drv.c | 86 +++++++++++++--------------------------------- include/linux/ism.h | 6 ---- 2 files changed, 24 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index b664e4a08645..54091b7aea16 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -36,7 +36,7 @@ static const struct smcd_ops ism_ops; static struct ism_client *clients[MAX_CLIENTS]; /* use an array rather than */ /* a list for fast mapping */ static u8 max_client; -static DEFINE_SPINLOCK(clients_lock); +static DEFINE_MUTEX(clients_lock); struct ism_dev_list { struct list_head list; struct mutex mutex; /* protects ism device list */ @@ -59,11 +59,10 @@ static void ism_setup_forwarding(struct ism_client *client, struct ism_dev *ism) int ism_register_client(struct ism_client *client) { struct ism_dev *ism; - unsigned long flags; int i, rc = -ENOSPC; mutex_lock(&ism_dev_list.mutex); - spin_lock_irqsave(&clients_lock, flags); + mutex_lock(&clients_lock); for (i = 0; i < MAX_CLIENTS; ++i) { if (!clients[i]) { clients[i] = client; @@ -74,7 +73,8 @@ int ism_register_client(struct ism_client *client) break; } } - spin_unlock_irqrestore(&clients_lock, flags); + mutex_unlock(&clients_lock); + if (i < MAX_CLIENTS) { /* initialize with all devices that we got so far */ list_for_each_entry(ism, &ism_dev_list.list, list) { @@ -96,11 +96,11 @@ int ism_unregister_client(struct ism_client *client) int rc = 0; mutex_lock(&ism_dev_list.mutex); - spin_lock_irqsave(&clients_lock, flags); + mutex_lock(&clients_lock); clients[client->id] = NULL; if (client->id + 1 == max_client) max_client--; - spin_unlock_irqrestore(&clients_lock, flags); + mutex_unlock(&clients_lock); list_for_each_entry(ism, &ism_dev_list.list, list) { spin_lock_irqsave(&ism->lock, flags); /* Stop forwarding IRQs and events */ @@ -571,21 +571,9 @@ static u64 ism_get_local_gid(struct ism_dev *ism) return ism->local_gid; } -static void ism_dev_add_work_func(struct work_struct *work) -{ - struct ism_client *client = container_of(work, struct ism_client, - add_work); - - client->add(client->tgt_ism); - ism_setup_forwarding(client, client->tgt_ism); - atomic_dec(&client->tgt_ism->add_dev_cnt); - wake_up(&client->tgt_ism->waitq); -} - static int ism_dev_init(struct ism_dev *ism) { struct pci_dev *pdev = ism->pdev; - unsigned long flags; int i, ret; ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI); @@ -618,25 +606,16 @@ static int ism_dev_init(struct ism_dev *ism) /* hardware is V2 capable */ ism_create_system_eid(); - init_waitqueue_head(&ism->waitq); - atomic_set(&ism->free_clients_cnt, 0); - atomic_set(&ism->add_dev_cnt, 0); - - wait_event(ism->waitq, !atomic_read(&ism->add_dev_cnt)); - spin_lock_irqsave(&clients_lock, flags); - for (i = 0; i < max_client; ++i) + mutex_lock(&ism_dev_list.mutex); + mutex_lock(&clients_lock); + for (i = 0; i < max_client; ++i) { if (clients[i]) { - INIT_WORK(&clients[i]->add_work, - ism_dev_add_work_func); - clients[i]->tgt_ism = ism; - atomic_inc(&ism->add_dev_cnt); - schedule_work(&clients[i]->add_work); + clients[i]->add(ism); + ism_setup_forwarding(clients[i], ism); } - spin_unlock_irqrestore(&clients_lock, flags); - - wait_event(ism->waitq, !atomic_read(&ism->add_dev_cnt)); + } + mutex_unlock(&clients_lock); - mutex_lock(&ism_dev_list.mutex); list_add(&ism->list, &ism_dev_list.list); mutex_unlock(&ism_dev_list.mutex); @@ -711,40 +690,24 @@ err_dev: return ret; } -static void ism_dev_remove_work_func(struct work_struct *work) -{ - struct ism_client *client = container_of(work, struct ism_client, - remove_work); - unsigned long flags; - - spin_lock_irqsave(&client->tgt_ism->lock, flags); - client->tgt_ism->subs[client->id] = NULL; - spin_unlock_irqrestore(&client->tgt_ism->lock, flags); - client->remove(client->tgt_ism); - atomic_dec(&client->tgt_ism->free_clients_cnt); - wake_up(&client->tgt_ism->waitq); -} - -/* Callers must hold ism_dev_list.mutex */ static void ism_dev_exit(struct ism_dev *ism) { struct pci_dev *pdev = ism->pdev; unsigned long flags; int i; - wait_event(ism->waitq, !atomic_read(&ism->free_clients_cnt)); - spin_lock_irqsave(&clients_lock, flags); + spin_lock_irqsave(&ism->lock, flags); for (i = 0; i < max_client; ++i) - if (clients[i]) { - INIT_WORK(&clients[i]->remove_work, - ism_dev_remove_work_func); - clients[i]->tgt_ism = ism; - atomic_inc(&ism->free_clients_cnt); - schedule_work(&clients[i]->remove_work); - } - spin_unlock_irqrestore(&clients_lock, flags); + ism->subs[i] = NULL; + spin_unlock_irqrestore(&ism->lock, flags); - wait_event(ism->waitq, !atomic_read(&ism->free_clients_cnt)); + mutex_lock(&ism_dev_list.mutex); + mutex_lock(&clients_lock); + for (i = 0; i < max_client; ++i) { + if (clients[i]) + clients[i]->remove(ism); + } + mutex_unlock(&clients_lock); if (SYSTEM_EID.serial_number[0] != '0' || SYSTEM_EID.type[0] != '0') @@ -755,15 +718,14 @@ static void ism_dev_exit(struct ism_dev *ism) kfree(ism->sba_client_arr); pci_free_irq_vectors(pdev); list_del_init(&ism->list); + mutex_unlock(&ism_dev_list.mutex); } static void ism_remove(struct pci_dev *pdev) { struct ism_dev *ism = dev_get_drvdata(&pdev->dev); - mutex_lock(&ism_dev_list.mutex); ism_dev_exit(ism); - mutex_unlock(&ism_dev_list.mutex); pci_release_mem_regions(pdev); pci_disable_device(pdev); diff --git a/include/linux/ism.h b/include/linux/ism.h index 5160d47e5ea9..9a4c204df3da 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -45,9 +45,6 @@ struct ism_dev { int ieq_idx; struct ism_client *subs[MAX_CLIENTS]; - atomic_t free_clients_cnt; - atomic_t add_dev_cnt; - wait_queue_head_t waitq; }; struct ism_event { @@ -69,9 +66,6 @@ struct ism_client { */ void (*handle_irq)(struct ism_dev *dev, unsigned int bit, u16 dmbemask); /* Private area - don't touch! */ - struct work_struct remove_work; - struct work_struct add_work; - struct ism_dev *tgt_ism; u8 id; }; -- cgit v1.2.3 From aff037078ecaecf34a7c2afab1341815f90fba5e Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 29 Jun 2023 17:56:12 -0700 Subject: sched/psi: use kernfs polling functions for PSI trigger polling Destroying psi trigger in cgroup_file_release causes UAF issues when a cgroup is removed from under a polling process. This is happening because cgroup removal causes a call to cgroup_file_release while the actual file is still alive. Destroying the trigger at this point would also destroy its waitqueue head and if there is still a polling process on that file accessing the waitqueue, it will step on the freed pointer: do_select vfs_poll do_rmdir cgroup_rmdir kernfs_drain_open_files cgroup_file_release cgroup_pressure_release psi_trigger_destroy wake_up_pollfree(&t->event_wait) // vfs_poll is unblocked synchronize_rcu kfree(t) poll_freewait -> UAF access to the trigger's waitqueue head Patch [1] fixed this issue for epoll() case using wake_up_pollfree(), however the same issue exists for synchronous poll() case. The root cause of this issue is that the lifecycles of the psi trigger's waitqueue and of the file associated with the trigger are different. Fix this by using kernfs_generic_poll function when polling on cgroup-specific psi triggers. It internally uses kernfs_open_node->poll waitqueue head with its lifecycle tied to the file's lifecycle. This also renders the fix in [1] obsolete, so revert it. [1] commit c2dbe32d5db5 ("sched/psi: Fix use-after-free in ep_remove_wait_queue()") Fixes: 0e94682b73bf ("psi: introduce psi monitor") Closes: https://lore.kernel.org/all/20230613062306.101831-1-lujialin4@huawei.com/ Reported-by: Lu Jialin Signed-off-by: Suren Baghdasaryan Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230630005612.1014540-1-surenb@google.com --- include/linux/psi.h | 5 +++-- include/linux/psi_types.h | 3 +++ kernel/cgroup/cgroup.c | 2 +- kernel/sched/psi.c | 29 +++++++++++++++++++++-------- 4 files changed, 28 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/psi.h b/include/linux/psi.h index ab26200c2803..e0745873e3f2 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -23,8 +23,9 @@ void psi_memstall_enter(unsigned long *flags); void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); -struct psi_trigger *psi_trigger_create(struct psi_group *group, - char *buf, enum psi_res res, struct file *file); +struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, + enum psi_res res, struct file *file, + struct kernfs_open_file *of); void psi_trigger_destroy(struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 040c089581c6..f1fd3a8044e0 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -137,6 +137,9 @@ struct psi_trigger { /* Wait queue for polling */ wait_queue_head_t event_wait; + /* Kernfs file for cgroup triggers */ + struct kernfs_open_file *of; + /* Pending event flag */ int event; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index bfe3cd8ccf36..f55a40db065f 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3730,7 +3730,7 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf, } psi = cgroup_psi(cgrp); - new = psi_trigger_create(psi, buf, res, of->file); + new = psi_trigger_create(psi, buf, res, of->file, of); if (IS_ERR(new)) { cgroup_put(cgrp); return PTR_ERR(new); diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 81fca77397f6..9bb3f2b3ccfc 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -493,8 +493,12 @@ static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total, continue; /* Generate an event */ - if (cmpxchg(&t->event, 0, 1) == 0) - wake_up_interruptible(&t->event_wait); + if (cmpxchg(&t->event, 0, 1) == 0) { + if (t->of) + kernfs_notify(t->of->kn); + else + wake_up_interruptible(&t->event_wait); + } t->last_event_time = now; /* Reset threshold breach flag once event got generated */ t->pending_event = false; @@ -1271,8 +1275,9 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) return 0; } -struct psi_trigger *psi_trigger_create(struct psi_group *group, - char *buf, enum psi_res res, struct file *file) +struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, + enum psi_res res, struct file *file, + struct kernfs_open_file *of) { struct psi_trigger *t; enum psi_states state; @@ -1331,7 +1336,9 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, t->event = 0; t->last_event_time = 0; - init_waitqueue_head(&t->event_wait); + t->of = of; + if (!of) + init_waitqueue_head(&t->event_wait); t->pending_event = false; t->aggregator = privileged ? PSI_POLL : PSI_AVGS; @@ -1388,7 +1395,10 @@ void psi_trigger_destroy(struct psi_trigger *t) * being accessed later. Can happen if cgroup is deleted from under a * polling process. */ - wake_up_pollfree(&t->event_wait); + if (t->of) + kernfs_notify(t->of->kn); + else + wake_up_interruptible(&t->event_wait); if (t->aggregator == PSI_AVGS) { mutex_lock(&group->avgs_lock); @@ -1465,7 +1475,10 @@ __poll_t psi_trigger_poll(void **trigger_ptr, if (!t) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - poll_wait(file, &t->event_wait, wait); + if (t->of) + kernfs_generic_poll(t->of, wait); + else + poll_wait(file, &t->event_wait, wait); if (cmpxchg(&t->event, 1, 0) == 1) ret |= EPOLLPRI; @@ -1535,7 +1548,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, return -EBUSY; } - new = psi_trigger_create(&psi_system, buf, res, file); + new = psi_trigger_create(&psi_system, buf, res, file, NULL); if (IS_ERR(new)) { mutex_unlock(&seq->lock); return PTR_ERR(new); -- cgit v1.2.3 From b938e6603660652dc3db66d3c915fbfed3bce21d Mon Sep 17 00:00:00 2001 From: Ankit Kumar Date: Fri, 23 Jun 2023 18:08:05 +0530 Subject: nvme: fix the NVME_ID_NS_NVM_STS_MASK definition As per NVMe command set specification 1.0c Storage tag size is 7 bits. Fixes: 4020aad85c67 ("nvme: add support for enhanced metadata") Signed-off-by: Ankit Kumar Reviewed-by: Kanchan Joshi Signed-off-by: Keith Busch --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 182b6d614eb1..26dd3f859d9d 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -473,7 +473,7 @@ struct nvme_id_ns_nvm { }; enum { - NVME_ID_NS_NVM_STS_MASK = 0x3f, + NVME_ID_NS_NVM_STS_MASK = 0x7f, NVME_ID_NS_NVM_GUARD_SHIFT = 7, NVME_ID_NS_NVM_GUARD_MASK = 0x3, }; -- cgit v1.2.3 From dc8cbb65dc17b0daebca84375d35ce54ff730762 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Jul 2023 10:23:05 -0600 Subject: block: remove dead struc request->completion_data field It's no longer used. While in there, also update the comment as to why it can coexist with the rb_node. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2b7fb8e87793..b96e00499f9e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -158,13 +158,13 @@ struct request { /* * The rb_node is only used inside the io scheduler, requests - * are pruned when moved to the dispatch queue. So let the - * completion_data share space with the rb_node. + * are pruned when moved to the dispatch queue. special_vec must + * only be used if RQF_SPECIAL_PAYLOAD is set, and those cannot be + * insert into an IO scheduler. */ union { struct rb_node rb_node; /* sort/lookup */ struct bio_vec special_vec; - void *completion_data; }; /* -- cgit v1.2.3 From 195b9cb5b288fec1c871ef89f78cc9a7461aad3a Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 7 Jul 2023 23:03:19 +0900 Subject: fprobe: Ensure running fprobe_exit_handler() finished before calling rethook_free() Ensure running fprobe_exit_handler() has finished before calling rethook_free() in the unregister_fprobe() so that caller can free the fprobe right after unregister_fprobe(). unregister_fprobe() ensured that all running fprobe_entry/exit_handler() have finished by calling unregister_ftrace_function() which synchronizes RCU. But commit 5f81018753df ("fprobe: Release rethook after the ftrace_ops is unregistered") changed to call rethook_free() after unregister_ftrace_function(). So call rethook_stop() to make rethook disabled before unregister_ftrace_function() and ensure it again. Here is the possible code flow that can call the exit handler after unregister_fprobe(). ------ CPU1 CPU2 call unregister_fprobe(fp) ... __fprobe_handler() rethook_hook() on probed function unregister_ftrace_function() return from probed function rethook hooks find rh->handler == fprobe_exit_handler call fprobe_exit_handler() rethook_free(): set rh->handler = NULL; return from unreigster_fprobe; call fp->exit_handler() <- (*) ------ (*) At this point, the exit handler is called after returning from unregister_fprobe(). This fixes it as following; ------ CPU1 CPU2 call unregister_fprobe() ... rethook_stop(): set rh->handler = NULL; __fprobe_handler() rethook_hook() on probed function unregister_ftrace_function() return from probed function rethook hooks find rh->handler == NULL return from rethook rethook_free() return from unreigster_fprobe; ------ Link: https://lore.kernel.org/all/168873859949.156157.13039240432299335849.stgit@devnote2/ Fixes: 5f81018753df ("fprobe: Release rethook after the ftrace_ops is unregistered") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- include/linux/rethook.h | 1 + kernel/trace/fprobe.c | 3 +++ kernel/trace/rethook.c | 13 +++++++++++++ 3 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rethook.h b/include/linux/rethook.h index c8ac1e5afcd1..bdbe6717f45a 100644 --- a/include/linux/rethook.h +++ b/include/linux/rethook.h @@ -59,6 +59,7 @@ struct rethook_node { }; struct rethook *rethook_alloc(void *data, rethook_handler_t handler); +void rethook_stop(struct rethook *rh); void rethook_free(struct rethook *rh); void rethook_add_node(struct rethook *rh, struct rethook_node *node); struct rethook_node *rethook_try_get(struct rethook *rh); diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 58e4b3607aef..2571f7f3d5f2 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -366,6 +366,9 @@ int unregister_fprobe(struct fprobe *fp) fp->ops.saved_func != fprobe_kprobe_handler)) return -EINVAL; + if (fp->rethook) + rethook_stop(fp->rethook); + ret = unregister_ftrace_function(&fp->ops); if (ret < 0) return ret; diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c index 60f6cb2b486b..468006cce7ca 100644 --- a/kernel/trace/rethook.c +++ b/kernel/trace/rethook.c @@ -53,6 +53,19 @@ static void rethook_free_rcu(struct rcu_head *head) kfree(rh); } +/** + * rethook_stop() - Stop using a rethook. + * @rh: the struct rethook to stop. + * + * Stop using a rethook to prepare for freeing it. If you want to wait for + * all running rethook handler before calling rethook_free(), you need to + * call this first and wait RCU, and call rethook_free(). + */ +void rethook_stop(struct rethook *rh) +{ + WRITE_ONCE(rh->handler, NULL); +} + /** * rethook_free() - Free struct rethook. * @rh: the struct rethook to be freed. -- cgit v1.2.3 From 7d8b31b73c79835572611ed1eed649e4d2e14245 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 May 2023 14:51:48 +0200 Subject: tracing: arm64: Avoid missing-prototype warnings These are all tracing W=1 warnings in arm64 allmodconfig about missing prototypes: kernel/trace/trace_kprobe_selftest.c:7:5: error: no previous prototype for 'kprobe_trace_selftest_target' [-Werror=missing-pro totypes] kernel/trace/ftrace.c:329:5: error: no previous prototype for '__register_ftrace_function' [-Werror=missing-prototypes] kernel/trace/ftrace.c:372:5: error: no previous prototype for '__unregister_ftrace_function' [-Werror=missing-prototypes] kernel/trace/ftrace.c:4130:15: error: no previous prototype for 'arch_ftrace_match_adjust' [-Werror=missing-prototypes] kernel/trace/fgraph.c:243:15: error: no previous prototype for 'ftrace_return_to_handler' [-Werror=missing-prototypes] kernel/trace/fgraph.c:358:6: error: no previous prototype for 'ftrace_graph_sleep_time_control' [-Werror=missing-prototypes] arch/arm64/kernel/ftrace.c:460:6: error: no previous prototype for 'prepare_ftrace_return' [-Werror=missing-prototypes] arch/arm64/kernel/ptrace.c:2172:5: error: no previous prototype for 'syscall_trace_enter' [-Werror=missing-prototypes] arch/arm64/kernel/ptrace.c:2195:6: error: no previous prototype for 'syscall_trace_exit' [-Werror=missing-prototypes] Move the declarations to an appropriate header where they can be seen by the caller and callee, and make sure the headers are included where needed. Link: https://lore.kernel.org/linux-trace-kernel/20230517125215.930689-1-arnd@kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Will Deacon Cc: Kees Cook Cc: Florent Revest Signed-off-by: Arnd Bergmann Acked-by: Catalin Marinas [ Fixed ftrace_return_to_handler() to handle CONFIG_HAVE_FUNCTION_GRAPH_RETVAL case ] Signed-off-by: Steven Rostedt (Google) --- arch/arm64/include/asm/ftrace.h | 4 ++++ arch/arm64/include/asm/syscall.h | 3 +++ arch/arm64/kernel/syscall.c | 3 --- include/linux/ftrace.h | 9 +++++++++ kernel/trace/fgraph.c | 1 + kernel/trace/ftrace_internal.h | 5 +++-- kernel/trace/trace_kprobe_selftest.c | 3 +++ 7 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 21ac1c5c71d3..ab158196480c 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -211,6 +211,10 @@ static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs { return ret_regs->fp; } + +void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, + unsigned long frame_pointer); + #endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER */ #endif diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 4cfe9b49709b..ab8e14b96f68 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -85,4 +85,7 @@ static inline int syscall_get_arch(struct task_struct *task) return AUDIT_ARCH_AARCH64; } +int syscall_trace_enter(struct pt_regs *regs); +void syscall_trace_exit(struct pt_regs *regs); + #endif /* __ASM_SYSCALL_H */ diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 5a668d7f3c1f..b1ae2f2eaf77 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -75,9 +75,6 @@ static inline bool has_syscall_work(unsigned long flags) return unlikely(flags & _TIF_SYSCALL_WORK); } -int syscall_trace_enter(struct pt_regs *regs); -void syscall_trace_exit(struct pt_regs *regs); - static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, const syscall_fn_t syscall_table[]) { diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8e59bd954153..ce156c7704ee 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -41,6 +41,15 @@ struct ftrace_ops; struct ftrace_regs; struct dyn_ftrace; +char *arch_ftrace_match_adjust(char *str, const char *search); + +#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL +struct fgraph_ret_regs; +unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs); +#else +unsigned long ftrace_return_to_handler(unsigned long frame_pointer); +#endif + #ifdef CONFIG_FUNCTION_TRACER /* * If the arch's mcount caller does not support all of ftrace's diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index cd2c35b1dd8f..c83c005e654e 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -15,6 +15,7 @@ #include #include "ftrace_internal.h" +#include "trace.h" #ifdef CONFIG_DYNAMIC_FTRACE #define ASSIGN_OPS_HASH(opsname, val) \ diff --git a/kernel/trace/ftrace_internal.h b/kernel/trace/ftrace_internal.h index 382775edf690..5012c04f92c0 100644 --- a/kernel/trace/ftrace_internal.h +++ b/kernel/trace/ftrace_internal.h @@ -2,6 +2,9 @@ #ifndef _LINUX_KERNEL_FTRACE_INTERNAL_H #define _LINUX_KERNEL_FTRACE_INTERNAL_H +int __register_ftrace_function(struct ftrace_ops *ops); +int __unregister_ftrace_function(struct ftrace_ops *ops); + #ifdef CONFIG_FUNCTION_TRACER extern struct mutex ftrace_lock; @@ -15,8 +18,6 @@ int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs); #else /* !CONFIG_DYNAMIC_FTRACE */ -int __register_ftrace_function(struct ftrace_ops *ops); -int __unregister_ftrace_function(struct ftrace_ops *ops); /* Keep as macros so we do not need to define the commands */ # define ftrace_startup(ops, command) \ ({ \ diff --git a/kernel/trace/trace_kprobe_selftest.c b/kernel/trace/trace_kprobe_selftest.c index 16548ee4c8c6..3851cd1e6a62 100644 --- a/kernel/trace/trace_kprobe_selftest.c +++ b/kernel/trace/trace_kprobe_selftest.c @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 + +#include "trace_kprobe_selftest.h" + /* * Function used during the kprobe self test. This function is in a separate * compile unit so it can be compile with CC_FLAGS_FTRACE to ensure that it -- cgit v1.2.3