diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-24 03:08:04 +0300 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-24 03:08:04 +0300 |
| commit | dd6c438c3e64a5ff0b5d7e78f7f9be547803ef1b (patch) | |
| tree | c96be83061cb29d3e1d7cb6667440441e265e226 | |
| parent | bd1886d6e4ca6b84041d17ba6e11d0f85f7ee1a4 (diff) | |
| parent | ac8777cc36224b4705d2c6efb10c56135d479b21 (diff) | |
| download | linux-master.tar.xz | |
Pull vfs fixes from Christian Brauner:
- eventpoll: fix ep_remove() UAF and follow-up cleanup
- fs: aio: set VMA_DONTCOPY_BIT in mmap to fix NULL-pointer-dereference
  error
- writeback: Fix use after free in inode_switch_wbs_work_fn()
- fuse: reject oversized dirents in page cache
- fs: aio: reject partial mremap to avoid Null-pointer-dereference
  error
- nstree: fix func. parameter kernel-doc warnings
- fs: Handle multiply claimed blocks more gracefully with mmb
* tag 'vfs-7.1-rc1.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
eventpoll: drop vestigial epi->dying flag
eventpoll: drop dead bool return from ep_remove_epi()
eventpoll: refresh eventpoll_release() fast-path comment
eventpoll: move f_lock acquisition into ep_remove_file()
eventpoll: fix ep_remove struct eventpoll / struct file UAF
eventpoll: move epi_fget() up
eventpoll: rename ep_remove_safe() back to ep_remove()
eventpoll: drop vestigial __ prefix from ep_remove_{file,epi}()
eventpoll: kill __ep_remove()
eventpoll: split __ep_remove()
eventpoll: use hlist_is_singular_node() in __ep_remove()
fs: Handle multiply claimed blocks more gracefully with mmb
nstree: fix func. parameter kernel-doc warnings
fs: aio: reject partial mremap to avoid Null-pointer-dereference error
fuse: reject oversized dirents in page cache
writeback: Fix use after free in inode_switch_wbs_work_fn()
fs: aio: set VMA_DONTCOPY_BIT in mmap to fix NULL-pointer-dereference error
| -rw-r--r-- | fs/aio.c | 5 | ||||
| -rw-r--r-- | fs/buffer.c | 9 | ||||
| -rw-r--r-- | fs/eventpoll.c | 158 | ||||
| -rw-r--r-- | fs/fs-writeback.c | 36 | ||||
| -rw-r--r-- | fs/fuse/readdir.c | 4 | ||||
| -rw-r--r-- | include/linux/eventpoll.h | 16 | ||||
| -rw-r--r-- | include/linux/nstree.h | 6 |
7 files changed, 125 insertions, 109 deletions
@@ -422,7 +422,8 @@ static int aio_ring_mremap(struct vm_area_struct *vma) ctx = rcu_dereference(table->table[i]); if (ctx && ctx->aio_ring_file == file) { - if (!atomic_read(&ctx->dead)) { + if (!atomic_read(&ctx->dead) && + (ctx->mmap_size == (vma->vm_end - vma->vm_start))) { ctx->user_id = ctx->mmap_base = vma->vm_start; res = 0; } @@ -447,7 +448,7 @@ static const struct vm_operations_struct aio_ring_vm_ops = { static int aio_ring_mmap_prepare(struct vm_area_desc *desc) { - vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT); + vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT, VMA_DONTCOPY_BIT); desc->vm_ops = &aio_ring_vm_ops; return 0; } diff --git a/fs/buffer.c b/fs/buffer.c index d6e062c42a8d..b0b3792b1496 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -719,8 +719,15 @@ void mmb_mark_buffer_dirty(struct buffer_head *bh, mark_buffer_dirty(bh); if (!bh->b_mmb) { spin_lock(&mmb->lock); + /* + * For a corrupted filesystem with multiply claimed blocks this + * can fail. Avoid corrupting the linked list in that case. 
+ */ + if (cmpxchg(&bh->b_mmb, NULL, mmb) != NULL) { + spin_unlock(&mmb->lock); + return; + } list_move_tail(&bh->b_assoc_buffers, &mmb->list); - bh->b_mmb = mmb; spin_unlock(&mmb->lock); } } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 23f3c6ac0bad..a3090b446af1 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -148,13 +148,6 @@ struct epitem { /* The file descriptor information this item refers to */ struct epoll_filefd ffd; - /* - * Protected by file->f_lock, true for to-be-released epitem already - * removed from the "struct file" items list; together with - * eventpoll->refcount orchestrates "struct eventpoll" disposal - */ - bool dying; - /* List containing poll wait queues */ struct eppoll_entry *pwqlist; @@ -220,10 +213,7 @@ struct eventpoll { struct hlist_head refs; u8 loop_check_depth; - /* - * usage count, used together with epitem->dying to - * orchestrate the disposal of this struct - */ + /* usage count, orchestrates "struct eventpoll" disposal */ refcount_t refcount; /* used to defer freeing past ep_get_upwards_depth_proc() RCU walk */ @@ -827,36 +817,47 @@ static void ep_free(struct eventpoll *ep) } /* - * Removes a "struct epitem" from the eventpoll RB tree and deallocates - * all the associated resources. Must be called with "mtx" held. - * If the dying flag is set, do the removal only if force is true. - * This prevents ep_clear_and_put() from dropping all the ep references - * while running concurrently with eventpoll_release_file(). - * Returns true if the eventpoll can be disposed. + * The ffd.file pointer may be in the process of being torn down due to + * being closed, but we may not have finished eventpoll_release() yet. + * + * Normally, even with the atomic_long_inc_not_zero, the file may have + * been free'd and then gotten re-allocated to something else (since + * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU). 
+ * + * But for epoll, users hold the ep->mtx mutex, and as such any file in + * the process of being free'd will block in eventpoll_release_file() + * and thus the underlying file allocation will not be free'd, and the + * file re-use cannot happen. + * + * For the same reason we can avoid a rcu_read_lock() around the + * operation - 'ffd.file' cannot go away even if the refcount has + * reached zero (but we must still not call out to ->poll() functions + * etc). */ -static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) +static struct file *epi_fget(const struct epitem *epi) { - struct file *file = epi->ffd.file; - struct epitems_head *to_free; - struct hlist_head *head; + struct file *file; - lockdep_assert_irqs_enabled(); + file = epi->ffd.file; + if (!file_ref_get(&file->f_ref)) + file = NULL; + return file; +} - /* - * Removes poll wait queue hooks. - */ - ep_unregister_pollwait(ep, epi); +/* + * Takes &file->f_lock; returns with it released. + */ +static void ep_remove_file(struct eventpoll *ep, struct epitem *epi, + struct file *file) +{ + struct epitems_head *to_free = NULL; + struct hlist_head *head; - /* Remove the current item from the list of epoll hooks */ - spin_lock(&file->f_lock); - if (epi->dying && !force) { - spin_unlock(&file->f_lock); - return false; - } + lockdep_assert_held(&ep->mtx); - to_free = NULL; + spin_lock(&file->f_lock); head = file->f_ep; - if (head->first == &epi->fllink && !epi->fllink.next) { + if (hlist_is_singular_node(&epi->fllink, head)) { /* See eventpoll_release() for details. 
*/ WRITE_ONCE(file->f_ep, NULL); if (!is_file_epoll(file)) { @@ -869,6 +870,11 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) hlist_del_rcu(&epi->fllink); spin_unlock(&file->f_lock); free_ephead(to_free); +} + +static void ep_remove_epi(struct eventpoll *ep, struct epitem *epi) +{ + lockdep_assert_held(&ep->mtx); rb_erase_cached(&epi->rbn, &ep->rbr); @@ -888,16 +894,32 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) kfree_rcu(epi, rcu); percpu_counter_dec(&ep->user->epoll_watches); - return true; } /* * ep_remove variant for callers owing an additional reference to the ep */ -static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi) +static void ep_remove(struct eventpoll *ep, struct epitem *epi) { - if (__ep_remove(ep, epi, false)) - WARN_ON_ONCE(ep_refcount_dec_and_test(ep)); + struct file *file __free(fput) = NULL; + + lockdep_assert_irqs_enabled(); + lockdep_assert_held(&ep->mtx); + + ep_unregister_pollwait(ep, epi); + + /* + * If we manage to grab a reference it means we're not in + * eventpoll_release_file() and aren't going to be: once @file's + * refcount has reached zero, file_ref_get() cannot bring it back. + */ + file = epi_fget(epi); + if (!file) + return; + + ep_remove_file(ep, epi, file); + ep_remove_epi(ep, epi); + WARN_ON_ONCE(ep_refcount_dec_and_test(ep)); } static void ep_clear_and_put(struct eventpoll *ep) @@ -923,7 +945,7 @@ static void ep_clear_and_put(struct eventpoll *ep) /* * Walks through the whole tree and try to free each "struct epitem". - * Note that ep_remove_safe() will not remove the epitem in case of a + * Note that ep_remove() will not remove the epitem in case of a * racing eventpoll_release_file(); the latter will do the removal. * At this point we are sure no poll callbacks will be lingering around. 
* Since we still own a reference to the eventpoll struct, the loop can't @@ -932,7 +954,7 @@ static void ep_clear_and_put(struct eventpoll *ep) for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = next) { next = rb_next(rbp); epi = rb_entry(rbp, struct epitem, rbn); - ep_remove_safe(ep, epi); + ep_remove(ep, epi); cond_resched(); } @@ -1013,34 +1035,6 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep } /* - * The ffd.file pointer may be in the process of being torn down due to - * being closed, but we may not have finished eventpoll_release() yet. - * - * Normally, even with the atomic_long_inc_not_zero, the file may have - * been free'd and then gotten re-allocated to something else (since - * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU). - * - * But for epoll, users hold the ep->mtx mutex, and as such any file in - * the process of being free'd will block in eventpoll_release_file() - * and thus the underlying file allocation will not be free'd, and the - * file re-use cannot happen. - * - * For the same reason we can avoid a rcu_read_lock() around the - * operation - 'ffd.file' cannot go away even if the refcount has - * reached zero (but we must still not call out to ->poll() functions - * etc). - */ -static struct file *epi_fget(const struct epitem *epi) -{ - struct file *file; - - file = epi->ffd.file; - if (!file_ref_get(&file->f_ref)) - file = NULL; - return file; -} - -/* * Differs from ep_eventpoll_poll() in that internal callers already have * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested() * is correctly annotated. @@ -1117,18 +1111,17 @@ void eventpoll_release_file(struct file *file) { struct eventpoll *ep; struct epitem *epi; - bool dispose; /* - * Use the 'dying' flag to prevent a concurrent ep_clear_and_put() from - * touching the epitems list before eventpoll_release_file() can access - * the ep->mtx. 
+ * A concurrent ep_remove() cannot outrace us: it pins @file via + * epi_fget(), which fails once __fput() has dropped the refcount + * to zero -- the path we're on. So any racing ep_remove() bails + * and leaves the epi for us to clean up here. */ again: spin_lock(&file->f_lock); if (file->f_ep && file->f_ep->first) { epi = hlist_entry(file->f_ep->first, struct epitem, fllink); - epi->dying = true; spin_unlock(&file->f_lock); /* @@ -1137,10 +1130,15 @@ again: */ ep = epi->ep; mutex_lock(&ep->mtx); - dispose = __ep_remove(ep, epi, true); + + ep_unregister_pollwait(ep, epi); + + ep_remove_file(ep, epi, file); + ep_remove_epi(ep, epi); + mutex_unlock(&ep->mtx); - if (dispose && ep_refcount_dec_and_test(ep)) + if (ep_refcount_dec_and_test(ep)) ep_free(ep); goto again; } @@ -1619,21 +1617,21 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, mutex_unlock(&tep->mtx); /* - * ep_remove_safe() calls in the later error paths can't lead to + * ep_remove() calls in the later error paths can't lead to * ep_free() as the ep file itself still holds an ep reference. */ ep_get(ep); /* now check if we've created too many backpaths */ if (unlikely(full_check && reverse_path_check())) { - ep_remove_safe(ep, epi); + ep_remove(ep, epi); return -EINVAL; } if (epi->event.events & EPOLLWAKEUP) { error = ep_create_wakeup_source(epi); if (error) { - ep_remove_safe(ep, epi); + ep_remove(ep, epi); return error; } } @@ -1657,7 +1655,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, * high memory pressure. */ if (unlikely(!epq.epi)) { - ep_remove_safe(ep, epi); + ep_remove(ep, epi); return -ENOMEM; } @@ -2352,7 +2350,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, * The eventpoll itself is still alive: the refcount * can't go to zero here. 
*/ - ep_remove_safe(ep, epi); + ep_remove(ep, epi); error = 0; } else { error = -ENOENT; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e1fbdf9ee769..a65694cbfe68 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -568,28 +568,30 @@ void inode_switch_wbs_work_fn(struct work_struct *work) struct inode_switch_wbs_context *isw, *next_isw; struct llist_node *list; + list = llist_del_all(&new_wb->switch_wbs_ctxs); /* - * Grab out reference to wb so that it cannot get freed under us + * Nothing to do? That would be a problem as references held by isw + * items protect wb from freeing... + */ + if (WARN_ON_ONCE(!list)) + return; + + /* + * Grab our reference to wb so that it cannot get freed under us * after we process all the isw items. */ wb_get(new_wb); - while (1) { - list = llist_del_all(&new_wb->switch_wbs_ctxs); - /* Nothing to do? */ - if (!list) - break; - /* - * In addition to synchronizing among switchers, I_WB_SWITCH - * tells the RCU protected stat update paths to grab the i_page - * lock so that stat transfer can synchronize against them. - * Let's continue after I_WB_SWITCH is guaranteed to be - * visible. - */ - synchronize_rcu(); + /* + * In addition to synchronizing among switchers, I_WB_SWITCH + * tells the RCU protected stat update paths to grab the i_page + * lock so that stat transfer can synchronize against them. + * Let's continue after I_WB_SWITCH is guaranteed to be + * visible. + */ + synchronize_rcu(); - llist_for_each_entry_safe(isw, next_isw, list, list) - process_inode_switch_wbs(new_wb, isw); - } + llist_for_each_entry_safe(isw, next_isw, list, list) + process_inode_switch_wbs(new_wb, isw); wb_put(new_wb); } diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index c88194e52d18..db5ae8ec1030 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -41,6 +41,10 @@ static void fuse_add_dirent_to_cache(struct file *file, unsigned int offset; void *addr; + /* Dirent doesn't fit in readdir cache page? Skip caching. 
*/ + if (reclen > PAGE_SIZE) + return; + spin_lock(&fi->rdc.lock); /* * Is cache already completed? Or this entry does not go at the end of diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index ea9ca0e4172a..728fb5dee5ed 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -39,12 +39,16 @@ static inline void eventpoll_release(struct file *file) { /* - * Fast check to avoid the get/release of the semaphore. Since - * we're doing this outside the semaphore lock, it might return - * false negatives, but we don't care. It'll help in 99.99% of cases - * to avoid the semaphore lock. False positives simply cannot happen - * because the file in on the way to be removed and nobody ( but - * eventpoll ) has still a reference to this file. + * Fast check to skip the slow path in the common case where the + * file was never attached to an epoll. Safe without file->f_lock + * because every f_ep writer excludes a concurrent __fput() on + * @file: + * - ep_insert() requires the file alive (refcount > 0); + * - ep_remove() holds @file pinned via epi_fget() across the + * write; + * - eventpoll_release_file() runs from __fput() itself. + * We are in __fput() here, so none of those can race us: a NULL + * observation truly means no epoll path has work left on @file. */ if (likely(!READ_ONCE(file->f_ep))) return; diff --git a/include/linux/nstree.h b/include/linux/nstree.h index 175e4625bfa6..5b64d4572881 100644 --- a/include/linux/nstree.h +++ b/include/linux/nstree.h @@ -61,7 +61,7 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree_root *ns_t /** * ns_tree_add_raw - Add a namespace to a namespace - * @ns: Namespace to add + * @__ns: Namespace to add * * This function adds a namespace to the appropriate namespace tree * without assigning a id. 
@@ -70,7 +70,7 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree_root *ns_t /** * ns_tree_add - Add a namespace to a namespace tree - * @ns: Namespace to add + * @__ns: Namespace to add * * This function assigns a new id to the namespace and adds it to the * appropriate namespace tree and list. @@ -81,7 +81,7 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree_root *ns_t /** * ns_tree_remove - Remove a namespace from a namespace tree - * @ns: Namespace to remove + * @__ns: Namespace to remove * * This function removes a namespace from the appropriate namespace * tree and list. |
