From 066e053fe208a3b83ee89dc5a192146add688861 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:47 +0100 Subject: fsnotify: add pre-content hooks on mmap() Pre-content hooks in page faults introduces potential deadlock of HSM handler in userspace with filesystem freezing. The requirement with pre-content event is that for every accessed file range an event covering at least this range will be generated at least once before the file data is accesses. In preparation to disabling pre-content event hooks on page faults, add pre-content hooks at mmap() variants for the entire mmaped range, so HSM can fill content when user requests to map a portion of the file. Note that exec() variant also calls vm_mmap_pgoff() internally to map code sections, so pre-content hooks are also generated in this case. Link: https://lore.kernel.org/linux-fsdevel/7ehxrhbvehlrjwvrduoxsao5k3x4aw275patsb3krkwuq573yv@o2hskrfawbnc/ Suggested-by: Josef Bacik Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-2-amir73il@gmail.com --- include/linux/fsnotify.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 6a33288bd6a1..83d3ac97f826 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -170,6 +170,21 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, return fsnotify_path(&file->f_path, FS_ACCESS_PERM); } +/* + * fsnotify_mmap_perm - permission hook before mmap of file range + */ +static inline int fsnotify_mmap_perm(struct file *file, int prot, + const loff_t off, size_t len) +{ + /* + * mmap() generates only pre-content events. + */ + if (!file || likely(!FMODE_FSNOTIFY_HSM(file->f_mode))) + return 0; + + return fsnotify_pre_content(&file->f_path, &off, len); +} + /* * fsnotify_truncate_perm - permission hook before file truncate */ @@ -223,6 +238,12 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, return 0; } +static inline int fsnotify_mmap_perm(struct file *file, int prot, + const loff_t off, size_t len) +{ + return 0; +} + static inline int fsnotify_truncate_perm(const struct path *path, loff_t length) { return 0; -- cgit v1.2.3 From 955fbe0ef19df4197595a98d0906c94025c4beef Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 12 Mar 2025 08:38:50 +0100 Subject: Revert "fsnotify: generate pre-content permission event on page fault" This reverts commit 8392bc2ff8c8bf7c4c5e6dfa71ccd893a3c046f6. In the use case of buffered write whose input buffer is mmapped file on a filesystem with a pre-content mark, the prefaulting of the buffer can happen under the filesystem freeze protection (obtained in vfs_write()) which breaks assumptions of pre-content hook and introduces potential deadlock of HSM handler in userspace with filesystem freezing. Now that we have pre-content hooks at file mmap() time, disable the pre-content event hooks on page fault to avoid the potential deadlock. Reported-by: syzbot+7229071b47908b19d5b7@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-fsdevel/7ehxrhbvehlrjwvrduoxsao5k3x4aw275patsb3krkwuq573yv@o2hskrfawbnc/ Fixes: 8392bc2ff8c8 ("fsnotify: generate pre-content permission event on page fault") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250312073852.2123409-5-amir73il@gmail.com --- include/linux/mm.h | 1 - mm/filemap.c | 74 ------------------------------------------------------ mm/nommu.c | 7 ------ 3 files changed, 82 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7b1068ddcbb7..8483e09aeb2c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3420,7 +3420,6 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf); extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); -extern vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf); extern unsigned long stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ diff --git a/mm/filemap.c b/mm/filemap.c index 2974691fdfad..ff5fcdd96136 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include "internal.h" @@ -3336,48 +3335,6 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf) return ret; } -/** - * filemap_fsnotify_fault - maybe emit a pre-content event. - * @vmf: struct vm_fault containing details of the fault. - * - * If we have a pre-content watch on this file we will emit an event for this - * range. If we return anything the fault caller should return immediately, we - * will return VM_FAULT_RETRY if we had to emit an event, which will trigger the - * fault again and then the fault handler will run the second time through. - * - * Return: a bitwise-OR of %VM_FAULT_ codes, 0 if nothing happened. - */ -vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf) -{ - struct file *fpin = NULL; - int mask = (vmf->flags & FAULT_FLAG_WRITE) ? MAY_WRITE : MAY_ACCESS; - loff_t pos = vmf->pgoff >> PAGE_SHIFT; - size_t count = PAGE_SIZE; - int err; - - /* - * We already did this and now we're retrying with everything locked, - * don't emit the event and continue. - */ - if (vmf->flags & FAULT_FLAG_TRIED) - return 0; - - /* No watches, we're done. */ - if (likely(!FMODE_FSNOTIFY_HSM(vmf->vma->vm_file->f_mode))) - return 0; - - fpin = maybe_unlock_mmap_for_io(vmf, fpin); - if (!fpin) - return VM_FAULT_SIGBUS; - - err = fsnotify_file_area_perm(fpin, mask, &pos, count); - fput(fpin); - if (err) - return VM_FAULT_SIGBUS; - return VM_FAULT_RETRY; -} -EXPORT_SYMBOL_GPL(filemap_fsnotify_fault); - /** * filemap_fault - read in file data for page fault handling * @vmf: struct vm_fault containing details of the fault @@ -3481,37 +3438,6 @@ retry_find: * or because readahead was otherwise unable to retrieve it. */ if (unlikely(!folio_test_uptodate(folio))) { - /* - * If this is a precontent file we have can now emit an event to - * try and populate the folio. - */ - if (!(vmf->flags & FAULT_FLAG_TRIED) && - unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { - loff_t pos = folio_pos(folio); - size_t count = folio_size(folio); - - /* We're NOWAIT, we have to retry. */ - if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) { - folio_unlock(folio); - goto out_retry; - } - - if (mapping_locked) - filemap_invalidate_unlock_shared(mapping); - mapping_locked = false; - - folio_unlock(folio); - fpin = maybe_unlock_mmap_for_io(vmf, fpin); - if (!fpin) - goto out_retry; - - error = fsnotify_file_area_perm(fpin, MAY_ACCESS, &pos, - count); - if (error) - ret = VM_FAULT_SIGBUS; - goto out_retry; - } - /* * If the invalidate lock is not held, the folio was in cache * and uptodate and now it is not. Strange but possible since we diff --git a/mm/nommu.c b/mm/nommu.c index baa79abdaf03..9cb6e99215e2 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1613,13 +1613,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, } EXPORT_SYMBOL(remap_vmalloc_range); -vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf) -{ - BUG(); - return 0; -} -EXPORT_SYMBOL_GPL(filemap_fsnotify_fault); - vm_fault_t filemap_fault(struct vm_fault *vmf) { BUG(); -- cgit v1.2.3