diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2025-05-02 08:59:33 +0300 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2025-05-02 08:59:33 +0300 |
commit | b004c046df92c429202e899f3f117814a5eb02d3 (patch) | |
tree | 1ac2b86f3a22d67776025c40840f522e269558e3 /fs/xfs/xfs_aops.c | |
parent | c04005ef3576893e2db315d9ea2cfa4cf88b2c05 (diff) | |
parent | c72e4daa7ab2814e402f2b67dd1681ea4db446af (diff) | |
download | linux-rolling-lts.tar.xz |
Merge v6.12.26linux-rolling-lts
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs/xfs/xfs_aops.c')
-rw-r--r-- | fs/xfs/xfs_aops.c | 41 |
1 files changed, 37 insertions, 4 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 559a3a577097..ba6b4a180e80 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -19,6 +19,7 @@ #include "xfs_reflink.h" #include "xfs_errortag.h" #include "xfs_error.h" +#include "xfs_icache.h" struct xfs_writepage_ctx { struct iomap_writepage_ctx ctx; @@ -528,12 +529,44 @@ xfs_vm_readahead( } static int -xfs_iomap_swapfile_activate( +xfs_vm_swap_activate( struct swap_info_struct *sis, struct file *swap_file, sector_t *span) { - sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev; + struct xfs_inode *ip = XFS_I(file_inode(swap_file)); + + /* + * Swap file activation can race against concurrent shared extent + * removal in files that have been cloned. If this happens, + * iomap_swapfile_iter() can fail because it encountered a shared + * extent even though an operation is in progress to remove those + * shared extents. + * + * This race becomes problematic when we defer extent removal + * operations beyond the end of a syscall (i.e. use async background + * processing algorithms). Users think the extents are no longer + * shared, but iomap_swapfile_iter() still sees them as shared + * because the refcountbt entries for the extents being removed have + * not yet been updated. Hence the swapon call fails unexpectedly. + * + * The race condition is currently most obvious from the unlink() + * operation as extent removal is deferred until after the last + * reference to the inode goes away. We then process the extent + * removal asynchronously, hence triggers the "syscall completed but + * work not done" condition mentioned above. To close this race + * window, we need to flush any pending inodegc operations to ensure + * they have updated the refcountbt records before we try to map the + * swapfile. + */ + xfs_inodegc_flush(ip->i_mount); + + /* + * Direct the swap code to the correct block device when this file + * sits on the RT device. + */ + sis->bdev = xfs_inode_buftarg(ip)->bt_bdev; + return iomap_swapfile_activate(sis, swap_file, span, &xfs_read_iomap_ops); } @@ -549,11 +582,11 @@ const struct address_space_operations xfs_address_space_operations = { .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_folio = generic_error_remove_folio, - .swap_activate = xfs_iomap_swapfile_activate, + .swap_activate = xfs_vm_swap_activate, }; const struct address_space_operations xfs_dax_aops = { .writepages = xfs_dax_writepages, .dirty_folio = noop_dirty_folio, - .swap_activate = xfs_iomap_swapfile_activate, + .swap_activate = xfs_vm_swap_activate, }; |