From c642256b91770e201519d037a91f255a617a4602 Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Wed, 10 May 2023 22:11:19 +0000 Subject: vfs: Replace all non-returning strlcpy with strscpy strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated [1]. In an effort to remove strlcpy() completely [2], replace strlcpy() here with strscpy(). No return values were used, so direct replacement is safe. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [2] https://github.com/KSPP/linux/issues/89 Signed-off-by: Azeem Shaikh Reviewed-by: Kees Cook Message-Id: <20230510221119.3508930-1-azeemshaikh38@gmail.com> Signed-off-by: Christian Brauner --- fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 34afe411cf2b..8d8d68799b34 100644 --- a/fs/super.c +++ b/fs/super.c @@ -595,7 +595,7 @@ retry: fc->s_fs_info = NULL; s->s_type = fc->fs_type; s->s_iflags |= fc->s_iflags; - strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id)); + strscpy(s->s_id, s->s_type->name, sizeof(s->s_id)); list_add_tail(&s->s_list, &super_blocks); hlist_add_head(&s->s_instances, &s->s_type->fs_supers); spin_unlock(&sb_lock); @@ -674,7 +674,7 @@ retry: return ERR_PTR(err); } s->s_type = type; - strlcpy(s->s_id, type->name, sizeof(s->s_id)); + strscpy(s->s_id, type->name, sizeof(s->s_id)); list_add_tail(&s->s_list, &super_blocks); hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); -- cgit v1.2.3 From 576215cffdefc1f0ceebffd87abb390926e6b037 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 25 May 2023 16:17:10 +0200 Subject: fs: Drop wait_unfrozen wait queue wait_unfrozen waitqueue is used only in quota code to wait for filesystem to become unfrozen. In that place we can just use sb_start_write() - sb_end_write() pair to achieve the same. So just remove the waitqueue. Reviewed-by: Christian Brauner Message-Id: <20230525141710.7595-1-jack@suse.cz> Signed-off-by: Jan Kara --- fs/quota/quota.c | 5 +++-- fs/super.c | 4 ---- include/linux/fs.h | 1 - 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'fs/super.c') diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 052f143e2e0e..0e41fb84060f 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -895,8 +895,9 @@ retry: up_write(&sb->s_umount); else up_read(&sb->s_umount); - wait_event(sb->s_writers.wait_unfrozen, - sb->s_writers.frozen == SB_UNFROZEN); + /* Wait for sb to unfreeze */ + sb_start_write(sb); + sb_end_write(sb); put_super(sb); goto retry; } diff --git a/fs/super.c b/fs/super.c index 34afe411cf2b..6283cea67280 100644 --- a/fs/super.c +++ b/fs/super.c @@ -236,7 +236,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, &type->s_writers_key[i])) goto fail; } - init_waitqueue_head(&s->s_writers.wait_unfrozen); s->s_bdi = &noop_backing_dev_info; s->s_flags = flags; if (s->s_user_ns != &init_user_ns) @@ -1706,7 +1705,6 @@ int freeze_super(struct super_block *sb) if (ret) { sb->s_writers.frozen = SB_UNFROZEN; sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT); - wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return ret; } @@ -1722,7 +1720,6 @@ int freeze_super(struct super_block *sb) "VFS:Filesystem freeze failed\n"); sb->s_writers.frozen = SB_UNFROZEN; sb_freeze_unlock(sb, SB_FREEZE_FS); - wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return ret; } @@ -1768,7 +1765,6 @@ static int thaw_super_locked(struct super_block *sb) sb->s_writers.frozen = SB_UNFROZEN; sb_freeze_unlock(sb, SB_FREEZE_FS); out: - wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return 0; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 21a981680856..3b65a6194485 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1146,7 +1146,6 @@ enum { struct sb_writers { int frozen; /* Is sb frozen? */ - wait_queue_head_t wait_unfrozen; /* wait for thaw */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; -- cgit v1.2.3 From 0718afd47f70cf46877c39c25d06b786e1a3f36c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Jun 2023 11:44:52 +0200 Subject: block: introduce holder ops Add a new blk_holder_ops structure, which is passed to blkdev_get_by_* and installed in the block_device for exclusive claims. It will be used to allow the block layer to call back into the user of the block device for thing like notification of a removed device or a device resize. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Acked-by: Dave Chinner Reviewed-by: Dave Chinner Link: https://lore.kernel.org/r/20230601094459.1350643-10-hch@lst.de Signed-off-by: Jens Axboe --- block/bdev.c | 41 ++++++++++++++++++++++++++----------- block/fops.c | 2 +- block/genhd.c | 6 ++++-- block/ioctl.c | 3 ++- drivers/block/drbd/drbd_nl.c | 3 ++- drivers/block/loop.c | 2 +- drivers/block/pktcdvd.c | 5 +++-- drivers/block/rnbd/rnbd-srv.c | 2 +- drivers/block/xen-blkback/xenbus.c | 2 +- drivers/block/zram/zram_drv.c | 2 +- drivers/md/bcache/super.c | 2 +- drivers/md/dm.c | 2 +- drivers/md/md.c | 2 +- drivers/mtd/devices/block2mtd.c | 4 ++-- drivers/nvme/target/io-cmd-bdev.c | 2 +- drivers/s390/block/dasd_genhd.c | 2 +- drivers/target/target_core_iblock.c | 2 +- drivers/target/target_core_pscsi.c | 3 ++- fs/btrfs/dev-replace.c | 2 +- fs/btrfs/volumes.c | 6 +++--- fs/erofs/super.c | 2 +- fs/ext4/super.c | 3 ++- fs/f2fs/super.c | 4 ++-- fs/jfs/jfs_logmgr.c | 2 +- fs/nfs/blocklayout/dev.c | 5 +++-- fs/nilfs2/super.c | 2 +- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/reiserfs/journal.c | 5 +++-- fs/super.c | 4 ++-- fs/xfs/xfs_super.c | 2 +- include/linux/blk_types.h | 2 ++ include/linux/blkdev.h | 11 +++++++--- kernel/power/swap.c | 4 ++-- mm/swapfile.c | 3 ++- 34 files changed, 90 insertions(+), 56 deletions(-) (limited to 'fs/super.c') diff --git a/block/bdev.c b/block/bdev.c index f5ffcac762e0..5c46ff107706 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -102,7 +102,7 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode, * under live filesystem. */ if (!(mode & FMODE_EXCL)) { - int err = bd_prepare_to_claim(bdev, truncate_bdev_range); + int err = bd_prepare_to_claim(bdev, truncate_bdev_range, NULL); if (err) goto invalidate; } @@ -415,6 +415,7 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) bdev = I_BDEV(inode); mutex_init(&bdev->bd_fsfreeze_mutex); spin_lock_init(&bdev->bd_size_lock); + mutex_init(&bdev->bd_holder_lock); bdev->bd_partno = partno; bdev->bd_inode = inode; bdev->bd_queue = disk->queue; @@ -464,13 +465,15 @@ long nr_blockdev_pages(void) * bd_may_claim - test whether a block device can be claimed * @bdev: block device of interest * @holder: holder trying to claim @bdev + * @hops: holder ops * * Test whether @bdev can be claimed by @holder. * * RETURNS: * %true if @bdev can be claimed, %false otherwise. */ -static bool bd_may_claim(struct block_device *bdev, void *holder) +static bool bd_may_claim(struct block_device *bdev, void *holder, + const struct blk_holder_ops *hops) { struct block_device *whole = bdev_whole(bdev); @@ -480,8 +483,11 @@ static bool bd_may_claim(struct block_device *bdev, void *holder) /* * The same holder can always re-claim. */ - if (bdev->bd_holder == holder) + if (bdev->bd_holder == holder) { + if (WARN_ON_ONCE(bdev->bd_holder_ops != hops)) + return false; return true; + } return false; } @@ -499,6 +505,7 @@ static bool bd_may_claim(struct block_device *bdev, void *holder) * bd_prepare_to_claim - claim a block device * @bdev: block device of interest * @holder: holder trying to claim @bdev + * @hops: holder ops. * * Claim @bdev. This function fails if @bdev is already claimed by another * holder and waits if another claiming is in progress. return, the caller @@ -507,7 +514,8 @@ static bool bd_may_claim(struct block_device *bdev, void *holder) * RETURNS: * 0 if @bdev can be claimed, -EBUSY otherwise. */ -int bd_prepare_to_claim(struct block_device *bdev, void *holder) +int bd_prepare_to_claim(struct block_device *bdev, void *holder, + const struct blk_holder_ops *hops) { struct block_device *whole = bdev_whole(bdev); @@ -516,7 +524,7 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder) retry: mutex_lock(&bdev_lock); /* if someone else claimed, fail */ - if (!bd_may_claim(bdev, holder)) { + if (!bd_may_claim(bdev, holder, hops)) { mutex_unlock(&bdev_lock); return -EBUSY; } @@ -557,12 +565,13 @@ static void bd_clear_claiming(struct block_device *whole, void *holder) * Finish exclusive open of a block device. Mark the device as exlusively * open by the holder and wake up all waiters for exclusive open to finish. */ -static void bd_finish_claiming(struct block_device *bdev, void *holder) +static void bd_finish_claiming(struct block_device *bdev, void *holder, + const struct blk_holder_ops *hops) { struct block_device *whole = bdev_whole(bdev); mutex_lock(&bdev_lock); - BUG_ON(!bd_may_claim(bdev, holder)); + BUG_ON(!bd_may_claim(bdev, holder, hops)); /* * Note that for a whole device bd_holders will be incremented twice, * and bd_holder will be set to bd_may_claim before being set to holder @@ -570,7 +579,10 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder) whole->bd_holders++; whole->bd_holder = bd_may_claim; bdev->bd_holders++; + mutex_lock(&bdev->bd_holder_lock); bdev->bd_holder = holder; + bdev->bd_holder_ops = hops; + mutex_unlock(&bdev->bd_holder_lock); bd_clear_claiming(whole, holder); mutex_unlock(&bdev_lock); } @@ -605,7 +617,10 @@ static void bd_end_claim(struct block_device *bdev) WARN_ON_ONCE(--bdev->bd_holders < 0); WARN_ON_ONCE(--whole->bd_holders < 0); if (!bdev->bd_holders) { + mutex_lock(&bdev->bd_holder_lock); bdev->bd_holder = NULL; + bdev->bd_holder_ops = NULL; + mutex_unlock(&bdev->bd_holder_lock); if (bdev->bd_write_holder) unblock = true; } @@ -735,6 +750,7 @@ void blkdev_put_no_open(struct block_device *bdev) * @dev: device number of block device to open * @mode: FMODE_* mask * @holder: exclusive holder identifier + * @hops: holder operations * * Open the block device described by device number @dev. If @mode includes * %FMODE_EXCL, the block device is opened with exclusive access. Specifying @@ -751,7 +767,8 @@ void blkdev_put_no_open(struct block_device *bdev) * RETURNS: * Reference to the block_device on success, ERR_PTR(-errno) on failure. */ -struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) +struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder, + const struct blk_holder_ops *hops) { bool unblock_events = true; struct block_device *bdev; @@ -771,7 +788,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) disk = bdev->bd_disk; if (mode & FMODE_EXCL) { - ret = bd_prepare_to_claim(bdev, holder); + ret = bd_prepare_to_claim(bdev, holder, hops); if (ret) goto put_blkdev; } @@ -791,7 +808,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) if (ret) goto put_module; if (mode & FMODE_EXCL) { - bd_finish_claiming(bdev, holder); + bd_finish_claiming(bdev, holder, hops); /* * Block event polling for write claims if requested. Any write @@ -842,7 +859,7 @@ EXPORT_SYMBOL(blkdev_get_by_dev); * Reference to the block_device on success, ERR_PTR(-errno) on failure. */ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, - void *holder) + void *holder, const struct blk_holder_ops *hops) { struct block_device *bdev; dev_t dev; @@ -852,7 +869,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, if (error) return ERR_PTR(error); - bdev = blkdev_get_by_dev(dev, mode, holder); + bdev = blkdev_get_by_dev(dev, mode, holder, hops); if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) { blkdev_put(bdev, mode); return ERR_PTR(-EACCES); diff --git a/block/fops.c b/block/fops.c index b12c4b2a3a69..6a3087b750a6 100644 --- a/block/fops.c +++ b/block/fops.c @@ -490,7 +490,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) if ((filp->f_flags & O_ACCMODE) == 3) filp->f_mode |= FMODE_WRITE_IOCTL; - bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp); + bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp, NULL); if (IS_ERR(bdev)) return PTR_ERR(bdev); diff --git a/block/genhd.c b/block/genhd.c index a668d2f02087..b3bd58e9fbea 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -370,13 +370,15 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode) * scanners. */ if (!(mode & FMODE_EXCL)) { - ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions); + ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions, + NULL); if (ret) return ret; } set_bit(GD_NEED_PART_SCAN, &disk->state); - bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL); + bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL, + NULL); if (IS_ERR(bdev)) ret = PTR_ERR(bdev); else diff --git a/block/ioctl.c b/block/ioctl.c index 9c5f637ff153..c7d7d4345edb 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -454,7 +454,8 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode, if (mode & FMODE_EXCL) return set_blocksize(bdev, n); - if (IS_ERR(blkdev_get_by_dev(bdev->bd_dev, mode | FMODE_EXCL, &bdev))) + if (IS_ERR(blkdev_get_by_dev(bdev->bd_dev, mode | FMODE_EXCL, &bdev, + NULL))) return -EBUSY; ret = set_blocksize(bdev, n); blkdev_put(bdev, mode | FMODE_EXCL); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 1a5d3d72d91d..cab59dab3410 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1641,7 +1641,8 @@ static struct block_device *open_backing_dev(struct drbd_device *device, int err = 0; bdev = blkdev_get_by_path(bdev_path, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, claim_ptr); + FMODE_READ | FMODE_WRITE | FMODE_EXCL, + claim_ptr, NULL); if (IS_ERR(bdev)) { drbd_err(device, "open(\"%s\") failed with %ld\n", bdev_path, PTR_ERR(bdev)); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index bc31bb7072a2..a73c857f5bfe 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1015,7 +1015,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * here to avoid changing device under exclusive owner. */ if (!(mode & FMODE_EXCL)) { - error = bd_prepare_to_claim(bdev, loop_configure); + error = bd_prepare_to_claim(bdev, loop_configure, NULL); if (error) goto out_putf; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index d5d7884cedd4..377f8b345352 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2125,7 +2125,8 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) * to read/write from/to it. It is already opened in O_NONBLOCK mode * so open should not fail. */ - bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd); + bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd, + NULL); if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); goto out; @@ -2530,7 +2531,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) } } - bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL); + bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL, NULL); if (IS_ERR(bdev)) return PTR_ERR(bdev); sdev = scsi_device_from_queue(bdev->bd_disk->queue); diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 2cfed2e58d64..cec22bbae2f9 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -719,7 +719,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess, goto reject; } - bdev = blkdev_get_by_path(full_path, open_flags, THIS_MODULE); + bdev = blkdev_get_by_path(full_path, open_flags, THIS_MODULE, NULL); if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %d\n", diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 4807af1d5805..43b36da9b354 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -492,7 +492,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, vbd->pdevice = MKDEV(major, minor); bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? - FMODE_READ : FMODE_WRITE, NULL); + FMODE_READ : FMODE_WRITE, NULL, NULL); if (IS_ERR(bdev)) { pr_warn("xen_vbd_create: device %08x could not be opened\n", diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index b86691d2133e..0bc779446c6f 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -508,7 +508,7 @@ static ssize_t backing_dev_store(struct device *dev, } bdev = blkdev_get_by_dev(inode->i_rdev, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); + FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram, NULL); if (IS_ERR(bdev)) { err = PTR_ERR(bdev); bdev = NULL; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 7e9d19fd21dd..d84c09a73af8 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2560,7 +2560,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, err = "failed to open device"; bdev = blkdev_get_by_path(strim(path), FMODE_READ|FMODE_WRITE|FMODE_EXCL, - sb); + sb, NULL); if (IS_ERR(bdev)) { if (bdev == ERR_PTR(-EBUSY)) { dev_t dev; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3b694ba3a106..d759f8bdb3df 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -746,7 +746,7 @@ static struct table_device *open_table_device(struct mapped_device *md, return ERR_PTR(-ENOMEM); refcount_set(&td->count, 1); - bdev = blkdev_get_by_dev(dev, mode | FMODE_EXCL, _dm_claim_ptr); + bdev = blkdev_get_by_dev(dev, mode | FMODE_EXCL, _dm_claim_ptr, NULL); if (IS_ERR(bdev)) { r = PTR_ERR(bdev); goto out_free_td; diff --git a/drivers/md/md.c b/drivers/md/md.c index 6a559a7e89c0..fabf9c543735 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3642,7 +3642,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe rdev->bdev = blkdev_get_by_dev(newdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, - super_format == -2 ? &claim_rdev : rdev); + super_format == -2 ? &claim_rdev : rdev, NULL); if (IS_ERR(rdev->bdev)) { pr_warn("md: could not open device unknown-block(%u,%u).\n", MAJOR(newdev), MINOR(newdev)); diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index 4cd37ec45762..7ac82c6fe350 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -235,7 +235,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size, return NULL; /* Get a handle on the device */ - bdev = blkdev_get_by_path(devname, mode, dev); + bdev = blkdev_get_by_path(devname, mode, dev, NULL); #ifndef MODULE /* @@ -257,7 +257,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size, devt = name_to_dev_t(devname); if (!devt) continue; - bdev = blkdev_get_by_dev(devt, mode, dev); + bdev = blkdev_get_by_dev(devt, mode, dev, NULL); } #endif diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index c2d6cea0236b..9b6d6d85c725 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -85,7 +85,7 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) return -ENOTBLK; ns->bdev = blkdev_get_by_path(ns->device_path, - FMODE_READ | FMODE_WRITE, NULL); + FMODE_READ | FMODE_WRITE, NULL, NULL); if (IS_ERR(ns->bdev)) { ret = PTR_ERR(ns->bdev); if (ret != -ENOTBLK) { diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 998a961e1704..f21198bc483e 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -130,7 +130,7 @@ int dasd_scan_partitions(struct dasd_block *block) struct block_device *bdev; int rc; - bdev = blkdev_get_by_dev(disk_devt(block->gdp), FMODE_READ, NULL); + bdev = blkdev_get_by_dev(disk_devt(block->gdp), FMODE_READ, NULL, NULL); if (IS_ERR(bdev)) { DBF_DEV_EVENT(DBF_ERR, block->base, "scan partitions error, blkdev_get returned %ld", diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index cc838ffd1294..a5cbbefa78ee 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -114,7 +114,7 @@ static int iblock_configure_device(struct se_device *dev) else dev->dev_flags |= DF_READ_ONLY; - bd = blkdev_get_by_path(ib_dev->ibd_udev_path, mode, ib_dev); + bd = blkdev_get_by_path(ib_dev->ibd_udev_path, mode, ib_dev, NULL); if (IS_ERR(bd)) { ret = PTR_ERR(bd); goto out_free_bioset; diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index e7425549e39c..e3494e036c6c 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -367,7 +367,8 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd) * for TYPE_DISK and TYPE_ZBC using supplied udev_path */ bd = blkdev_get_by_path(dev->udev_path, - FMODE_WRITE|FMODE_READ|FMODE_EXCL, pdv); + FMODE_WRITE|FMODE_READ|FMODE_EXCL, pdv, + NULL); if (IS_ERR(bd)) { pr_err("pSCSI: blkdev_get_by_path() failed\n"); scsi_device_put(sd); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 78696d331639..4de4984fa99b 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -258,7 +258,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, } bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, - fs_info->bdev_holder); + fs_info->bdev_holder, NULL); if (IS_ERR(bdev)) { btrfs_err(fs_info, "target device %s is invalid!", device_path); return PTR_ERR(bdev); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 841e799dece5..784ccc8f6c69 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -496,7 +496,7 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, { int ret; - *bdev = blkdev_get_by_path(device_path, flags, holder); + *bdev = blkdev_get_by_path(device_path, flags, holder, NULL); if (IS_ERR(*bdev)) { ret = PTR_ERR(*bdev); @@ -1377,7 +1377,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, * values temporarily, as the device paths of the fsid are the only * required information for assembling the volume. */ - bdev = blkdev_get_by_path(path, flags, holder); + bdev = blkdev_get_by_path(path, flags, holder, NULL); if (IS_ERR(bdev)) return ERR_CAST(bdev); @@ -2629,7 +2629,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path return -EROFS; bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, - fs_info->bdev_holder); + fs_info->bdev_holder, NULL); if (IS_ERR(bdev)) return PTR_ERR(bdev); diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 811ab66d805e..6c263e9cd38b 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -254,7 +254,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, dif->fscache = fscache; } else if (!sbi->devs->flatdev) { bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL, - sb->s_type); + sb->s_type, NULL); if (IS_ERR(bdev)) return PTR_ERR(bdev); dif->bdev = bdev; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9680fe753e59..865625089ecc 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1103,7 +1103,8 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) { struct block_device *bdev; - bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); + bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb, + NULL); if (IS_ERR(bdev)) goto fail; return bdev; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9f15b03037db..7c34ab082f13 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4025,7 +4025,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) /* Single zoned block device mount */ FDEV(0).bdev = blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev, - sbi->sb->s_mode, sbi->sb->s_type); + sbi->sb->s_mode, sbi->sb->s_type, NULL); } else { /* Multi-device mount */ memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN); @@ -4044,7 +4044,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) sbi->log_blocks_per_seg) - 1; } FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, - sbi->sb->s_mode, sbi->sb->s_type); + sbi->sb->s_mode, sbi->sb->s_type, NULL); } if (IS_ERR(FDEV(i).bdev)) return PTR_ERR(FDEV(i).bdev); diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 15c645827dec..46d393c8088a 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1101,7 +1101,7 @@ int lmLogOpen(struct super_block *sb) */ bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - log); + log, NULL); if (IS_ERR(bdev)) { rc = PTR_ERR(bdev); goto free; diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index fea5f8821da5..38b066ca699e 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -243,7 +243,7 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, if (!dev) return -EIO; - bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL); + bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL, NULL); if (IS_ERR(bdev)) { printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", MAJOR(dev), MINOR(dev), PTR_ERR(bdev)); @@ -312,7 +312,8 @@ bl_open_path(struct pnfs_block_volume *v, const char *prefix) if (!devname) return ERR_PTR(-ENOMEM); - bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL); + bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL, + NULL); if (IS_ERR(bdev)) { pr_warn("pNFS: failed to open device %s (%ld)\n", devname, PTR_ERR(bdev)); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 77f1e5778d1c..91bfbd973d1d 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1285,7 +1285,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; - sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type); + sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type, NULL); if (IS_ERR(sd.bdev)) return ERR_CAST(sd.bdev); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 60b97c92e2b2..6b13b8c3f2b8 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1786,7 +1786,7 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, goto out2; reg->hr_bdev = blkdev_get_by_dev(f.file->f_mapping->host->i_rdev, - FMODE_WRITE | FMODE_READ, NULL); + FMODE_WRITE | FMODE_READ, NULL, NULL); if (IS_ERR(reg->hr_bdev)) { ret = PTR_ERR(reg->hr_bdev); reg->hr_bdev = NULL; diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 4d11d60f493c..5e4db9a0c8e5 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2616,7 +2616,7 @@ static int journal_init_dev(struct super_block *super, if (jdev == super->s_dev) blkdev_mode &= ~FMODE_EXCL; journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode, - journal); + journal, NULL); journal->j_dev_mode = blkdev_mode; if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); @@ -2632,7 +2632,8 @@ static int journal_init_dev(struct super_block *super, } journal->j_dev_mode = blkdev_mode; - journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal); + journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal, + NULL); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; diff --git a/fs/super.c b/fs/super.c index 34afe411cf2b..012ce1400803 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1248,7 +1248,7 @@ int get_tree_bdev(struct fs_context *fc, if (!fc->source) return invalf(fc, "No source specified"); - bdev = blkdev_get_by_path(fc->source, mode, fc->fs_type); + bdev = blkdev_get_by_path(fc->source, mode, fc->fs_type, NULL); if (IS_ERR(bdev)) { errorf(fc, "%s: Can't open blockdev", fc->source); return PTR_ERR(bdev); @@ -1333,7 +1333,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; - bdev = blkdev_get_by_path(dev_name, mode, fs_type); + bdev = blkdev_get_by_path(dev_name, mode, fs_type, NULL); if (IS_ERR(bdev)) return ERR_CAST(bdev); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 7e706255f165..5684c538eb76 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -386,7 +386,7 @@ xfs_blkdev_get( int error = 0; *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - mp); + mp, NULL); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); xfs_warn(mp, "Invalid device [%s], error=%d", name, error); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 8ef209e3aa96..deb69eeab6bd 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -55,6 +55,8 @@ struct block_device { struct super_block * bd_super; void * bd_claiming; void * bd_holder; + const struct blk_holder_ops *bd_holder_ops; + struct mutex bd_holder_lock; /* The counter of freeze processes */ int bd_fsfreeze_count; int bd_holders; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d89c2da14698..44f2a8bc57e8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1470,10 +1470,15 @@ void blkdev_show(struct seq_file *seqf, off_t offset); #define BLKDEV_MAJOR_MAX 0 #endif +struct blk_holder_ops { +}; + +struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder, + const struct blk_holder_ops *hops); struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, - void *holder); -struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); -int bd_prepare_to_claim(struct block_device *bdev, void *holder); + void *holder, const struct blk_holder_ops *hops); +int bd_prepare_to_claim(struct block_device *bdev, void *holder, + const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); void blkdev_put(struct block_device *bdev, fmode_t mode); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 92e41ed292ad..801c411530d1 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -357,7 +357,7 @@ static int swsusp_swap_check(void) root_swap = res; hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, FMODE_WRITE, - NULL); + NULL, NULL); if (IS_ERR(hib_resume_bdev)) return PTR_ERR(hib_resume_bdev); @@ -1524,7 +1524,7 @@ int swsusp_check(void) mode |= FMODE_EXCL; hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, - mode, &holder); + mode, &holder, NULL); if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); diff --git a/mm/swapfile.c b/mm/swapfile.c index 274bbf797480..cfbcf7d5705f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2770,7 +2770,8 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) if (S_ISBLK(inode->i_mode)) { p->bdev = blkdev_get_by_dev(inode->i_rdev, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, p); + FMODE_READ | FMODE_WRITE | FMODE_EXCL, p, + NULL); if (IS_ERR(p->bdev)) { error = PTR_ERR(p->bdev); p->bdev = NULL; -- cgit v1.2.3 From 87efb39075be6a288cd7f23858f15bd01c83028a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Jun 2023 11:44:54 +0200 Subject: fs: add a method to shut down the file system Add a new ->shutdown super operation that can be used to tell the file system to shut down, and call it from newly created holder ops when the block device under a file system shuts down. This only covers the main block device for "simple" file systems using get_tree_bdev / mount_bdev. File systems their own get_tree method or opening additional devices will need to set up their own blk_holder_ops. Signed-off-by: Christoph Hellwig Reviewed-by: Christian Brauner Reviewed-by: Jan Kara Reviewed-by: Darrick J. Wong Acked-by: Dave Chinner Reviewed-by: Dave Chinner Link: https://lore.kernel.org/r/20230601094459.1350643-12-hch@lst.de Signed-off-by: Jens Axboe --- fs/super.c | 21 +++++++++++++++++++-- include/linux/fs.h | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 012ce1400803..f127589700ab 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1206,6 +1206,22 @@ int get_tree_keyed(struct fs_context *fc, EXPORT_SYMBOL(get_tree_keyed); #ifdef CONFIG_BLOCK +static void fs_mark_dead(struct block_device *bdev) +{ + struct super_block *sb; + + sb = get_super(bdev); + if (!sb) + return; + + if (sb->s_op->shutdown) + sb->s_op->shutdown(sb); + drop_super(sb); +} + +static const struct blk_holder_ops fs_holder_ops = { + .mark_dead = fs_mark_dead, +}; static int set_bdev_super(struct super_block *s, void *data) { @@ -1248,7 +1264,8 @@ int get_tree_bdev(struct fs_context *fc, if (!fc->source) return invalf(fc, "No source specified"); - bdev = blkdev_get_by_path(fc->source, mode, fc->fs_type, NULL); + bdev = blkdev_get_by_path(fc->source, mode, fc->fs_type, + &fs_holder_ops); if (IS_ERR(bdev)) { errorf(fc, "%s: Can't open blockdev", fc->source); return PTR_ERR(bdev); @@ -1333,7 +1350,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; - bdev = blkdev_get_by_path(dev_name, mode, fs_type, NULL); + bdev = blkdev_get_by_path(dev_name, mode, fs_type, &fs_holder_ops); if (IS_ERR(bdev)) return ERR_CAST(bdev); diff --git a/include/linux/fs.h b/include/linux/fs.h index 08ba2ae1d3ce..7b2053649820 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1932,6 +1932,7 @@ struct super_operations { struct shrink_control *); long (*free_cached_objects)(struct super_block *, struct shrink_control *); + void (*shutdown)(struct super_block *sb); }; /* -- cgit v1.2.3 From 2736e8eeb0ccdc71d1f4256c9c9a28f58cc43307 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Jun 2023 13:02:43 +0200 Subject: block: use the holder as indication for exclusive opens The current interface for exclusive opens is rather confusing as it requires both the FMODE_EXCL flag and a holder. Remove the need to pass FMODE_EXCL and just key off the exclusive open off a non-NULL holder. For blkdev_put this requires adding the holder argument, which provides better debug checking that only the holder actually releases the hold, but at the same time allows removing the now superfluous mode argument. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Acked-by: Christian Brauner Acked-by: David Sterba [btrfs] Acked-by: Jack Wang [rnbd] Link: https://lore.kernel.org/r/20230608110258.189493-16-hch@lst.de Signed-off-by: Jens Axboe --- block/bdev.c | 37 ++++++++++++++++++++---------------- block/fops.c | 6 ++++-- block/genhd.c | 5 ++--- block/ioctl.c | 5 ++--- drivers/block/drbd/drbd_nl.c | 23 +++++++++++++--------- drivers/block/pktcdvd.c | 13 ++++++------- drivers/block/rnbd/rnbd-srv.c | 4 ++-- drivers/block/xen-blkback/xenbus.c | 2 +- drivers/block/zram/zram_drv.c | 8 ++++---- drivers/md/bcache/super.c | 15 +++++++-------- drivers/md/dm.c | 6 +++--- drivers/md/md.c | 38 +++++++++++++++++++------------------ drivers/mtd/devices/block2mtd.c | 4 ++-- drivers/nvme/target/io-cmd-bdev.c | 2 +- drivers/s390/block/dasd_genhd.c | 2 +- drivers/target/target_core_iblock.c | 6 +++--- drivers/target/target_core_pscsi.c | 8 +++----- fs/btrfs/dev-replace.c | 6 +++--- fs/btrfs/ioctl.c | 12 ++++++------ fs/btrfs/volumes.c | 28 +++++++++++++-------------- fs/btrfs/volumes.h | 6 +++--- fs/erofs/super.c | 7 ++++--- fs/ext4/super.c | 11 +++-------- fs/f2fs/super.c | 2 +- fs/jfs/jfs_logmgr.c | 6 +++--- fs/nfs/blocklayout/dev.c | 4 ++-- fs/nilfs2/super.c | 6 +++--- fs/ocfs2/cluster/heartbeat.c | 4 ++-- fs/reiserfs/journal.c | 19 +++++++++---------- fs/reiserfs/reiserfs.h | 1 - fs/super.c | 20 +++++++++---------- fs/xfs/xfs_super.c | 15 ++++++++------- include/linux/blkdev.h | 2 +- kernel/power/hibernate.c | 12 ++++-------- kernel/power/power.h | 2 +- kernel/power/swap.c | 21 +++++++++----------- mm/swapfile.c | 7 +++---- 37 files changed, 183 insertions(+), 192 deletions(-) (limited to 'fs/super.c') diff --git a/block/bdev.c b/block/bdev.c index 2c6888ceb378..db63e5bcc46f 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -604,7 +604,7 @@ void bd_abort_claiming(struct block_device *bdev, void *holder) } EXPORT_SYMBOL(bd_abort_claiming); -static void bd_end_claim(struct block_device *bdev) +static void bd_end_claim(struct block_device *bdev, void *holder) { struct block_device *whole = bdev_whole(bdev); bool unblock = false; @@ -614,6 +614,7 @@ static void bd_end_claim(struct block_device *bdev) * bdev_lock. open_mutex is used to synchronize disk_holder unlinking. */ mutex_lock(&bdev_lock); + WARN_ON_ONCE(bdev->bd_holder != holder); WARN_ON_ONCE(--bdev->bd_holders < 0); WARN_ON_ONCE(--whole->bd_holders < 0); if (!bdev->bd_holders) { @@ -750,10 +751,9 @@ void blkdev_put_no_open(struct block_device *bdev) * @holder: exclusive holder identifier * @hops: holder operations * - * Open the block device described by device number @dev. If @mode includes - * %FMODE_EXCL, the block device is opened with exclusive access. Specifying - * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for - * the same @holder. + * Open the block device described by device number @dev. If @holder is not + * %NULL, the block device is opened with exclusive access. Exclusive opens may + * nest for the same @holder. * * Use this interface ONLY if you really do not have anything better - i.e. when * you are behind a truly sucky interface and all you are given is a device @@ -785,10 +785,16 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder, return ERR_PTR(-ENXIO); disk = bdev->bd_disk; - if (mode & FMODE_EXCL) { + if (holder) { + mode |= FMODE_EXCL; ret = bd_prepare_to_claim(bdev, holder, hops); if (ret) goto put_blkdev; + } else { + if (WARN_ON_ONCE(mode & FMODE_EXCL)) { + ret = -EIO; + goto put_blkdev; + } } disk_block_events(disk); @@ -805,7 +811,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder, ret = blkdev_get_whole(bdev, mode); if (ret) goto put_module; - if (mode & FMODE_EXCL) { + if (holder) { bd_finish_claiming(bdev, holder, hops); /* @@ -829,7 +835,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder, put_module: module_put(disk->fops->owner); abort_claiming: - if (mode & FMODE_EXCL) + if (holder) bd_abort_claiming(bdev, holder); mutex_unlock(&disk->open_mutex); disk_unblock_events(disk); @@ -845,10 +851,9 @@ EXPORT_SYMBOL(blkdev_get_by_dev); * @mode: FMODE_* mask * @holder: exclusive holder identifier * - * Open the block device described by the device file at @path. If @mode - * includes %FMODE_EXCL, the block device is opened with exclusive access. - * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may - * nest for the same @holder. + * Open the block device described by the device file at @path. If @holder is + * not %NULL, the block device is opened with exclusive access. Exclusive opens + * may nest for the same @holder. * * CONTEXT: * Might sleep. @@ -869,7 +874,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, bdev = blkdev_get_by_dev(dev, mode, holder, hops); if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) { - blkdev_put(bdev, mode); + blkdev_put(bdev, holder); return ERR_PTR(-EACCES); } @@ -877,7 +882,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, } EXPORT_SYMBOL(blkdev_get_by_path); -void blkdev_put(struct block_device *bdev, fmode_t mode) +void blkdev_put(struct block_device *bdev, void *holder) { struct gendisk *disk = bdev->bd_disk; @@ -892,8 +897,8 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) sync_blockdev(bdev); mutex_lock(&disk->open_mutex); - if (mode & FMODE_EXCL) - bd_end_claim(bdev); + if (holder) + bd_end_claim(bdev, holder); /* * Trigger event checking and tell drivers to flush MEDIA_CHANGE diff --git a/block/fops.c b/block/fops.c index 26af2b39c758..9f26e25bafa1 100644 --- a/block/fops.c +++ b/block/fops.c @@ -490,7 +490,9 @@ static int blkdev_open(struct inode *inode, struct file *filp) if ((filp->f_flags & O_ACCMODE) == 3) filp->f_mode |= FMODE_WRITE_IOCTL; - bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp, NULL); + bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, + (filp->f_mode & FMODE_EXCL) ? filp : NULL, + NULL); if (IS_ERR(bdev)) return PTR_ERR(bdev); @@ -504,7 +506,7 @@ static int blkdev_release(struct inode *inode, struct file *filp) { struct block_device *bdev = filp->private_data; - blkdev_put(bdev, filp->f_mode); + blkdev_put(bdev, (filp->f_mode & FMODE_EXCL) ? filp : NULL); return 0; } diff --git a/block/genhd.c b/block/genhd.c index 4e5fd6aaa883..b56f8b5c88b3 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -365,12 +365,11 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode) } set_bit(GD_NEED_PART_SCAN, &disk->state); - bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL, - NULL); + bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL, NULL); if (IS_ERR(bdev)) ret = PTR_ERR(bdev); else - blkdev_put(bdev, mode & ~FMODE_EXCL); + blkdev_put(bdev, NULL); /* * If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set, diff --git a/block/ioctl.c b/block/ioctl.c index c7d7d4345edb..b39bd5b41ee4 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -454,11 +454,10 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode, if (mode & FMODE_EXCL) return set_blocksize(bdev, n); - if (IS_ERR(blkdev_get_by_dev(bdev->bd_dev, mode | FMODE_EXCL, &bdev, - NULL))) + if (IS_ERR(blkdev_get_by_dev(bdev->bd_dev, mode, &bdev, NULL))) return -EBUSY; ret = set_blocksize(bdev, n); - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, &bdev); return ret; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index cab59dab3410..10b1e5171332 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1640,8 +1640,7 @@ static struct block_device *open_backing_dev(struct drbd_device *device, struct block_device *bdev; int err = 0; - bdev = blkdev_get_by_path(bdev_path, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, + bdev = blkdev_get_by_path(bdev_path, FMODE_READ | FMODE_WRITE, claim_ptr, NULL); if (IS_ERR(bdev)) { drbd_err(device, "open(\"%s\") failed with %ld\n", @@ -1654,7 +1653,7 @@ static struct block_device *open_backing_dev(struct drbd_device *device, err = bd_link_disk_holder(bdev, device->vdisk); if (err) { - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(bdev, claim_ptr); drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n", bdev_path, err); bdev = ERR_PTR(err); @@ -1696,13 +1695,13 @@ static int open_backing_devices(struct drbd_device *device, } static void close_backing_dev(struct drbd_device *device, struct block_device *bdev, - bool do_bd_unlink) + void *claim_ptr, bool do_bd_unlink) { if (!bdev) return; if (do_bd_unlink) bd_unlink_disk_holder(bdev, device->vdisk); - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(bdev, claim_ptr); } void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev) @@ -1710,8 +1709,11 @@ void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev * if (ldev == NULL) return; - close_backing_dev(device, ldev->md_bdev, ldev->md_bdev != ldev->backing_bdev); - close_backing_dev(device, ldev->backing_bdev, true); + close_backing_dev(device, ldev->md_bdev, + ldev->md.meta_dev_idx < 0 ? + (void *)device : (void *)drbd_m_holder, + ldev->md_bdev != ldev->backing_bdev); + close_backing_dev(device, ldev->backing_bdev, device, true); kfree(ldev->disk_conf); kfree(ldev); @@ -2127,8 +2129,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) fail: conn_reconfig_done(connection); if (nbc) { - close_backing_dev(device, nbc->md_bdev, nbc->md_bdev != nbc->backing_bdev); - close_backing_dev(device, nbc->backing_bdev, true); + close_backing_dev(device, nbc->md_bdev, + nbc->disk_conf->meta_dev_idx < 0 ? + (void *)device : (void *)drbd_m_holder, + nbc->md_bdev != nbc->backing_bdev); + close_backing_dev(device, nbc->backing_bdev, device, true); kfree(nbc); } kfree(new_disk_conf); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 7bfc058cb665..c3299e49edd5 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2167,8 +2167,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) * to read/write from/to it. It is already opened in O_NONBLOCK mode * so open should not fail. */ - bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd, - NULL); + bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ, pd, NULL); if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); goto out; @@ -2215,7 +2214,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) return 0; out_putdev: - blkdev_put(bdev, FMODE_READ | FMODE_EXCL); + blkdev_put(bdev, pd); out: return ret; } @@ -2234,7 +2233,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush) pkt_lock_door(pd, 0); pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); - blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL); + blkdev_put(pd->bdev, pd); pkt_shrink_pktlist(pd); } @@ -2520,7 +2519,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) return PTR_ERR(bdev); sdev = scsi_device_from_queue(bdev->bd_disk->queue); if (!sdev) { - blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); + blkdev_put(bdev, NULL); return -EINVAL; } put_device(&sdev->sdev_gendev); @@ -2545,7 +2544,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) return 0; out_mem: - blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); + blkdev_put(bdev, NULL); /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return -ENOMEM; @@ -2751,7 +2750,7 @@ static int pkt_remove_dev(dev_t pkt_dev) pkt_debugfs_dev_remove(pd); pkt_sysfs_dev_remove(pd); - blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY); + blkdev_put(pd->bdev, NULL); remove_proc_entry(pd->disk->disk_name, pkt_proc); dev_notice(ddev, "writer unmapped\n"); diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index a92a4289d0ec..a909f8763ce7 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -219,7 +219,7 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id) rnbd_put_sess_dev(sess_dev); wait_for_completion(&dc); /* wait for inflights to drop to zero */ - blkdev_put(sess_dev->bdev, sess_dev->open_flags); + blkdev_put(sess_dev->bdev, NULL); mutex_lock(&sess_dev->dev->lock); list_del(&sess_dev->dev_list); if (sess_dev->open_flags & FMODE_WRITE) @@ -791,7 +791,7 @@ srv_dev_put: } rnbd_put_srv_dev(srv_dev); blkdev_put: - blkdev_put(bdev, open_flags); + blkdev_put(bdev, NULL); free_path: kfree(full_path); reject: diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 43b36da9b354..141b60aad570 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -473,7 +473,7 @@ static void xenvbd_sysfs_delif(struct xenbus_device *dev) static void xen_vbd_free(struct xen_vbd *vbd) { if (vbd->bdev) - blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); + blkdev_put(vbd->bdev, NULL); vbd->bdev = NULL; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f5644c606040..21615d67a9bd 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -420,7 +420,7 @@ static void reset_bdev(struct zram *zram) return; bdev = zram->bdev; - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, zram); /* hope filp_close flush all of IO */ filp_close(zram->backing_dev, NULL); zram->backing_dev = NULL; @@ -507,8 +507,8 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - bdev = blkdev_get_by_dev(inode->i_rdev, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram, NULL); + bdev = blkdev_get_by_dev(inode->i_rdev, FMODE_READ | FMODE_WRITE, zram, + NULL); if (IS_ERR(bdev)) { err = PTR_ERR(bdev); bdev = NULL; @@ -539,7 +539,7 @@ out: kvfree(bitmap); if (bdev) - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(bdev, zram); if (backing_dev) filp_close(backing_dev, NULL); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 4a2aed047aec..7022fea396f2 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1369,7 +1369,7 @@ static void cached_dev_free(struct closure *cl) put_page(virt_to_page(dc->sb_disk)); if (!IS_ERR_OR_NULL(dc->bdev)) - blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(dc->bdev, bcache_kobj); wake_up(&unregister_wait); @@ -2218,7 +2218,7 @@ void bch_cache_release(struct kobject *kobj) put_page(virt_to_page(ca->sb_disk)); if (!IS_ERR_OR_NULL(ca->bdev)) - blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(ca->bdev, bcache_kobj); kfree(ca); module_put(THIS_MODULE); @@ -2359,7 +2359,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, * call blkdev_put() to bdev in bch_cache_release(). So we * explicitly call blkdev_put() here. */ - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, bcache_kobj); if (ret == -ENOMEM) err = "cache_alloc(): -ENOMEM"; else if (ret == -EPERM) @@ -2461,7 +2461,7 @@ static void register_bdev_worker(struct work_struct *work) if (!dc) { fail = true; put_page(virt_to_page(args->sb_disk)); - blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(args->bdev, bcache_kobj); goto out; } @@ -2491,7 +2491,7 @@ static void register_cache_worker(struct work_struct *work) if (!ca) { fail = true; put_page(virt_to_page(args->sb_disk)); - blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(args->bdev, bcache_kobj); goto out; } @@ -2558,8 +2558,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ret = -EINVAL; err = "failed to open device"; - bdev = blkdev_get_by_path(strim(path), - FMODE_READ|FMODE_WRITE|FMODE_EXCL, + bdev = blkdev_get_by_path(strim(path), FMODE_READ | FMODE_WRITE, bcache_kobj, NULL); if (IS_ERR(bdev)) { if (bdev == ERR_PTR(-EBUSY)) { @@ -2648,7 +2647,7 @@ async_done: out_put_sb_page: put_page(virt_to_page(sb_disk)); out_blkdev_put: - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(bdev, register_bcache); out_free_sb: kfree(sb); out_free_path: diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 246b8f028a98..b16e37362c5a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -746,7 +746,7 @@ static struct table_device *open_table_device(struct mapped_device *md, return ERR_PTR(-ENOMEM); refcount_set(&td->count, 1); - bdev = blkdev_get_by_dev(dev, mode | FMODE_EXCL, _dm_claim_ptr, NULL); + bdev = blkdev_get_by_dev(dev, mode, _dm_claim_ptr, NULL); if (IS_ERR(bdev)) { r = PTR_ERR(bdev); goto out_free_td; @@ -771,7 +771,7 @@ static struct table_device *open_table_device(struct mapped_device *md, return td; out_blkdev_put: - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, _dm_claim_ptr); out_free_td: kfree(td); return ERR_PTR(r); @@ -784,7 +784,7 @@ static void close_table_device(struct table_device *td, struct mapped_device *md { if (md->disk->slave_dir) bd_unlink_disk_holder(td->dm_dev.bdev, md->disk); - blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL); + blkdev_put(td->dm_dev.bdev, _dm_claim_ptr); put_dax(td->dm_dev.dax_dev); list_del(&td->list); kfree(td); diff --git a/drivers/md/md.c b/drivers/md/md.c index 159197dd7b6d..dad4a5539f9f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2449,7 +2449,10 @@ static void rdev_delayed_delete(struct work_struct *ws) void md_autodetect_dev(dev_t dev); -static void export_rdev(struct md_rdev *rdev) +/* just for claiming the bdev */ +static struct md_rdev claim_rdev; + +static void export_rdev(struct md_rdev *rdev, struct mddev *mddev) { pr_debug("md: export_rdev(%pg)\n", rdev->bdev); md_rdev_clear(rdev); @@ -2457,7 +2460,7 @@ static void export_rdev(struct md_rdev *rdev) if (test_bit(AutoDetected, &rdev->flags)) md_autodetect_dev(rdev->bdev->bd_dev); #endif - blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(rdev->bdev, mddev->major_version == -2 ? &claim_rdev : rdev); rdev->bdev = NULL; kobject_put(&rdev->kobj); } @@ -2485,7 +2488,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev) INIT_WORK(&rdev->del_work, rdev_delayed_delete); kobject_get(&rdev->kobj); queue_work(md_rdev_misc_wq, &rdev->del_work); - export_rdev(rdev); + export_rdev(rdev, rdev->mddev); } static void export_array(struct mddev *mddev) @@ -3612,6 +3615,7 @@ int md_rdev_init(struct md_rdev *rdev) return badblocks_init(&rdev->badblocks, 0); } EXPORT_SYMBOL_GPL(md_rdev_init); + /* * Import a device. If 'super_format' >= 0, then sanity check the superblock * @@ -3624,7 +3628,6 @@ EXPORT_SYMBOL_GPL(md_rdev_init); */ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor) { - static struct md_rdev claim_rdev; /* just for claiming the bdev */ struct md_rdev *rdev; sector_t size; int err; @@ -3640,8 +3643,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe if (err) goto out_clear_rdev; - rdev->bdev = blkdev_get_by_dev(newdev, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, + rdev->bdev = blkdev_get_by_dev(newdev, FMODE_READ | FMODE_WRITE, super_format == -2 ? &claim_rdev : rdev, NULL); if (IS_ERR(rdev->bdev)) { pr_warn("md: could not open device unknown-block(%u,%u).\n", @@ -3679,7 +3681,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe return rdev; out_blkdev_put: - blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(rdev->bdev, super_format == -2 ? &claim_rdev : rdev); out_clear_rdev: md_rdev_clear(rdev); out_free_rdev: @@ -4560,7 +4562,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) err = bind_rdev_to_array(rdev, mddev); out: if (err) - export_rdev(rdev); + export_rdev(rdev, mddev); mddev_unlock(mddev); if (!err) md_new_event(); @@ -6498,7 +6500,7 @@ static void autorun_devices(int part) rdev_for_each_list(rdev, tmp, &candidates) { list_del_init(&rdev->same_set); if (bind_rdev_to_array(rdev, mddev)) - export_rdev(rdev); + export_rdev(rdev, mddev); } autorun_array(mddev); mddev_unlock(mddev); @@ -6508,7 +6510,7 @@ static void autorun_devices(int part) */ rdev_for_each_list(rdev, tmp, &candidates) { list_del_init(&rdev->same_set); - export_rdev(rdev); + export_rdev(rdev, mddev); } mddev_put(mddev); } @@ -6696,13 +6698,13 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) pr_warn("md: %pg has different UUID to %pg\n", rdev->bdev, rdev0->bdev); - export_rdev(rdev); + export_rdev(rdev, mddev); return -EINVAL; } } err = bind_rdev_to_array(rdev, mddev); if (err) - export_rdev(rdev); + export_rdev(rdev, mddev); return err; } @@ -6746,7 +6748,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) /* This was a hot-add request, but events doesn't * match, so reject it. */ - export_rdev(rdev); + export_rdev(rdev, mddev); return -EINVAL; } @@ -6772,7 +6774,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) } } if (has_journal || mddev->bitmap) { - export_rdev(rdev); + export_rdev(rdev, mddev); return -EBUSY; } set_bit(Journal, &rdev->flags); @@ -6787,7 +6789,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) /* --add initiated by this node */ err = md_cluster_ops->add_new_disk(mddev, rdev); if (err) { - export_rdev(rdev); + export_rdev(rdev, mddev); return err; } } @@ -6797,7 +6799,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) err = bind_rdev_to_array(rdev, mddev); if (err) - export_rdev(rdev); + export_rdev(rdev, mddev); if (mddev_is_clustered(mddev)) { if (info->state & (1 << MD_DISK_CANDIDATE)) { @@ -6860,7 +6862,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) err = bind_rdev_to_array(rdev, mddev); if (err) { - export_rdev(rdev); + export_rdev(rdev, mddev); return err; } } @@ -6985,7 +6987,7 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev) return 0; abort_export: - export_rdev(rdev); + export_rdev(rdev, mddev); return err; } diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index 218eb2af564a..44fc23af4c3f 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -209,7 +209,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) if (dev->blkdev) { invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping, 0, -1); - blkdev_put(dev->blkdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(dev->blkdev, NULL); } kfree(dev); @@ -261,7 +261,7 @@ static struct block_device __ref *mdtblock_early_get_bdev(const char *devname, static struct block2mtd_dev *add_device(char *devname, int erase_size, char *label, int timeout) { - const fmode_t mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; + const fmode_t mode = FMODE_READ | FMODE_WRITE; struct block_device *bdev; struct block2mtd_dev *dev; char *name; diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 9b6d6d85c725..65ed2d478fac 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -51,7 +51,7 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) void nvmet_bdev_ns_disable(struct nvmet_ns *ns) { if (ns->bdev) { - blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ); + blkdev_put(ns->bdev, NULL); ns->bdev = NULL; } } diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index f21198bc483e..d2b27b84f854 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -179,7 +179,7 @@ void dasd_destroy_partitions(struct dasd_block *block) mutex_unlock(&bdev->bd_disk->open_mutex); /* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */ - blkdev_put(bdev, FMODE_READ); + blkdev_put(bdev, NULL); } int dasd_gendisk_init(void) diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index a5cbbefa78ee..c62f961f46e3 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -108,7 +108,7 @@ static int iblock_configure_device(struct se_device *dev) pr_debug( "IBLOCK: Claiming struct block_device: %s\n", ib_dev->ibd_udev_path); - mode = FMODE_READ|FMODE_EXCL; + mode = FMODE_READ; if (!ib_dev->ibd_readonly) mode |= FMODE_WRITE; else @@ -175,7 +175,7 @@ static int iblock_configure_device(struct se_device *dev) return 0; out_blkdev_put: - blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL); + blkdev_put(ib_dev->ibd_bd, ib_dev); out_free_bioset: bioset_exit(&ib_dev->ibd_bio_set); out: @@ -201,7 +201,7 @@ static void iblock_destroy_device(struct se_device *dev) struct iblock_dev *ib_dev = IBLOCK_DEV(dev); if (ib_dev->ibd_bd != NULL) - blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL); + blkdev_put(ib_dev->ibd_bd, ib_dev); bioset_exit(&ib_dev->ibd_bio_set); } diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index e3494e036c6c..da3b5512d7ae 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -366,8 +366,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd) * Claim exclusive struct block_device access to struct scsi_device * for TYPE_DISK and TYPE_ZBC using supplied udev_path */ - bd = blkdev_get_by_path(dev->udev_path, - FMODE_WRITE|FMODE_READ|FMODE_EXCL, pdv, + bd = blkdev_get_by_path(dev->udev_path, FMODE_WRITE | FMODE_READ, pdv, NULL); if (IS_ERR(bd)) { pr_err("pSCSI: blkdev_get_by_path() failed\n"); @@ -378,7 +377,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd) ret = pscsi_add_device_to_list(dev, sd); if (ret) { - blkdev_put(pdv->pdv_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL); + blkdev_put(pdv->pdv_bd, pdv); scsi_device_put(sd); return ret; } @@ -566,8 +565,7 @@ static void pscsi_destroy_device(struct se_device *dev) */ if ((sd->type == TYPE_DISK || sd->type == TYPE_ZBC) && pdv->pdv_bd) { - blkdev_put(pdv->pdv_bd, - FMODE_WRITE|FMODE_READ|FMODE_EXCL); + blkdev_put(pdv->pdv_bd, pdv); pdv->pdv_bd = NULL; } /* diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 4de4984fa99b..677e9d9e1527 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -257,7 +257,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, return -EINVAL; } - bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, + bdev = blkdev_get_by_path(device_path, FMODE_WRITE, fs_info->bdev_holder, NULL); if (IS_ERR(bdev)) { btrfs_err(fs_info, "target device %s is invalid!", device_path); @@ -315,7 +315,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, device->bdev = bdev; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); - device->mode = FMODE_EXCL; + device->holder = fs_info->bdev_holder; device->dev_stats_valid = 1; set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); device->fs_devices = fs_devices; @@ -334,7 +334,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, return 0; error: - blkdev_put(bdev, FMODE_EXCL); + blkdev_put(bdev, fs_info->bdev_holder); return ret; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2fa36f694daa..d99376a79ef4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2672,7 +2672,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_vol_args_v2 *vol_args; struct block_device *bdev = NULL; - fmode_t mode; + void *holder; int ret; bool cancel = false; @@ -2709,7 +2709,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) goto err_drop; /* Exclusive operation is now claimed */ - ret = btrfs_rm_device(fs_info, &args, &bdev, &mode); + ret = btrfs_rm_device(fs_info, &args, &bdev, &holder); btrfs_exclop_finish(fs_info); @@ -2724,7 +2724,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) err_drop: mnt_drop_write_file(file); if (bdev) - blkdev_put(bdev, mode); + blkdev_put(bdev, holder); out: btrfs_put_dev_args_from_path(&args); kfree(vol_args); @@ -2738,7 +2738,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_vol_args *vol_args; struct block_device *bdev = NULL; - fmode_t mode; + void *holder; int ret; bool cancel = false; @@ -2765,7 +2765,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE, cancel); if (ret == 0) { - ret = btrfs_rm_device(fs_info, &args, &bdev, &mode); + ret = btrfs_rm_device(fs_info, &args, &bdev, &holder); if (!ret) btrfs_info(fs_info, "disk deleted %s", vol_args->name); btrfs_exclop_finish(fs_info); @@ -2773,7 +2773,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) mnt_drop_write_file(file); if (bdev) - blkdev_put(bdev, mode); + blkdev_put(bdev, holder); out: btrfs_put_dev_args_from_path(&args); kfree(vol_args); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 035868cee3dd..7b12e05cdbf0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -507,14 +507,14 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, sync_blockdev(*bdev); ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE); if (ret) { - blkdev_put(*bdev, flags); + blkdev_put(*bdev, holder); goto error; } invalidate_bdev(*bdev); *disk_super = btrfs_read_dev_super(*bdev); if (IS_ERR(*disk_super)) { ret = PTR_ERR(*disk_super); - blkdev_put(*bdev, flags); + blkdev_put(*bdev, holder); goto error; } @@ -642,7 +642,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, device->bdev = bdev; clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); - device->mode = flags; + device->holder = holder; fs_devices->open_devices++; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && @@ -656,7 +656,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, error_free_page: btrfs_release_disk_super(disk_super); - blkdev_put(bdev, flags); + blkdev_put(bdev, holder); return -EINVAL; } @@ -1057,7 +1057,7 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, continue; if (device->bdev) { - blkdev_put(device->bdev, device->mode); + blkdev_put(device->bdev, device->holder); device->bdev = NULL; fs_devices->open_devices--; } @@ -1103,7 +1103,7 @@ static void btrfs_close_bdev(struct btrfs_device *device) invalidate_bdev(device->bdev); } - blkdev_put(device->bdev, device->mode); + blkdev_put(device->bdev, device->holder); } static void btrfs_close_one_device(struct btrfs_device *device) @@ -1213,8 +1213,6 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, struct btrfs_device *latest_dev = NULL; struct btrfs_device *tmp_device; - flags |= FMODE_EXCL; - list_for_each_entry_safe(device, tmp_device, &fs_devices->devices, dev_list) { int ret; @@ -1400,7 +1398,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags) btrfs_release_disk_super(disk_super); error_bdev_put: - blkdev_put(bdev, flags); + blkdev_put(bdev, NULL); return device; } @@ -2087,7 +2085,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, int btrfs_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, - struct block_device **bdev, fmode_t *mode) + struct block_device **bdev, void **holder) { struct btrfs_trans_handle *trans; struct btrfs_device *device; @@ -2226,7 +2224,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, } *bdev = device->bdev; - *mode = device->mode; + *holder = device->holder; synchronize_rcu(); btrfs_free_device(device); @@ -2394,7 +2392,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, else memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE); btrfs_release_disk_super(disk_super); - blkdev_put(bdev, FMODE_READ); + blkdev_put(bdev, NULL); return 0; } @@ -2627,7 +2625,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path if (sb_rdonly(sb) && !fs_devices->seeding) return -EROFS; - bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, + bdev = blkdev_get_by_path(device_path, FMODE_WRITE, fs_info->bdev_holder, NULL); if (IS_ERR(bdev)) return PTR_ERR(bdev); @@ -2690,7 +2688,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path device->commit_total_bytes = device->total_bytes; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); - device->mode = FMODE_EXCL; + device->holder = fs_info->bdev_holder; device->dev_stats_valid = 1; set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); @@ -2848,7 +2846,7 @@ error_free_zone: error_free_device: btrfs_free_device(device); error: - blkdev_put(bdev, FMODE_EXCL); + blkdev_put(bdev, fs_info->bdev_holder); if (locked) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index eb97a397b3c3..840a8df39907 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -94,8 +94,8 @@ struct btrfs_device { struct btrfs_zoned_device_info *zone_info; - /* the mode sent to blkdev_get */ - fmode_t mode; + /* block device holder for blkdev_get/put */ + void *holder; /* * Device's major-minor number. Must be set even if the device is not @@ -619,7 +619,7 @@ void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args); void btrfs_free_device(struct btrfs_device *device); int btrfs_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, - struct block_device **bdev, fmode_t *mode); + struct block_device **bdev, void **holder); void __exit btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); int btrfs_grow_device(struct btrfs_trans_handle *trans, diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 6c263e9cd38b..54dba967a2d4 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -19,6 +19,7 @@ #include static struct kmem_cache *erofs_inode_cachep __read_mostly; +struct file_system_type erofs_fs_type; void _erofs_err(struct super_block *sb, const char *function, const char *fmt, ...) @@ -253,8 +254,8 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, return PTR_ERR(fscache); dif->fscache = fscache; } else if (!sbi->devs->flatdev) { - bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL, - sb->s_type, NULL); + bdev = blkdev_get_by_path(dif->path, FMODE_READ, sb->s_type, + NULL); if (IS_ERR(bdev)) return PTR_ERR(bdev); dif->bdev = bdev; @@ -877,7 +878,7 @@ static int erofs_release_device_info(int id, void *ptr, void *data) fs_put_dax(dif->dax_dev, NULL); if (dif->bdev) - blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL); + blkdev_put(dif->bdev, &erofs_fs_type); erofs_fscache_unregister_cookie(dif->fscache); dif->fscache = NULL; kfree(dif->path); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9070ea9154d7..92dd699139a3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1112,7 +1112,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) { struct block_device *bdev; - bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb, + bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, sb, &ext4_holder_ops); if (IS_ERR(bdev)) goto fail; @@ -1128,17 +1128,12 @@ fail: /* * Release the journal device */ -static void ext4_blkdev_put(struct block_device *bdev) -{ - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); -} - static void ext4_blkdev_remove(struct ext4_sb_info *sbi) { struct block_device *bdev; bdev = sbi->s_journal_bdev; if (bdev) { - ext4_blkdev_put(bdev); + blkdev_put(bdev, sbi->s_es); sbi->s_journal_bdev = NULL; } } @@ -5915,7 +5910,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, out_journal: jbd2_journal_destroy(journal); out_bdev: - ext4_blkdev_put(bdev); + blkdev_put(bdev, sb); return NULL; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7c34ab082f13..a5adb1d316e3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1538,7 +1538,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) int i; for (i = 0; i < sbi->s_ndevs; i++) { - blkdev_put(FDEV(i).bdev, FMODE_EXCL); + blkdev_put(FDEV(i).bdev, sbi->sb->s_type); #ifdef CONFIG_BLK_DEV_ZONED kvfree(FDEV(i).blkz_seq); #endif diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 46d393c8088a..82f70d46f4e5 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1100,7 +1100,7 @@ int lmLogOpen(struct super_block *sb) * file systems to log may have n-to-1 relationship; */ - bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ | FMODE_WRITE, log, NULL); if (IS_ERR(bdev)) { rc = PTR_ERR(bdev); @@ -1141,7 +1141,7 @@ journal_found: lbmLogShutdown(log); close: /* close external log device */ - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, log); free: /* free log descriptor */ mutex_unlock(&jfs_log_mutex); @@ -1485,7 +1485,7 @@ int lmLogClose(struct super_block *sb) bdev = log->bdev; rc = lmLogShutdown(log); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, log); kfree(log); diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 38b066ca699e..9be7f958f60e 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -35,7 +35,7 @@ bl_free_device(struct pnfs_block_dev *dev) } if (dev->bdev) - blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE); + blkdev_put(dev->bdev, NULL); } } @@ -374,7 +374,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, return 0; out_blkdev_put: - blkdev_put(d->bdev, FMODE_READ | FMODE_WRITE); + blkdev_put(d->bdev, NULL); return error; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 91bfbd973d1d..61d5e79a5e81 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1278,7 +1278,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, { struct nilfs_super_data sd; struct super_block *s; - fmode_t mode = FMODE_READ | FMODE_EXCL; + fmode_t mode = FMODE_READ; struct dentry *root_dentry; int err, s_new = false; @@ -1357,7 +1357,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } if (!s_new) - blkdev_put(sd.bdev, mode); + blkdev_put(sd.bdev, fs_type); return root_dentry; @@ -1366,7 +1366,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, failed: if (!s_new) - blkdev_put(sd.bdev, mode); + blkdev_put(sd.bdev, fs_type); return ERR_PTR(err); } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 6b13b8c3f2b8..c6ae9aee01ed 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1503,7 +1503,7 @@ static void o2hb_region_release(struct config_item *item) } if (reg->hr_bdev) - blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(reg->hr_bdev, NULL); kfree(reg->hr_slots); @@ -1893,7 +1893,7 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, out3: if (ret < 0) { - blkdev_put(reg->hr_bdev, FMODE_READ | FMODE_WRITE); + blkdev_put(reg->hr_bdev, NULL); reg->hr_bdev = NULL; } out2: diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 5e4db9a0c8e5..905297ea5545 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2589,7 +2589,7 @@ static void release_journal_dev(struct super_block *super, struct reiserfs_journal *journal) { if (journal->j_dev_bd != NULL) { - blkdev_put(journal->j_dev_bd, journal->j_dev_mode); + blkdev_put(journal->j_dev_bd, journal); journal->j_dev_bd = NULL; } } @@ -2598,9 +2598,10 @@ static int journal_init_dev(struct super_block *super, struct reiserfs_journal *journal, const char *jdev_name) { + fmode_t blkdev_mode = FMODE_READ; + void *holder = journal; int result; dev_t jdev; - fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; result = 0; @@ -2608,16 +2609,15 @@ static int journal_init_dev(struct super_block *super, jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; - if (bdev_read_only(super->s_bdev)) - blkdev_mode = FMODE_READ; + if (!bdev_read_only(super->s_bdev)) + blkdev_mode |= FMODE_WRITE; /* there is no "jdev" option and journal is on separate device */ if ((!jdev_name || !jdev_name[0])) { if (jdev == super->s_dev) - blkdev_mode &= ~FMODE_EXCL; - journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode, - journal, NULL); - journal->j_dev_mode = blkdev_mode; + holder = NULL; + journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode, holder, + NULL); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; @@ -2631,8 +2631,7 @@ static int journal_init_dev(struct super_block *super, return 0; } - journal->j_dev_mode = blkdev_mode; - journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal, + journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, holder, NULL); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 1bccf6a2e908..55e85256aae8 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -300,7 +300,6 @@ struct reiserfs_journal { struct reiserfs_journal_cnode *j_first; struct block_device *j_dev_bd; - fmode_t j_dev_mode; /* first block on s_dev of reserved area journal */ int j_1st_reserved_block; diff --git a/fs/super.c b/fs/super.c index f127589700ab..8563794a8bc4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1255,7 +1255,7 @@ int get_tree_bdev(struct fs_context *fc, { struct block_device *bdev; struct super_block *s; - fmode_t mode = FMODE_READ | FMODE_EXCL; + fmode_t mode = FMODE_READ; int error = 0; if (!(fc->sb_flags & SB_RDONLY)) @@ -1279,7 +1279,7 @@ int get_tree_bdev(struct fs_context *fc, if (bdev->bd_fsfreeze_count > 0) { mutex_unlock(&bdev->bd_fsfreeze_mutex); warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); - blkdev_put(bdev, mode); + blkdev_put(bdev, fc->fs_type); return -EBUSY; } @@ -1288,7 +1288,7 @@ int get_tree_bdev(struct fs_context *fc, s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc); mutex_unlock(&bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) { - blkdev_put(bdev, mode); + blkdev_put(bdev, fc->fs_type); return PTR_ERR(s); } @@ -1297,7 +1297,7 @@ int get_tree_bdev(struct fs_context *fc, if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { warnf(fc, "%pg: Can't mount, would change RO state", bdev); deactivate_locked_super(s); - blkdev_put(bdev, mode); + blkdev_put(bdev, fc->fs_type); return -EBUSY; } @@ -1309,7 +1309,7 @@ int get_tree_bdev(struct fs_context *fc, * holding an active reference. */ up_write(&s->s_umount); - blkdev_put(bdev, mode); + blkdev_put(bdev, fc->fs_type); down_write(&s->s_umount); } else { s->s_mode = mode; @@ -1344,7 +1344,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, { struct block_device *bdev; struct super_block *s; - fmode_t mode = FMODE_READ | FMODE_EXCL; + fmode_t mode = FMODE_READ; int error = 0; if (!(flags & SB_RDONLY)) @@ -1386,7 +1386,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, * holding an active reference. */ up_write(&s->s_umount); - blkdev_put(bdev, mode); + blkdev_put(bdev, fs_type); down_write(&s->s_umount); } else { s->s_mode = mode; @@ -1409,7 +1409,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, error_s: error = PTR_ERR(s); error_bdev: - blkdev_put(bdev, mode); + blkdev_put(bdev, fs_type); error: return ERR_PTR(error); } @@ -1418,13 +1418,11 @@ EXPORT_SYMBOL(mount_bdev); void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; - fmode_t mode = sb->s_mode; bdev->bd_super = NULL; generic_shutdown_super(sb); sync_blockdev(bdev); - WARN_ON_ONCE(!(mode & FMODE_EXCL)); - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, sb->s_type); } EXPORT_SYMBOL(kill_block_super); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 1b4bd5c88f4a..3b7cf8268057 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -396,8 +396,8 @@ xfs_blkdev_get( { int error = 0; - *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - mp, &xfs_holder_ops); + *bdevp = blkdev_get_by_path(name, FMODE_READ | FMODE_WRITE, mp, + &xfs_holder_ops); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); xfs_warn(mp, "Invalid device [%s], error=%d", name, error); @@ -408,10 +408,11 @@ xfs_blkdev_get( STATIC void xfs_blkdev_put( + struct xfs_mount *mp, struct block_device *bdev) { if (bdev) - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, mp); } STATIC void @@ -422,13 +423,13 @@ xfs_close_devices( struct block_device *logdev = mp->m_logdev_targp->bt_bdev; xfs_free_buftarg(mp->m_logdev_targp); - xfs_blkdev_put(logdev); + xfs_blkdev_put(mp, logdev); } if (mp->m_rtdev_targp) { struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; xfs_free_buftarg(mp->m_rtdev_targp); - xfs_blkdev_put(rtdev); + xfs_blkdev_put(mp, rtdev); } xfs_free_buftarg(mp->m_ddev_targp); } @@ -503,10 +504,10 @@ xfs_open_devices( out_free_ddev_targ: xfs_free_buftarg(mp->m_ddev_targp); out_close_rtdev: - xfs_blkdev_put(rtdev); + xfs_blkdev_put(mp, rtdev); out_close_logdev: if (logdev && logdev != ddev) - xfs_blkdev_put(logdev); + xfs_blkdev_put(mp, logdev); return error; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25bdd0cc74dc..d5b99796f12c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1480,7 +1480,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -void blkdev_put(struct block_device *bdev, fmode_t mode); +void blkdev_put(struct block_device *bdev, void *holder); /* just for blk-cgroup, don't use elsewhere */ struct block_device *blkdev_get_no_open(dev_t dev); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 7ae95ec72f99..f62e89d0d906 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -688,22 +688,18 @@ static int load_image_and_restore(bool snapshot_test) { int error; unsigned int flags; - fmode_t mode = FMODE_READ; - - if (snapshot_test) - mode |= FMODE_EXCL; pm_pr_dbg("Loading hibernation image.\n"); lock_device_hotplug(); error = create_basic_memory_bitmaps(); if (error) { - swsusp_close(mode); + swsusp_close(snapshot_test); goto Unlock; } error = swsusp_read(&flags); - swsusp_close(mode); + swsusp_close(snapshot_test); if (!error) error = hibernation_restore(flags & SF_PLATFORM_MODE); @@ -956,7 +952,7 @@ static int software_resume(void) /* The snapshot device should not be opened while we're running */ if (!hibernate_acquire()) { error = -EBUSY; - swsusp_close(FMODE_READ | FMODE_EXCL); + swsusp_close(false); goto Unlock; } @@ -991,7 +987,7 @@ static int software_resume(void) pm_pr_dbg("Hibernation image not present or could not be loaded.\n"); return error; Close_Finish: - swsusp_close(FMODE_READ | FMODE_EXCL); + swsusp_close(false); goto Finish; } diff --git a/kernel/power/power.h b/kernel/power/power.h index 978189fcafd1..a8e0c44b804e 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -177,7 +177,7 @@ int swsusp_check(bool snapshot_test); extern void swsusp_free(void); extern int swsusp_read(unsigned int *flags_p); extern int swsusp_write(unsigned int flags); -extern void swsusp_close(fmode_t); +void swsusp_close(bool snapshot_test); #ifdef CONFIG_SUSPEND extern int swsusp_unmark(void); #endif diff --git a/kernel/power/swap.c b/kernel/power/swap.c index b03ff1a33c7f..cc9259307c94 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -363,7 +363,7 @@ static int swsusp_swap_check(void) res = set_blocksize(hib_resume_bdev, PAGE_SIZE); if (res < 0) - blkdev_put(hib_resume_bdev, FMODE_WRITE); + blkdev_put(hib_resume_bdev, NULL); return res; } @@ -443,7 +443,7 @@ static int get_swap_writer(struct swap_map_handle *handle) err_rel: release_swap_writer(handle); err_close: - swsusp_close(FMODE_WRITE); + swsusp_close(false); return ret; } @@ -508,7 +508,7 @@ static int swap_writer_finish(struct swap_map_handle *handle, if (error) free_all_swap_pages(root_swap); release_swap_writer(handle); - swsusp_close(FMODE_WRITE); + swsusp_close(false); return error; } @@ -1518,14 +1518,11 @@ static void *swsusp_holder; int swsusp_check(bool snapshot_test) { + void *holder = snapshot_test ? &swsusp_holder : NULL; int error; - fmode_t mode = FMODE_READ; - if (snapshot_test) - mode |= FMODE_EXCL; - - hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, - mode, &swsusp_holder, NULL); + hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, FMODE_READ, + holder, NULL); if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); @@ -1552,7 +1549,7 @@ int swsusp_check(bool snapshot_test) put: if (error) - blkdev_put(hib_resume_bdev, mode); + blkdev_put(hib_resume_bdev, holder); else pr_debug("Image signature found, resuming\n"); } else { @@ -1569,14 +1566,14 @@ put: * swsusp_close - close swap device. */ -void swsusp_close(fmode_t mode) +void swsusp_close(bool snapshot_test) { if (IS_ERR(hib_resume_bdev)) { pr_debug("Image device not initialised\n"); return; } - blkdev_put(hib_resume_bdev, mode); + blkdev_put(hib_resume_bdev, snapshot_test ? &swsusp_holder : NULL); } /** diff --git a/mm/swapfile.c b/mm/swapfile.c index cfbcf7d5705f..16554256be65 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2539,7 +2539,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) struct block_device *bdev = I_BDEV(inode); set_blocksize(bdev, old_block_size); - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(bdev, p); } inode_lock(inode); @@ -2770,8 +2770,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) if (S_ISBLK(inode->i_mode)) { p->bdev = blkdev_get_by_dev(inode->i_rdev, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, p, - NULL); + FMODE_READ | FMODE_WRITE, p, NULL); if (IS_ERR(p->bdev)) { error = PTR_ERR(p->bdev); p->bdev = NULL; @@ -3222,7 +3221,7 @@ bad_swap: p->cluster_next_cpu = NULL; if (inode && S_ISBLK(inode->i_mode) && p->bdev) { set_blocksize(p->bdev, p->old_block_size); - blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(p->bdev, p); } inode = NULL; destroy_swap_extents(p); -- cgit v1.2.3 From 3f0b3e785e8b54a40c530fa77b7ab37bec925c57 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Jun 2023 13:02:44 +0200 Subject: block: add a sb_open_mode helper Add a helper to return the open flags for blkdev_get_by* for passed in super block flags instead of open coding the logic in many places. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20230608110258.189493-17-hch@lst.de Signed-off-by: Jens Axboe --- fs/btrfs/super.c | 5 +---- fs/nilfs2/super.c | 7 ++----- fs/super.c | 15 ++++----------- include/linux/blkdev.h | 7 +++++++ 4 files changed, 14 insertions(+), 20 deletions(-) (limited to 'fs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1a2ee9407f54..fd02b92e3910 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1440,12 +1440,9 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, struct btrfs_fs_devices *fs_devices = NULL; struct btrfs_fs_info *fs_info = NULL; void *new_sec_opts = NULL; - fmode_t mode = FMODE_READ; + fmode_t mode = sb_open_mode(flags); int error = 0; - if (!(flags & SB_RDONLY)) - mode |= FMODE_WRITE; - if (data) { error = security_sb_eat_lsm_opts(data, &new_sec_opts); if (error) diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 61d5e79a5e81..a41fd84d4e28 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1278,14 +1278,11 @@ nilfs_mount(struct file_system_type *fs_type, int flags, { struct nilfs_super_data sd; struct super_block *s; - fmode_t mode = FMODE_READ; struct dentry *root_dentry; int err, s_new = false; - if (!(flags & SB_RDONLY)) - mode |= FMODE_WRITE; - - sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type, NULL); + sd.bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type, + NULL); if (IS_ERR(sd.bdev)) return ERR_CAST(sd.bdev); diff --git a/fs/super.c b/fs/super.c index 8563794a8bc4..dc7f32839833 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1255,17 +1255,13 @@ int get_tree_bdev(struct fs_context *fc, { struct block_device *bdev; struct super_block *s; - fmode_t mode = FMODE_READ; int error = 0; - if (!(fc->sb_flags & SB_RDONLY)) - mode |= FMODE_WRITE; - if (!fc->source) return invalf(fc, "No source specified"); - bdev = blkdev_get_by_path(fc->source, mode, fc->fs_type, - &fs_holder_ops); + bdev = blkdev_get_by_path(fc->source, sb_open_mode(fc->sb_flags), + fc->fs_type, &fs_holder_ops); if (IS_ERR(bdev)) { errorf(fc, "%s: Can't open blockdev", fc->source); return PTR_ERR(bdev); @@ -1344,13 +1340,10 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, { struct block_device *bdev; struct super_block *s; - fmode_t mode = FMODE_READ; int error = 0; - if (!(flags & SB_RDONLY)) - mode |= FMODE_WRITE; - - bdev = blkdev_get_by_path(dev_name, mode, fs_type, &fs_holder_ops); + bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type, + &fs_holder_ops); if (IS_ERR(bdev)) return ERR_CAST(bdev); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d5b99796f12c..978036039020 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1473,6 +1473,13 @@ struct blk_holder_ops { void (*mark_dead)(struct block_device *bdev); }; +/* + * Return the correct open flags for blkdev_get_by_* for super block flags + * as stored in sb->s_flags. + */ +#define sb_open_mode(flags) \ + (FMODE_READ | (((flags) & SB_RDONLY) ? 0 : FMODE_WRITE)) + struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder, const struct blk_holder_ops *hops); struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, -- cgit v1.2.3 From 81b1fb7d17c0110df839e13468ada9e99bb6e5f4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Jun 2023 13:02:45 +0200 Subject: fs: remove sb->s_mode There is no real need to store the open mode in the super_block now. It is only used by f2fs, which can easily recalculate it. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20230608110258.189493-18-hch@lst.de Signed-off-by: Jens Axboe --- fs/f2fs/super.c | 10 ++++++---- fs/nilfs2/super.c | 1 - fs/super.c | 2 -- include/linux/fs.h | 1 - 4 files changed, 6 insertions(+), 8 deletions(-) (limited to 'fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a5adb1d316e3..5a764fecd1c7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3993,6 +3993,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); unsigned int max_devices = MAX_DEVICES; unsigned int logical_blksize; + fmode_t mode = sb_open_mode(sbi->sb->s_flags); int i; /* Initialize single device information */ @@ -4024,8 +4025,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) if (max_devices == 1) { /* Single zoned block device mount */ FDEV(0).bdev = - blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev, - sbi->sb->s_mode, sbi->sb->s_type, NULL); + blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev, mode, + sbi->sb->s_type, NULL); } else { /* Multi-device mount */ memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN); @@ -4043,8 +4044,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) (FDEV(i).total_segments << sbi->log_blocks_per_seg) - 1; } - FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, - sbi->sb->s_mode, sbi->sb->s_type, NULL); + FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, mode, + sbi->sb->s_type, + NULL); } if (IS_ERR(FDEV(i).bdev)) return PTR_ERR(FDEV(i).bdev); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index a41fd84d4e28..15a5a1099427 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1316,7 +1316,6 @@ nilfs_mount(struct file_system_type *fs_type, int flags, s_new = true; /* New superblock instance created */ - s->s_mode = mode; snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev); sb_set_blocksize(s, block_size(sd.bdev)); diff --git a/fs/super.c b/fs/super.c index dc7f32839833..86f40f898198 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1308,7 +1308,6 @@ int get_tree_bdev(struct fs_context *fc, blkdev_put(bdev, fc->fs_type); down_write(&s->s_umount); } else { - s->s_mode = mode; snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fc->fs_type->name, s->s_id); @@ -1382,7 +1381,6 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, blkdev_put(bdev, fs_type); down_write(&s->s_umount); } else { - s->s_mode = mode; snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name, s->s_id); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7b2053649820..ad1d2c9afb3f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1215,7 +1215,6 @@ struct super_block { uuid_t s_uuid; /* UUID */ unsigned int s_max_links; - fmode_t s_mode; /* * The next field is for VFS *only*. No filesystems have any business -- cgit v1.2.3 From c541dce86c537714b6761a79a969c1623dfa222b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 15 Jun 2023 13:38:48 +0200 Subject: fs: Protect reconfiguration of sb read-write from racing writes The reconfigure / remount code takes a lot of effort to protect filesystem's reconfiguration code from racing writes on remounting read-only. However during remounting read-only filesystem to read-write mode userspace writes can start immediately once we clear SB_RDONLY flag. This is inconvenient for example for ext4 because we need to do some writes to the filesystem (such as preparation of quota files) before we can take userspace writes so we are clearing SB_RDONLY flag before we are fully ready to accept userpace writes and syzbot has found a way to exploit this [1]. Also as far as I'm reading the code the filesystem remount code was protected from racing writes in the legacy mount path by the mount's MNT_READONLY flag so this is relatively new problem. It is actually fairly easy to protect remount read-write from racing writes using sb->s_readonly_remount flag so let's just do that instead of having to workaround these races in the filesystem code. [1] https://lore.kernel.org/all/00000000000006a0df05f6667499@google.com/T/ Signed-off-by: Jan Kara Message-Id: <20230615113848.8439-1-jack@suse.cz> Signed-off-by: Christian Brauner --- fs/super.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 8d8d68799b34..5bf056087acc 100644 --- a/fs/super.c +++ b/fs/super.c @@ -903,6 +903,7 @@ int reconfigure_super(struct fs_context *fc) struct super_block *sb = fc->root->d_sb; int retval; bool remount_ro = false; + bool remount_rw = false; bool force = fc->sb_flags & SB_FORCE; if (fc->sb_flags_mask & ~MS_RMT_MASK) @@ -920,7 +921,7 @@ int reconfigure_super(struct fs_context *fc) bdev_read_only(sb->s_bdev)) return -EACCES; #endif - + remount_rw = !(fc->sb_flags & SB_RDONLY) && sb_rdonly(sb); remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb); } @@ -950,6 +951,14 @@ int reconfigure_super(struct fs_context *fc) if (retval) return retval; } + } else if (remount_rw) { + /* + * We set s_readonly_remount here to protect filesystem's + * reconfigure code from writes from userspace until + * reconfigure finishes. + */ + sb->s_readonly_remount = 1; + smp_wmb(); } if (fc->ops->reconfigure) { -- cgit v1.2.3 From 47a7c01c3efc6581f5dcca40928baeb38e1e40c2 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Fri, 9 Jun 2023 08:15:12 +0000 Subject: Revert "mm: shrinkers: convert shrinker_rwsem to mutex" Patch series "revert shrinker_srcu related changes". This patch (of 7): This reverts commit cf2e309ebca7bb0916771839f9b580b06c778530. Kernel test robot reports -88.8% regression in stress-ng.ramfs.ops_per_sec test case [1], which is caused by commit f95bdb700bc6 ("mm: vmscan: make global slab shrink lockless"). The root cause is that SRCU has to be careful to not frequently check for SRCU read-side critical section exits. Therefore, even if no one is currently in the SRCU read-side critical section, synchronize_srcu() cannot return quickly. That's why unregister_shrinker() has become slower. After discussion, we will try to use the refcount+RCU method [2] proposed by Dave Chinner to continue to re-implement the lockless slab shrink. So revert the shrinker_mutex back to shrinker_rwsem first. [1]. https://lore.kernel.org/lkml/202305230837.db2c233f-yujie.liu@intel.com/ [2]. https://lore.kernel.org/lkml/ZIJhou1d55d4H1s0@dread.disaster.area/ Link: https://lkml.kernel.org/r/20230609081518.3039120-1-qi.zheng@linux.dev Link: https://lkml.kernel.org/r/20230609081518.3039120-2-qi.zheng@linux.dev Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202305230837.db2c233f-yujie.liu@intel.com Signed-off-by: Qi Zheng Cc: Dave Chinner Cc: Kirill Tkhai Cc: Muchun Song Cc: Roman Gushchin Cc: Vlastimil Babka Cc: Yujie Liu Signed-off-by: Andrew Morton --- drivers/md/dm-cache-metadata.c | 2 +- drivers/md/dm-thin-metadata.c | 2 +- fs/super.c | 2 +- mm/shrinker_debug.c | 14 +++++++------- mm/vmscan.c | 34 +++++++++++++++++----------------- 5 files changed, 27 insertions(+), 27 deletions(-) (limited to 'fs/super.c') diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 9e0c69958587..acffed750e3e 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -1828,7 +1828,7 @@ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd) * Replacement block manager (new_bm) is created and old_bm destroyed outside of * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of * shrinker associated with the block manager's bufio client vs cmd root_lock). - * - must take shrinker_mutex without holding cmd->root_lock + * - must take shrinker_rwsem without holding cmd->root_lock */ new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, CACHE_MAX_CONCURRENT_LOCKS); diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 9f5cb52c5763..fd464fb024c3 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1887,7 +1887,7 @@ int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) * Replacement block manager (new_bm) is created and old_bm destroyed outside of * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of * shrinker associated with the block manager's bufio client vs pmd root_lock). - * - must take shrinker_mutex without holding pmd->root_lock + * - must take shrinker_rwsem without holding pmd->root_lock */ new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT, THIN_MAX_CONCURRENT_LOCKS); diff --git a/fs/super.c b/fs/super.c index 34afe411cf2b..04bc62ab7dfe 100644 --- a/fs/super.c +++ b/fs/super.c @@ -54,7 +54,7 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = { * One thing we have to be careful of with a per-sb shrinker is that we don't * drop the last active reference to the superblock from within the shrinker. * If that happens we could trigger unregistering the shrinker from within the - * shrinker path and that leads to deadlock on the shrinker_mutex. Hence we + * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we * take a passive reference to the superblock to avoid this from occurring. */ static unsigned long super_cache_scan(struct shrinker *shrink, diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c index fe10436d9911..2be15b8a6d0b 100644 --- a/mm/shrinker_debug.c +++ b/mm/shrinker_debug.c @@ -8,7 +8,7 @@ #include /* defined in vmscan.c */ -extern struct mutex shrinker_mutex; +extern struct rw_semaphore shrinker_rwsem; extern struct list_head shrinker_list; extern struct srcu_struct shrinker_srcu; @@ -168,7 +168,7 @@ int shrinker_debugfs_add(struct shrinker *shrinker) char buf[128]; int id; - lockdep_assert_held(&shrinker_mutex); + lockdep_assert_held(&shrinker_rwsem); /* debugfs isn't initialized yet, add debugfs entries later. */ if (!shrinker_debugfs_root) @@ -211,7 +211,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) if (!new) return -ENOMEM; - mutex_lock(&shrinker_mutex); + down_write(&shrinker_rwsem); old = shrinker->name; shrinker->name = new; @@ -229,7 +229,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) shrinker->debugfs_entry = entry; } - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); kfree_const(old); @@ -242,7 +242,7 @@ struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, { struct dentry *entry = shrinker->debugfs_entry; - lockdep_assert_held(&shrinker_mutex); + lockdep_assert_held(&shrinker_rwsem); kfree_const(shrinker->name); shrinker->name = NULL; @@ -271,14 +271,14 @@ static int __init shrinker_debugfs_init(void) shrinker_debugfs_root = dentry; /* Create debugfs entries for shrinkers registered at boot */ - mutex_lock(&shrinker_mutex); + down_write(&shrinker_rwsem); list_for_each_entry(shrinker, &shrinker_list, list) if (!shrinker->debugfs_entry) { ret = shrinker_debugfs_add(shrinker); if (ret) break; } - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); return ret; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 6d0cd2840cf0..4730dba253c8 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include @@ -190,7 +190,7 @@ struct scan_control { int vm_swappiness = 60; LIST_HEAD(shrinker_list); -DEFINE_MUTEX(shrinker_mutex); +DECLARE_RWSEM(shrinker_rwsem); DEFINE_SRCU(shrinker_srcu); static atomic_t shrinker_srcu_generation = ATOMIC_INIT(0); @@ -213,7 +213,7 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg, { return srcu_dereference_check(memcg->nodeinfo[nid]->shrinker_info, &shrinker_srcu, - lockdep_is_held(&shrinker_mutex)); + lockdep_is_held(&shrinker_rwsem)); } static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg, @@ -292,7 +292,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg) int nid, size, ret = 0; int map_size, defer_size = 0; - mutex_lock(&shrinker_mutex); + down_write(&shrinker_rwsem); map_size = shrinker_map_size(shrinker_nr_max); defer_size = shrinker_defer_size(shrinker_nr_max); size = map_size + defer_size; @@ -308,7 +308,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg) info->map_nr_max = shrinker_nr_max; rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); } - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); return ret; } @@ -324,7 +324,7 @@ static int expand_shrinker_info(int new_id) if (!root_mem_cgroup) goto out; - lockdep_assert_held(&shrinker_mutex); + lockdep_assert_held(&shrinker_rwsem); map_size = shrinker_map_size(new_nr_max); defer_size = shrinker_defer_size(new_nr_max); @@ -374,7 +374,7 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker) if (mem_cgroup_disabled()) return -ENOSYS; - mutex_lock(&shrinker_mutex); + down_write(&shrinker_rwsem); id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL); if (id < 0) goto unlock; @@ -388,7 +388,7 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker) shrinker->id = id; ret = 0; unlock: - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); return ret; } @@ -398,7 +398,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker) BUG_ON(id < 0); - lockdep_assert_held(&shrinker_mutex); + lockdep_assert_held(&shrinker_rwsem); idr_remove(&shrinker_idr, id); } @@ -433,7 +433,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg) parent = root_mem_cgroup; /* Prevent from concurrent shrinker_info expand */ - mutex_lock(&shrinker_mutex); + down_write(&shrinker_rwsem); for_each_node(nid) { child_info = shrinker_info_protected(memcg, nid); parent_info = shrinker_info_protected(parent, nid); @@ -442,7 +442,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg) atomic_long_add(nr, &parent_info->nr_deferred[i]); } } - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); } static bool cgroup_reclaim(struct scan_control *sc) @@ -743,9 +743,9 @@ void free_prealloced_shrinker(struct shrinker *shrinker) shrinker->name = NULL; #endif if (shrinker->flags & SHRINKER_MEMCG_AWARE) { - mutex_lock(&shrinker_mutex); + down_write(&shrinker_rwsem); unregister_memcg_shrinker(shrinker); - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); return; } @@ -755,11 +755,11 @@ void free_prealloced_shrinker(struct shrinker *shrinker) void register_shrinker_prepared(struct shrinker *shrinker) { - mutex_lock(&shrinker_mutex); + down_write(&shrinker_rwsem); list_add_tail_rcu(&shrinker->list, &shrinker_list); shrinker->flags |= SHRINKER_REGISTERED; shrinker_debugfs_add(shrinker); - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); } static int __register_shrinker(struct shrinker *shrinker) @@ -810,13 +810,13 @@ void unregister_shrinker(struct shrinker *shrinker) if (!(shrinker->flags & SHRINKER_REGISTERED)) return; - mutex_lock(&shrinker_mutex); + down_write(&shrinker_rwsem); list_del_rcu(&shrinker->list); shrinker->flags &= ~SHRINKER_REGISTERED; if (shrinker->flags & SHRINKER_MEMCG_AWARE) unregister_memcg_shrinker(shrinker); debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); atomic_inc(&shrinker_srcu_generation); synchronize_srcu(&shrinker_srcu); -- cgit v1.2.3 From d7439fb1f4338fffd0bc68bb62d78f7712725f26 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 20 Jun 2023 13:28:32 +0200 Subject: fs: Provide helpers for manipulating sb->s_readonly_remount Provide helpers to set and clear sb->s_readonly_remount including appropriate memory barriers. Also use this opportunity to document what the barriers pair with and why they are needed. Suggested-by: Dave Chinner Signed-off-by: Jan Kara Reviewed-by: Dave Chinner Message-Id: <20230620112832.5158-1-jack@suse.cz> Signed-off-by: Christian Brauner --- fs/internal.h | 41 +++++++++++++++++++++++++++++++++++++++++ fs/namespace.c | 25 ++++++++++++++++--------- fs/super.c | 17 ++++++----------- include/linux/fs.h | 2 +- 4 files changed, 64 insertions(+), 21 deletions(-) (limited to 'fs/super.c') diff --git a/fs/internal.h b/fs/internal.h index bd3b2810a36b..b916b84809f3 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -120,6 +120,47 @@ void put_super(struct super_block *sb); extern bool mount_capable(struct fs_context *); int sb_init_dio_done_wq(struct super_block *sb); +/* + * Prepare superblock for changing its read-only state (i.e., either remount + * read-write superblock read-only or vice versa). After this function returns + * mnt_is_readonly() will return true for any mount of the superblock if its + * caller is able to observe any changes done by the remount. This holds until + * sb_end_ro_state_change() is called. + */ +static inline void sb_start_ro_state_change(struct super_block *sb) +{ + WRITE_ONCE(sb->s_readonly_remount, 1); + /* + * For RO->RW transition, the barrier pairs with the barrier in + * mnt_is_readonly() making sure if mnt_is_readonly() sees SB_RDONLY + * cleared, it will see s_readonly_remount set. + * For RW->RO transition, the barrier pairs with the barrier in + * __mnt_want_write() before the mnt_is_readonly() check. The barrier + * makes sure if __mnt_want_write() sees MNT_WRITE_HOLD already + * cleared, it will see s_readonly_remount set. + */ + smp_wmb(); +} + +/* + * Ends section changing read-only state of the superblock. After this function + * returns if mnt_is_readonly() returns false, the caller will be able to + * observe all the changes remount did to the superblock. + */ +static inline void sb_end_ro_state_change(struct super_block *sb) +{ + /* + * This barrier provides release semantics that pairs with + * the smp_rmb() acquire semantics in mnt_is_readonly(). + * This barrier pair ensure that when mnt_is_readonly() sees + * 0 for sb->s_readonly_remount, it will also see all the + * preceding flag changes that were made during the RO state + * change. + */ + smp_wmb(); + WRITE_ONCE(sb->s_readonly_remount, 0); +} + /* * open.c */ diff --git a/fs/namespace.c b/fs/namespace.c index 54847db5b819..5ba1eca6f720 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -309,9 +309,16 @@ static unsigned int mnt_get_writers(struct mount *mnt) static int mnt_is_readonly(struct vfsmount *mnt) { - if (mnt->mnt_sb->s_readonly_remount) + if (READ_ONCE(mnt->mnt_sb->s_readonly_remount)) return 1; - /* Order wrt setting s_flags/s_readonly_remount in do_remount() */ + /* + * The barrier pairs with the barrier in sb_start_ro_state_change() + * making sure if we don't see s_readonly_remount set yet, we also will + * not see any superblock / mount flag changes done by remount. + * It also pairs with the barrier in sb_end_ro_state_change() + * assuring that if we see s_readonly_remount already cleared, we will + * see the values of superblock / mount flags updated by remount. + */ smp_rmb(); return __mnt_is_readonly(mnt); } @@ -364,9 +371,11 @@ int __mnt_want_write(struct vfsmount *m) } } /* - * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will - * be set to match its requirements. So we must not load that until - * MNT_WRITE_HOLD is cleared. + * The barrier pairs with the barrier sb_start_ro_state_change() making + * sure that if we see MNT_WRITE_HOLD cleared, we will also see + * s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in + * mnt_is_readonly() and bail in case we are racing with remount + * read-only. */ smp_rmb(); if (mnt_is_readonly(m)) { @@ -588,10 +597,8 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (!err && atomic_long_read(&sb->s_remove_count)) err = -EBUSY; - if (!err) { - sb->s_readonly_remount = 1; - smp_wmb(); - } + if (!err) + sb_start_ro_state_change(sb); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; diff --git a/fs/super.c b/fs/super.c index 5bf056087acc..5ee5da1fd498 100644 --- a/fs/super.c +++ b/fs/super.c @@ -944,8 +944,7 @@ int reconfigure_super(struct fs_context *fc) */ if (remount_ro) { if (force) { - sb->s_readonly_remount = 1; - smp_wmb(); + sb_start_ro_state_change(sb); } else { retval = sb_prepare_remount_readonly(sb); if (retval) @@ -953,12 +952,10 @@ int reconfigure_super(struct fs_context *fc) } } else if (remount_rw) { /* - * We set s_readonly_remount here to protect filesystem's - * reconfigure code from writes from userspace until - * reconfigure finishes. + * Protect filesystem's reconfigure code from writes from + * userspace until reconfigure finishes. */ - sb->s_readonly_remount = 1; - smp_wmb(); + sb_start_ro_state_change(sb); } if (fc->ops->reconfigure) { @@ -974,9 +971,7 @@ int reconfigure_super(struct fs_context *fc) WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) | (fc->sb_flags & fc->sb_flags_mask))); - /* Needs to be ordered wrt mnt_is_readonly() */ - smp_wmb(); - sb->s_readonly_remount = 0; + sb_end_ro_state_change(sb); /* * Some filesystems modify their metadata via some other path than the @@ -991,7 +986,7 @@ int reconfigure_super(struct fs_context *fc) return 0; cancel_readonly: - sb->s_readonly_remount = 0; + sb_end_ro_state_change(sb); return retval; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 40bef9bf8749..4caac7fdc5d3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1248,7 +1248,7 @@ struct super_block { */ atomic_long_t s_fsnotify_connectors; - /* Being remounted read-only */ + /* Read-only state of the superblock is being changed */ int s_readonly_remount; /* per-sb errseq_t for reporting writeback errors via syncfs */ -- cgit v1.2.3