From 982c3b3058433f20aba9fb032599cee5dfc17328 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Oct 2023 15:01:08 +0200 Subject: bdev: rename freeze and thaw helpers We have bdev_mark_dead() etc and we're going to move block device freezing to holder ops in the next patch. Make the naming consistent: * freeze_bdev() -> bdev_freeze() * thaw_bdev() -> bdev_thaw() Also document the return code. Link: https://lore.kernel.org/r/20231024-vfs-super-freeze-v2-2-599c19f4faac@kernel.org Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- block/bdev.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index e4cfb7adb645..a890da1b3adf 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -207,18 +207,20 @@ int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend) EXPORT_SYMBOL(sync_blockdev_range); /** - * freeze_bdev - lock a filesystem and force it into a consistent state + * bdev_freeze - lock a filesystem and force it into a consistent state * @bdev: blockdevice to lock * * If a superblock is found on this device, we take the s_umount semaphore * on it to make sure nobody unmounts until the snapshot creation is done. * The reference counter (bd_fsfreeze_count) guarantees that only the last * unfreeze process can unfreeze the frozen filesystem actually when multiple - * freeze requests arrive simultaneously. It counts up in freeze_bdev() and - * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze + * freeze requests arrive simultaneously. It counts up in bdev_freeze() and + * count down in bdev_thaw(). When it becomes 0, thaw_bdev() will unfreeze * actually. + * + * Return: On success zero is returned, negative error code on failure. */ -int freeze_bdev(struct block_device *bdev) +int bdev_freeze(struct block_device *bdev) { struct super_block *sb; int error = 0; @@ -248,15 +250,17 @@ done: mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; } -EXPORT_SYMBOL(freeze_bdev); +EXPORT_SYMBOL(bdev_freeze); /** - * thaw_bdev - unlock filesystem + * bdev_thaw - unlock filesystem * @bdev: blockdevice to unlock * - * Unlocks the filesystem and marks it writeable again after freeze_bdev(). + * Unlocks the filesystem and marks it writeable again after bdev_freeze(). + * + * Return: On success zero is returned, negative error code on failure. */ -int thaw_bdev(struct block_device *bdev) +int bdev_thaw(struct block_device *bdev) { struct super_block *sb; int error = -EINVAL; @@ -285,7 +289,7 @@ out: mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; } -EXPORT_SYMBOL(thaw_bdev); +EXPORT_SYMBOL(bdev_thaw); /* * pseudo-fs -- cgit v1.2.3 From fbcb8f39e96d59f4ed48cbdfaa65211fa867985c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Oct 2023 15:01:09 +0200 Subject: bdev: surface the error from sync_blockdev() When freeze_super() is called, sync_filesystem() will be called which calls sync_blockdev() and already surfaces any errors. Do the same for block devices that aren't owned by a superblock and also for filesystems that don't call sync_blockdev() internally but implicitly rely on bdev_freeze() to do it. Link: https://lore.kernel.org/r/20231024-vfs-super-freeze-v2-3-599c19f4faac@kernel.org Reviewed-by: Darrick J. Wong Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- block/bdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index a890da1b3adf..f27f1a7db2bd 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -245,7 +245,7 @@ int bdev_freeze(struct block_device *bdev) bdev->bd_fsfreeze_sb = sb; sync: - sync_blockdev(bdev); + error = sync_blockdev(bdev); done: mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; -- cgit v1.2.3 From 49ef8832fb1a9e0da0020eb17480fd286433bc13 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 27 Sep 2023 15:21:16 +0200 Subject: bdev: implement freeze and thaw holder operations The old method of implementing block device freeze and thaw operations required us to rely on get_active_super() to walk the list of all superblocks on the system to find any superblock that might use the block device. This is wasteful and not very pleasant overall. Now that we can finally go straight from block device to owning superblock things become way simpler. Link: https://lore.kernel.org/r/20231024-vfs-super-freeze-v2-5-599c19f4faac@kernel.org Reviewed-by: Darrick J. Wong Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- block/bdev.c | 65 +++++++++++++++---------------- fs/super.c | 99 +++++++++++++++++++++++++++++++++++++---------- include/linux/blk_types.h | 2 +- 3 files changed, 112 insertions(+), 54 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index f27f1a7db2bd..53a2c653a259 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -222,31 +222,27 @@ EXPORT_SYMBOL(sync_blockdev_range); */ int bdev_freeze(struct block_device *bdev) { - struct super_block *sb; int error = 0; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (++bdev->bd_fsfreeze_count > 1) - goto done; - - sb = get_active_super(bdev); - if (!sb) - goto sync; - if (sb->s_op->freeze_super) - error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE); - else - error = freeze_super(sb, FREEZE_HOLDER_USERSPACE); - deactivate_super(sb); - if (error) { - bdev->bd_fsfreeze_count--; - goto done; + if (atomic_inc_return(&bdev->bd_fsfreeze_count) > 1) { + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return 0; + } + + mutex_lock(&bdev->bd_holder_lock); + if (bdev->bd_holder_ops && bdev->bd_holder_ops->freeze) { + error = bdev->bd_holder_ops->freeze(bdev); + lockdep_assert_not_held(&bdev->bd_holder_lock); + } else { + mutex_unlock(&bdev->bd_holder_lock); + error = sync_blockdev(bdev); } - bdev->bd_fsfreeze_sb = sb; -sync: - error = sync_blockdev(bdev); -done: + if (error) + atomic_dec(&bdev->bd_fsfreeze_count); + mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; } @@ -262,29 +258,32 @@ EXPORT_SYMBOL(bdev_freeze); */ int bdev_thaw(struct block_device *bdev) { - struct super_block *sb; - int error = -EINVAL; + int error = -EINVAL, nr_freeze; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (!bdev->bd_fsfreeze_count) + + /* + * If this returns < 0 it means that @bd_fsfreeze_count was + * already 0 and no decrement was performed. + */ + nr_freeze = atomic_dec_if_positive(&bdev->bd_fsfreeze_count); + if (nr_freeze < 0) goto out; error = 0; - if (--bdev->bd_fsfreeze_count > 0) + if (nr_freeze > 0) goto out; - sb = bdev->bd_fsfreeze_sb; - if (!sb) - goto out; + mutex_lock(&bdev->bd_holder_lock); + if (bdev->bd_holder_ops && bdev->bd_holder_ops->thaw) { + error = bdev->bd_holder_ops->thaw(bdev); + lockdep_assert_not_held(&bdev->bd_holder_lock); + } else { + mutex_unlock(&bdev->bd_holder_lock); + } - if (sb->s_op->thaw_super) - error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE); - else - error = thaw_super(sb, FREEZE_HOLDER_USERSPACE); if (error) - bdev->bd_fsfreeze_count++; - else - bdev->bd_fsfreeze_sb = NULL; + atomic_inc(&bdev->bd_fsfreeze_count); out: mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; diff --git a/fs/super.c b/fs/super.c index 8333aaa878cf..8c943ab1f424 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1440,7 +1440,7 @@ EXPORT_SYMBOL(sget_dev); * * The function must be called with bdev->bd_holder_lock and releases it. */ -static struct super_block *bdev_super_lock_shared(struct block_device *bdev) +static struct super_block *bdev_super_lock(struct block_device *bdev, bool excl) __releases(&bdev->bd_holder_lock) { struct super_block *sb = bdev->bd_holder; @@ -1454,18 +1454,25 @@ static struct super_block *bdev_super_lock_shared(struct block_device *bdev) spin_lock(&sb_lock); sb->s_count++; spin_unlock(&sb_lock); + mutex_unlock(&bdev->bd_holder_lock); - locked = super_lock_shared(sb); - if (!locked || !sb->s_root || !(sb->s_flags & SB_ACTIVE)) { - put_super(sb); - return NULL; - } + locked = super_lock(sb, excl); + /* - * The superblock is active and we hold s_umount, we can drop our - * temporary reference now. - */ + * If the superblock wasn't already SB_DYING then we hold + * s_umount and can safely drop our temporary reference. + */ put_super(sb); + + if (!locked) + return NULL; + + if (!sb->s_root || !(sb->s_flags & SB_ACTIVE)) { + super_unlock(sb, excl); + return NULL; + } + return sb; } @@ -1473,7 +1480,7 @@ static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise) { struct super_block *sb; - sb = bdev_super_lock_shared(bdev); + sb = bdev_super_lock(bdev, false); if (!sb) return; @@ -1491,16 +1498,74 @@ static void fs_bdev_sync(struct block_device *bdev) { struct super_block *sb; - sb = bdev_super_lock_shared(bdev); + sb = bdev_super_lock(bdev, false); if (!sb) return; + sync_filesystem(sb); super_unlock_shared(sb); } +static struct super_block *get_bdev_super(struct block_device *bdev) +{ + bool active = false; + struct super_block *sb; + + sb = bdev_super_lock(bdev, true); + if (sb) { + active = atomic_inc_not_zero(&sb->s_active); + super_unlock_excl(sb); + } + if (!active) + return NULL; + return sb; +} + +static int fs_bdev_freeze(struct block_device *bdev) +{ + struct super_block *sb; + int error = 0; + + lockdep_assert_held(&bdev->bd_fsfreeze_mutex); + + sb = get_bdev_super(bdev); + if (!sb) + return -EINVAL; + + if (sb->s_op->freeze_super) + error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE); + else + error = freeze_super(sb, FREEZE_HOLDER_USERSPACE); + if (!error) + error = sync_blockdev(bdev); + deactivate_super(sb); + return error; +} + +static int fs_bdev_thaw(struct block_device *bdev) +{ + struct super_block *sb; + int error; + + lockdep_assert_held(&bdev->bd_fsfreeze_mutex); + + sb = get_bdev_super(bdev); + if (WARN_ON_ONCE(!sb)) + return -EINVAL; + + if (sb->s_op->thaw_super) + error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE); + else + error = thaw_super(sb, FREEZE_HOLDER_USERSPACE); + deactivate_super(sb); + return error; +} + const struct blk_holder_ops fs_holder_ops = { .mark_dead = fs_bdev_mark_dead, .sync = fs_bdev_sync, + .freeze = fs_bdev_freeze, + .thaw = fs_bdev_thaw, }; EXPORT_SYMBOL_GPL(fs_holder_ops); @@ -1530,15 +1595,10 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, } /* - * Until SB_BORN flag is set, there can be no active superblock - * references and thus no filesystem freezing. get_active_super() will - * just loop waiting for SB_BORN so even bdev_freeze() cannot proceed. - * - * It is enough to check bdev was not frozen before we set s_bdev. + * It is enough to check bdev was not frozen before we set + * s_bdev as freezing will wait until SB_BORN is set. */ - mutex_lock(&bdev->bd_fsfreeze_mutex); - if (bdev->bd_fsfreeze_count > 0) { - mutex_unlock(&bdev->bd_fsfreeze_mutex); + if (atomic_read(&bdev->bd_fsfreeze_count) > 0) { if (fc) warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); bdev_release(bdev_handle); @@ -1551,7 +1611,6 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, if (bdev_stable_writes(bdev)) sb->s_iflags |= SB_I_STABLE_WRITES; spin_unlock(&sb_lock); - mutex_unlock(&bdev->bd_fsfreeze_mutex); snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); shrinker_debugfs_rename(sb->s_shrink, "sb-%s:%s", sb->s_type->name, diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d5c5e59ddbd2..88e1848b0869 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -57,7 +57,7 @@ struct block_device { const struct blk_holder_ops *bd_holder_ops; struct mutex bd_holder_lock; /* The counter of freeze processes */ - int bd_fsfreeze_count; + atomic_t bd_fsfreeze_count; int bd_holders; struct kobject *bd_holder_dir; -- cgit v1.2.3 From cd34758c5238ae6976b10fe15bba7031b409c969 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2023 18:43:07 +0100 Subject: block: Remove blkdev_get_by_*() functions blkdev_get_by_*() and blkdev_put() functions are now unused. Remove them. Acked-by: Christoph Hellwig Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20231101174325.10596-2-jack@suse.cz Reviewed-by: Christian Brauner Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- block/bdev.c | 94 ++++++++++++++++---------------------------------- include/linux/blkdev.h | 5 --- 2 files changed, 30 insertions(+), 69 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index 53a2c653a259..0daf14e50dd0 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -732,7 +732,7 @@ void blkdev_put_no_open(struct block_device *bdev) } /** - * blkdev_get_by_dev - open a block device by device number + * bdev_open_by_dev - open a block device by device number * @dev: device number of block device to open * @mode: open mode (BLK_OPEN_*) * @holder: exclusive holder identifier @@ -744,32 +744,40 @@ void blkdev_put_no_open(struct block_device *bdev) * * Use this interface ONLY if you really do not have anything better - i.e. when * you are behind a truly sucky interface and all you are given is a device - * number. Everything else should use blkdev_get_by_path(). + * number. Everything else should use bdev_open_by_path(). * * CONTEXT: * Might sleep. * * RETURNS: - * Reference to the block_device on success, ERR_PTR(-errno) on failure. + * Handle with a reference to the block_device on success, ERR_PTR(-errno) on + * failure. */ -struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops) +struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops) { - bool unblock_events = true; + struct bdev_handle *handle = kmalloc(sizeof(struct bdev_handle), + GFP_KERNEL); struct block_device *bdev; + bool unblock_events = true; struct gendisk *disk; int ret; + if (!handle) + return ERR_PTR(-ENOMEM); + ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, MAJOR(dev), MINOR(dev), ((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) | ((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0)); if (ret) - return ERR_PTR(ret); + goto free_handle; bdev = blkdev_get_no_open(dev); - if (!bdev) - return ERR_PTR(-ENXIO); + if (!bdev) { + ret = -ENXIO; + goto free_handle; + } disk = bdev->bd_disk; if (holder) { @@ -818,7 +826,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, if (unblock_events) disk_unblock_events(disk); - return bdev; + handle->bdev = bdev; + handle->holder = holder; + handle->mode = mode; + return handle; put_module: module_put(disk->fops->owner); abort_claiming: @@ -828,34 +839,14 @@ abort_claiming: disk_unblock_events(disk); put_blkdev: blkdev_put_no_open(bdev); +free_handle: + kfree(handle); return ERR_PTR(ret); } -EXPORT_SYMBOL(blkdev_get_by_dev); - -struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops) -{ - struct bdev_handle *handle = kmalloc(sizeof(*handle), GFP_KERNEL); - struct block_device *bdev; - - if (!handle) - return ERR_PTR(-ENOMEM); - bdev = blkdev_get_by_dev(dev, mode, holder, hops); - if (IS_ERR(bdev)) { - kfree(handle); - return ERR_CAST(bdev); - } - handle->bdev = bdev; - handle->holder = holder; - if (holder) - mode |= BLK_OPEN_EXCL; - handle->mode = mode; - return handle; -} EXPORT_SYMBOL(bdev_open_by_dev); /** - * blkdev_get_by_path - open a block device by name + * bdev_open_by_path - open a block device by name * @path: path to the block device to open * @mode: open mode (BLK_OPEN_*) * @holder: exclusive holder identifier @@ -869,29 +860,9 @@ EXPORT_SYMBOL(bdev_open_by_dev); * Might sleep. * * RETURNS: - * Reference to the block_device on success, ERR_PTR(-errno) on failure. + * Handle with a reference to the block_device on success, ERR_PTR(-errno) on + * failure. */ -struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode, - void *holder, const struct blk_holder_ops *hops) -{ - struct block_device *bdev; - dev_t dev; - int error; - - error = lookup_bdev(path, &dev); - if (error) - return ERR_PTR(error); - - bdev = blkdev_get_by_dev(dev, mode, holder, hops); - if (!IS_ERR(bdev) && (mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { - blkdev_put(bdev, holder); - return ERR_PTR(-EACCES); - } - - return bdev; -} -EXPORT_SYMBOL(blkdev_get_by_path); - struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops) { @@ -914,8 +885,9 @@ struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, } EXPORT_SYMBOL(bdev_open_by_path); -void blkdev_put(struct block_device *bdev, void *holder) +void bdev_release(struct bdev_handle *handle) { + struct block_device *bdev = handle->bdev; struct gendisk *disk = bdev->bd_disk; /* @@ -929,8 +901,8 @@ void blkdev_put(struct block_device *bdev, void *holder) sync_blockdev(bdev); mutex_lock(&disk->open_mutex); - if (holder) - bd_end_claim(bdev, holder); + if (handle->holder) + bd_end_claim(bdev, handle->holder); /* * Trigger event checking and tell drivers to flush MEDIA_CHANGE @@ -947,12 +919,6 @@ void blkdev_put(struct block_device *bdev, void *holder) module_put(disk->fops->owner); blkdev_put_no_open(bdev); -} -EXPORT_SYMBOL(blkdev_put); - -void bdev_release(struct bdev_handle *handle) -{ - blkdev_put(handle->bdev, handle->holder); kfree(handle); } EXPORT_SYMBOL(bdev_release); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index abf71cce785c..7afc10315dd5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1500,10 +1500,6 @@ struct bdev_handle { blk_mode_t mode; }; -struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops); -struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode, - void *holder, const struct blk_holder_ops *hops); struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, @@ -1511,7 +1507,6 @@ struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -void blkdev_put(struct block_device *bdev, void *holder); void bdev_release(struct bdev_handle *handle); /* just for blk-cgroup, don't use elsewhere */ -- cgit v1.2.3 From ed5cc702d311c14b653323d76062b0294effa66e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2023 18:43:08 +0100 Subject: block: Add config option to not allow writing to mounted devices Writing to mounted devices is dangerous and can lead to filesystem corruption as well as crashes. Furthermore syzbot comes with more and more involved examples how to corrupt block device under a mounted filesystem leading to kernel crashes and reports we can do nothing about. Add tracking of writers to each block device and a kernel cmdline argument which controls whether other writeable opens to block devices open with BLK_OPEN_RESTRICT_WRITES flag are allowed. We will make filesystems use this flag for used devices. Note that this effectively only prevents modification of the particular block device's page cache by other writers. The actual device content can still be modified by other means - e.g. by issuing direct scsi commands, by doing writes through devices lower in the storage stack (e.g. in case loop devices, DM, or MD are involved) etc. But blocking direct modifications of the block device page cache is enough to give filesystems a chance to perform data validation when loading data from the underlying storage and thus prevent kernel crashes. Syzbot can use this cmdline argument option to avoid uninteresting crashes. Also users whose userspace setup does not need writing to mounted block devices can set this option for hardening. Link: https://lore.kernel.org/all/60788e5d-5c7c-1142-e554-c21d709acfd9@linaro.org Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20231101174325.10596-3-jack@suse.cz Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- block/Kconfig | 20 +++++++++++++ block/bdev.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++- include/linux/blk_types.h | 1 + include/linux/blkdev.h | 2 ++ 4 files changed, 97 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/Kconfig b/block/Kconfig index 55ae2286a4de..1de4682d48cc 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -78,6 +78,26 @@ config BLK_DEV_INTEGRITY_T10 select CRC_T10DIF select CRC64_ROCKSOFT +config BLK_DEV_WRITE_MOUNTED + bool "Allow writing to mounted block devices" + default y + help + When a block device is mounted, writing to its buffer cache is very + likely going to cause filesystem corruption. It is also rather easy to + crash the kernel in this way since the filesystem has no practical way + of detecting these writes to buffer cache and verifying its metadata + integrity. However there are some setups that need this capability + like running fsck on read-only mounted root device, modifying some + features on mounted ext4 filesystem, and similar. If you say N, the + kernel will prevent processes from writing to block devices that are + mounted by filesystems which provides some more protection from runaway + privileged processes and generally makes it much harder to crash + filesystem drivers. Note however that this does not prevent + underlying device(s) from being modified by other means, e.g. by + directly submitting SCSI commands or through access to lower layers of + storage stack. If in doubt, say Y. The configuration can be overridden + with the bdev_allow_write_mounted boot option. + config BLK_DEV_ZONED bool "Zoned block device support" select MQ_IOSCHED_DEADLINE diff --git a/block/bdev.c b/block/bdev.c index 0daf14e50dd0..fc8d28d77495 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -30,6 +30,9 @@ #include "../fs/internal.h" #include "blk.h" +/* Should we allow writing to mounted block devices? */ +static bool bdev_allow_write_mounted = IS_ENABLED(CONFIG_BLK_DEV_WRITE_MOUNTED); + struct bdev_inode { struct block_device bdev; struct inode vfs_inode; @@ -730,7 +733,58 @@ void blkdev_put_no_open(struct block_device *bdev) { put_device(&bdev->bd_device); } - + +static bool bdev_writes_blocked(struct block_device *bdev) +{ + return bdev->bd_writers == -1; +} + +static void bdev_block_writes(struct block_device *bdev) +{ + bdev->bd_writers = -1; +} + +static void bdev_unblock_writes(struct block_device *bdev) +{ + bdev->bd_writers = 0; +} + +static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode) +{ + if (bdev_allow_write_mounted) + return true; + /* Writes blocked? */ + if (mode & BLK_OPEN_WRITE && bdev_writes_blocked(bdev)) + return false; + if (mode & BLK_OPEN_RESTRICT_WRITES && bdev->bd_writers > 0) + return false; + return true; +} + +static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode) +{ + if (bdev_allow_write_mounted) + return; + + /* Claim exclusive or shared write access. */ + if (mode & BLK_OPEN_RESTRICT_WRITES) + bdev_block_writes(bdev); + else if (mode & BLK_OPEN_WRITE) + bdev->bd_writers++; +} + +static void bdev_yield_write_access(struct block_device *bdev, blk_mode_t mode) +{ + if (bdev_allow_write_mounted) + return; + + /* Yield exclusive or shared write access. */ + if (mode & BLK_OPEN_RESTRICT_WRITES) + bdev_unblock_writes(bdev); + else if (mode & BLK_OPEN_WRITE) + bdev->bd_writers--; +} + /** * bdev_open_by_dev - open a block device by device number * @dev: device number of block device to open @@ -773,6 +827,10 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, if (ret) goto free_handle; + /* Blocking writes requires exclusive opener */ + if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) + return ERR_PTR(-EINVAL); + bdev = blkdev_get_no_open(dev); if (!bdev) { ret = -ENXIO; @@ -800,12 +858,16 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, goto abort_claiming; if (!try_module_get(disk->fops->owner)) goto abort_claiming; + ret = -EBUSY; + if (!bdev_may_open(bdev, mode)) + goto abort_claiming; if (bdev_is_partition(bdev)) ret = blkdev_get_part(bdev, mode); else ret = blkdev_get_whole(bdev, mode); if (ret) goto put_module; + bdev_claim_write_access(bdev, mode); if (holder) { bd_finish_claiming(bdev, holder, hops); @@ -901,6 +963,8 @@ void bdev_release(struct bdev_handle *handle) sync_blockdev(bdev); mutex_lock(&disk->open_mutex); + bdev_yield_write_access(bdev, handle->mode); + if (handle->holder) bd_end_claim(bdev, handle->holder); @@ -1069,3 +1133,12 @@ void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) blkdev_put_no_open(bdev); } + +static int __init setup_bdev_allow_write_mounted(char *str) +{ + if (kstrtobool(str, &bdev_allow_write_mounted)) + pr_warn("Invalid option string for bdev_allow_write_mounted:" + " '%s'\n", str); + return 1; +} +__setup("bdev_allow_write_mounted=", setup_bdev_allow_write_mounted); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 749203277fee..52e264d5a830 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -66,6 +66,7 @@ struct block_device { #ifdef CONFIG_FAIL_MAKE_REQUEST bool bd_make_it_fail; #endif + int bd_writers; /* * keep this out-of-line as it's both big and not needed in the fast * path diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7afc10315dd5..0e0c0186aa32 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -124,6 +124,8 @@ typedef unsigned int __bitwise blk_mode_t; #define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3)) /* open for "writes" only for ioctls (specialy hack for floppy.c) */ #define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4)) +/* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */ +#define BLK_OPEN_RESTRICT_WRITES ((__force blk_mode_t)(1 << 5)) struct gendisk { /* -- cgit v1.2.3 From 8ff363ade395e72dc639810b6f59849c743c363e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 24 Dec 2023 18:36:42 +0100 Subject: block: Fix a memory leak in bdev_open_by_dev() If we early exit here, 'handle' needs to be freed, or some memory leaks. Fixes: ed5cc702d311 ("block: Add config option to not allow writing to mounted devices") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/8eaec334781e695810aaa383b55de00ca4ab1352.1703439383.git.christophe.jaillet@wanadoo.fr Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- block/bdev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index fc8d28d77495..de76320ef9df 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -828,8 +828,10 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, goto free_handle; /* Blocking writes requires exclusive opener */ - if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) - return ERR_PTR(-EINVAL); + if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) { + ret = -EINVAL; + goto free_handle; + } bdev = blkdev_get_no_open(dev); if (!bdev) { -- cgit v1.2.3