From f3a608827d1f8de0dd12813e8d9c6803fe64e119 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 8 Feb 2024 18:47:35 +0100 Subject: bdev: open block device as files Add two new helpers to allow opening block devices as files. This is not the final infrastructure. This still opens the block device before opening a struct file. Until we have removed all references to struct bdev_handle we can't switch the order: * Introduce blk_to_file_flags() to translate from block specific flags to flags usable to open a new file. * Introduce bdev_file_open_by_{dev,path}(). * Introduce temporary sb_bdev_handle() helper to retrieve a struct bdev_handle from a block device file and update places that directly reference struct bdev_handle to rely on it. * Don't count block device opens against the number of open files. A bdev_file_open_by_{dev,path}() file is never installed into any file descriptor table. One idea that came to mind was to use kernel_tmpfile_open() which would require us to pass a path and it would then call do_dentry_open() going through the regular fops->open::blkdev_open() path. But then we're back to the problem of routing block specific flags such as BLK_OPEN_RESTRICT_WRITES through the open path and would have to waste FMODE_* flags every time we add a new one. With this we can avoid using a flag bit and we have more leeway in how we open block devices from bdev_open_by_{dev,path}(). 
Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-1-adbd023e19cc@kernel.org Signed-off-by: Christian Brauner --- block/bdev.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 97 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index e9f1b12bd75c..e1149652c532 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -49,6 +49,13 @@ struct block_device *I_BDEV(struct inode *inode) } EXPORT_SYMBOL(I_BDEV); +struct block_device *file_bdev(struct file *bdev_file) +{ + struct bdev_handle *handle = bdev_file->private_data; + return handle->bdev; +} +EXPORT_SYMBOL(file_bdev); + static void bdev_write_inode(struct block_device *bdev) { struct inode *inode = bdev->bd_inode; @@ -368,12 +375,12 @@ static struct file_system_type bd_type = { }; struct super_block *blockdev_superblock __ro_after_init; +struct vfsmount *blockdev_mnt __ro_after_init; EXPORT_SYMBOL_GPL(blockdev_superblock); void __init bdev_cache_init(void) { int err; - static struct vfsmount *bd_mnt __ro_after_init; bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| @@ -382,10 +389,10 @@ void __init bdev_cache_init(void) err = register_filesystem(&bd_type); if (err) panic("Cannot register bdev pseudo-fs"); - bd_mnt = kern_mount(&bd_type); - if (IS_ERR(bd_mnt)) + blockdev_mnt = kern_mount(&bd_type); + if (IS_ERR(blockdev_mnt)) panic("Cannot create bdev pseudo-fs"); - blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ + blockdev_superblock = blockdev_mnt->mnt_sb; /* For writeback */ } struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) @@ -911,6 +918,92 @@ free_handle: } EXPORT_SYMBOL(bdev_open_by_dev); +/* + * If BLK_OPEN_WRITE_IOCTL is set then this is a historical quirk + * associated with the floppy driver where it has allowed ioctls if the + * file was opened for writing, but does not allow reads or writes. 
+ * Make sure that this quirk is reflected in @f_flags. + * + * It can also happen if a block device is opened as O_RDWR | O_WRONLY. + */ +static unsigned blk_to_file_flags(blk_mode_t mode) +{ + unsigned int flags = 0; + + if ((mode & (BLK_OPEN_READ | BLK_OPEN_WRITE)) == + (BLK_OPEN_READ | BLK_OPEN_WRITE)) + flags |= O_RDWR; + else if (mode & BLK_OPEN_WRITE_IOCTL) + flags |= O_RDWR | O_WRONLY; + else if (mode & BLK_OPEN_WRITE) + flags |= O_WRONLY; + else if (mode & BLK_OPEN_READ) + flags |= O_RDONLY; /* homeopathic, because O_RDONLY is 0 */ + else + WARN_ON_ONCE(true); + + if (mode & BLK_OPEN_NDELAY) + flags |= O_NDELAY; + + return flags; +} + +struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops) +{ + struct file *bdev_file; + struct bdev_handle *handle; + unsigned int flags; + + handle = bdev_open_by_dev(dev, mode, holder, hops); + if (IS_ERR(handle)) + return ERR_CAST(handle); + + flags = blk_to_file_flags(mode); + bdev_file = alloc_file_pseudo_noaccount(handle->bdev->bd_inode, + blockdev_mnt, "", flags | O_LARGEFILE, &def_blk_fops); + if (IS_ERR(bdev_file)) { + bdev_release(handle); + return bdev_file; + } + ihold(handle->bdev->bd_inode); + + bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; + if (bdev_nowait(handle->bdev)) + bdev_file->f_mode |= FMODE_NOWAIT; + + bdev_file->f_mapping = handle->bdev->bd_inode->i_mapping; + bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping); + bdev_file->private_data = handle; + return bdev_file; +} +EXPORT_SYMBOL(bdev_file_open_by_dev); + +struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, + void *holder, + const struct blk_holder_ops *hops) +{ + struct file *bdev_file; + dev_t dev; + int error; + + error = lookup_bdev(path, &dev); + if (error) + return ERR_PTR(error); + + bdev_file = bdev_file_open_by_dev(dev, mode, holder, hops); + if (!IS_ERR(bdev_file) && (mode & BLK_OPEN_WRITE)) { + struct bdev_handle *handle = 
bdev_file->private_data; + if (bdev_read_only(handle->bdev)) { + fput(bdev_file); + bdev_file = ERR_PTR(-EACCES); + } + } + + return bdev_file; +} +EXPORT_SYMBOL(bdev_file_open_by_path); + /** * bdev_open_by_path - open a block device by name * @path: path to the block device to open -- cgit v1.2.3 From e5ca9d391615269b05a6ed871fec66d9db650520 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:19 +0100 Subject: block/ioctl: port blkdev_bszset() to file Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-2-adbd023e19cc@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- block/ioctl.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/ioctl.c b/block/ioctl.c index 9c73a763ef88..5d0619e02e4c 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -471,7 +471,7 @@ static int blkdev_bszset(struct block_device *bdev, blk_mode_t mode, int __user *argp) { int ret, n; - struct bdev_handle *handle; + struct file *file; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -483,12 +483,11 @@ static int blkdev_bszset(struct block_device *bdev, blk_mode_t mode, if (mode & BLK_OPEN_EXCL) return set_blocksize(bdev, n); - handle = bdev_open_by_dev(bdev->bd_dev, mode, &bdev, NULL); - if (IS_ERR(handle)) + file = bdev_file_open_by_dev(bdev->bd_dev, mode, &bdev, NULL); + if (IS_ERR(file)) return -EBUSY; ret = set_blocksize(bdev, n); - bdev_release(handle); - + fput(file); return ret; } -- cgit v1.2.3 From 190f676afa00d07082db170400aaa2cd4de0933f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:20 +0100 Subject: block/genhd: port disk_scan_partitions() to file This may run from a kernel thread via device_add_disk(). So this could also use __fput_sync() if we were worried about EBUSY. But when it is called from a kernel thread it's always BLK_OPEN_READ so EBUSY can't really happen even if we do BLK_OPEN_RESTRICT_WRITES or BLK_OPEN_EXCL. 
Otherwise it's called from an ioctl on the block device which is only called from userspace and can rely on task work. Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-3-adbd023e19cc@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- block/genhd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/genhd.c b/block/genhd.c index d74fb5b4ae68..a911d2969c07 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -342,7 +342,7 @@ EXPORT_SYMBOL_GPL(disk_uevent); int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode) { - struct bdev_handle *handle; + struct file *file; int ret = 0; if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN)) @@ -366,12 +366,12 @@ int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode) } set_bit(GD_NEED_PART_SCAN, &disk->state); - handle = bdev_open_by_dev(disk_devt(disk), mode & ~BLK_OPEN_EXCL, NULL, - NULL); - if (IS_ERR(handle)) - ret = PTR_ERR(handle); + file = bdev_file_open_by_dev(disk_devt(disk), mode & ~BLK_OPEN_EXCL, + NULL, NULL); + if (IS_ERR(file)) + ret = PTR_ERR(file); else - bdev_release(handle); + fput(file); /* * If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set, -- cgit v1.2.3 From e97d06a46526d9392cbdbd7eda193091e1af2723 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:44 +0100 Subject: bdev: remove bdev_open_by_path() Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-27-adbd023e19cc@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- block/bdev.c | 40 ---------------------------------------- include/linux/blkdev.h | 2 -- 2 files changed, 42 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index e1149652c532..4003f8e1782a 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -1004,46 +1004,6 @@ struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, } 
EXPORT_SYMBOL(bdev_file_open_by_path); -/** - * bdev_open_by_path - open a block device by name - * @path: path to the block device to open - * @mode: open mode (BLK_OPEN_*) - * @holder: exclusive holder identifier - * @hops: holder operations - * - * Open the block device described by the device file at @path. If @holder is - * not %NULL, the block device is opened with exclusive access. Exclusive opens - * may nest for the same @holder. - * - * CONTEXT: - * Might sleep. - * - * RETURNS: - * Handle with a reference to the block_device on success, ERR_PTR(-errno) on - * failure. - */ -struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, - void *holder, const struct blk_holder_ops *hops) -{ - struct bdev_handle *handle; - dev_t dev; - int error; - - error = lookup_bdev(path, &dev); - if (error) - return ERR_PTR(error); - - handle = bdev_open_by_dev(dev, mode, holder, hops); - if (!IS_ERR(handle) && (mode & BLK_OPEN_WRITE) && - bdev_read_only(handle->bdev)) { - bdev_release(handle); - return ERR_PTR(-EACCES); - } - - return handle; -} -EXPORT_SYMBOL(bdev_open_by_path); - void bdev_release(struct bdev_handle *handle) { struct block_device *bdev = handle->bdev; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 76706aa47316..5880d5abfebe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1484,8 +1484,6 @@ struct bdev_handle { struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); -struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, - void *holder, const struct blk_holder_ops *hops); struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, -- cgit v1.2.3 From b1211a25c4fe3443cfef4ed7c39251502a663776 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:45 +0100 Subject: bdev: make 
bdev_{release, open_by_dev}() private to block layer Move both of them to the private block header. There's no caller in the tree anymore that uses them directly. Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-28-adbd023e19cc@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- block/bdev.c | 2 -- block/blk.h | 4 ++++ include/linux/blkdev.h | 3 --- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index 4003f8e1782a..e6e46f24a89a 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -916,7 +916,6 @@ free_handle: kfree(handle); return ERR_PTR(ret); } -EXPORT_SYMBOL(bdev_open_by_dev); /* * If BLK_OPEN_WRITE_IOCTL is set then this is a historical quirk @@ -1042,7 +1041,6 @@ void bdev_release(struct bdev_handle *handle) blkdev_put_no_open(bdev); kfree(handle); } -EXPORT_SYMBOL(bdev_release); /** * lookup_bdev() - Look up a struct block_device by name. diff --git a/block/blk.h b/block/blk.h index 1ef920f72e0f..c9630774767d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -516,4 +516,8 @@ static inline int req_ref_read(struct request *req) return atomic_read(&req->ref); } +void bdev_release(struct bdev_handle *handle); +struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops); + #endif /* BLK_INTERNAL_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5880d5abfebe..495f55587207 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1482,8 +1482,6 @@ struct bdev_handle { blk_mode_t mode; }; -struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops); struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, @@ -1491,7 +1489,6 @@ struct file *bdev_file_open_by_path(const char *path, 
blk_mode_t mode, int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -void bdev_release(struct bdev_handle *handle); /* just for blk-cgroup, don't use elsewhere */ struct block_device *blkdev_get_no_open(dev_t dev); -- cgit v1.2.3 From a56aefca8d386181415a1fb7cfec2f72b0404797 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:46 +0100 Subject: bdev: make struct bdev_handle private to the block layer Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-29-adbd023e19cc@kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- block/bdev.c | 119 +++++++++++++++++++++++++------------------------ block/blk.h | 12 +++-- block/fops.c | 37 +++++++-------- include/linux/blkdev.h | 7 --- include/linux/fs.h | 6 --- 5 files changed, 86 insertions(+), 95 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index e6e46f24a89a..8f33f160e923 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -703,6 +703,24 @@ out_blkdev_put: return ret; } +int bdev_permission(dev_t dev, blk_mode_t mode, void *holder) +{ + int ret; + + ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, + MAJOR(dev), MINOR(dev), + ((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) | + ((mode & BLK_OPEN_WRITE) ? 
DEVCG_ACC_WRITE : 0)); + if (ret) + return ret; + + /* Blocking writes requires exclusive opener */ + if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) + return -EINVAL; + + return 0; +} + static void blkdev_put_part(struct block_device *part) { struct block_device *whole = bdev_whole(part); @@ -795,69 +813,43 @@ static void bdev_yield_write_access(struct block_device *bdev, blk_mode_t mode) } /** - * bdev_open_by_dev - open a block device by device number - * @dev: device number of block device to open + * bdev_open - open a block device + * @bdev: block device to open * @mode: open mode (BLK_OPEN_*) * @holder: exclusive holder identifier * @hops: holder operations + * @bdev_file: file for the block device * - * Open the block device described by device number @dev. If @holder is not - * %NULL, the block device is opened with exclusive access. Exclusive opens may - * nest for the same @holder. - * - * Use this interface ONLY if you really do not have anything better - i.e. when - * you are behind a truly sucky interface and all you are given is a device - * number. Everything else should use bdev_open_by_path(). + * Open the block device. If @holder is not %NULL, the block device is opened + * with exclusive access. Exclusive opens may nest for the same @holder. * * CONTEXT: * Might sleep. * * RETURNS: - * Handle with a reference to the block_device on success, ERR_PTR(-errno) on - * failure. + * zero on success, -errno on failure. 
*/ -struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops) +int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops, struct file *bdev_file) { - struct bdev_handle *handle = kmalloc(sizeof(struct bdev_handle), - GFP_KERNEL); - struct block_device *bdev; + struct bdev_handle *handle; bool unblock_events = true; - struct gendisk *disk; + struct gendisk *disk = bdev->bd_disk; int ret; + handle = kmalloc(sizeof(struct bdev_handle), GFP_KERNEL); if (!handle) - return ERR_PTR(-ENOMEM); - - ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, - MAJOR(dev), MINOR(dev), - ((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) | - ((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0)); - if (ret) - goto free_handle; - - /* Blocking writes requires exclusive opener */ - if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) { - ret = -EINVAL; - goto free_handle; - } - - bdev = blkdev_get_no_open(dev); - if (!bdev) { - ret = -ENXIO; - goto free_handle; - } - disk = bdev->bd_disk; + return -ENOMEM; if (holder) { mode |= BLK_OPEN_EXCL; ret = bd_prepare_to_claim(bdev, holder, hops); if (ret) - goto put_blkdev; + goto free_handle; } else { if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL)) { ret = -EIO; - goto put_blkdev; + goto free_handle; } } @@ -902,7 +894,16 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, handle->bdev = bdev; handle->holder = holder; handle->mode = mode; - return handle; + + bdev_file->f_flags |= O_LARGEFILE; + bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; + if (bdev_nowait(bdev)) + bdev_file->f_mode |= FMODE_NOWAIT; + bdev_file->f_mapping = handle->bdev->bd_inode->i_mapping; + bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping); + bdev_file->private_data = handle; + + return 0; put_module: module_put(disk->fops->owner); abort_claiming: @@ -910,11 +911,9 @@ abort_claiming: bd_abort_claiming(bdev, holder); 
mutex_unlock(&disk->open_mutex); disk_unblock_events(disk); -put_blkdev: - blkdev_put_no_open(bdev); free_handle: kfree(handle); - return ERR_PTR(ret); + return ret; } /* @@ -951,29 +950,33 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops) { struct file *bdev_file; - struct bdev_handle *handle; + struct block_device *bdev; unsigned int flags; + int ret; - handle = bdev_open_by_dev(dev, mode, holder, hops); - if (IS_ERR(handle)) - return ERR_CAST(handle); + ret = bdev_permission(dev, mode, holder); + if (ret) + return ERR_PTR(ret); + + bdev = blkdev_get_no_open(dev); + if (!bdev) + return ERR_PTR(-ENXIO); flags = blk_to_file_flags(mode); - bdev_file = alloc_file_pseudo_noaccount(handle->bdev->bd_inode, + bdev_file = alloc_file_pseudo_noaccount(bdev->bd_inode, blockdev_mnt, "", flags | O_LARGEFILE, &def_blk_fops); if (IS_ERR(bdev_file)) { - bdev_release(handle); + blkdev_put_no_open(bdev); return bdev_file; } - ihold(handle->bdev->bd_inode); + ihold(bdev->bd_inode); - bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; - if (bdev_nowait(handle->bdev)) - bdev_file->f_mode |= FMODE_NOWAIT; - - bdev_file->f_mapping = handle->bdev->bd_inode->i_mapping; - bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping); - bdev_file->private_data = handle; + ret = bdev_open(bdev, mode, holder, hops, bdev_file); + if (ret) { + blkdev_put_no_open(bdev); + fput(bdev_file); + return ERR_PTR(ret); + } return bdev_file; } EXPORT_SYMBOL(bdev_file_open_by_dev); diff --git a/block/blk.h b/block/blk.h index c9630774767d..19b15870284f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -25,6 +25,12 @@ struct blk_flush_queue { struct request *flush_rq; }; +struct bdev_handle { + struct block_device *bdev; + void *holder; + blk_mode_t mode; +}; + bool is_flush_rq(struct request *req); struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, @@ -517,7 +523,7 @@ static inline int req_ref_read(struct request 
*req) } void bdev_release(struct bdev_handle *handle); -struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops); - +int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops, struct file *bdev_file); +int bdev_permission(dev_t dev, blk_mode_t mode, void *holder); #endif /* BLK_INTERNAL_H */ diff --git a/block/fops.c b/block/fops.c index 0cf8cf72cdfa..a1ba1a50ae77 100644 --- a/block/fops.c +++ b/block/fops.c @@ -599,36 +599,31 @@ blk_mode_t file_to_blk_mode(struct file *file) static int blkdev_open(struct inode *inode, struct file *filp) { - struct bdev_handle *handle; + struct block_device *bdev; blk_mode_t mode; - - /* - * Preserve backwards compatibility and allow large file access - * even if userspace doesn't ask for it explicitly. Some mkfs - * binary needs it. We might want to drop this workaround - * during an unstable branch. - */ - filp->f_flags |= O_LARGEFILE; - filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; + void *holder; + int ret; mode = file_to_blk_mode(filp); - handle = bdev_open_by_dev(inode->i_rdev, mode, - mode & BLK_OPEN_EXCL ? filp : NULL, NULL); - if (IS_ERR(handle)) - return PTR_ERR(handle); + holder = mode & BLK_OPEN_EXCL ? 
filp : NULL; + ret = bdev_permission(inode->i_rdev, mode, holder); + if (ret) + return ret; - if (bdev_nowait(handle->bdev)) - filp->f_mode |= FMODE_NOWAIT; + bdev = blkdev_get_no_open(inode->i_rdev); + if (!bdev) + return -ENXIO; - filp->f_mapping = handle->bdev->bd_inode->i_mapping; - filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); - filp->private_data = handle; - return 0; + ret = bdev_open(bdev, mode, holder, NULL, filp); + if (ret) + blkdev_put_no_open(bdev); + return ret; } static int blkdev_release(struct inode *inode, struct file *filp) { - bdev_release(filp->private_data); + if (filp->private_data) + bdev_release(filp->private_data); return 0; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 495f55587207..2f5dbde23094 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1475,13 +1475,6 @@ extern const struct blk_holder_ops fs_holder_ops; (BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \ (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) -/* @bdev_handle will be removed soon. */ -struct bdev_handle { - struct block_device *bdev; - void *holder; - blk_mode_t mode; -}; - struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, diff --git a/include/linux/fs.h b/include/linux/fs.h index e9291e27cc47..6e0714d35d9b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1327,12 +1327,6 @@ struct super_block { struct list_head s_inodes_wb; /* writeback inodes */ } __randomize_layout; -/* Temporary helper that will go away. 
*/ -static inline struct bdev_handle *sb_bdev_handle(struct super_block *sb) -{ - return sb->s_bdev_file->private_data; -} - static inline struct user_namespace *i_user_ns(const struct inode *inode) { return inode->i_sb->s_user_ns; -- cgit v1.2.3 From 7c09a4ed6156c6cab6b951e027ca6ea24af454ba Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:47 +0100 Subject: bdev: remove bdev pointer from struct bdev_handle We can always go directly via: * I_BDEV(bdev_file->f_inode) * I_BDEV(bdev_file->f_mapping->host) So keeping struct bdev in struct bdev_handle is redundant. Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-30-adbd023e19cc@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- block/bdev.c | 26 ++++++++++++-------------- block/blk.h | 3 +-- block/fops.c | 2 +- 3 files changed, 14 insertions(+), 17 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index 8f33f160e923..4e4527c5df00 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -51,8 +51,7 @@ EXPORT_SYMBOL(I_BDEV); struct block_device *file_bdev(struct file *bdev_file) { - struct bdev_handle *handle = bdev_file->private_data; - return handle->bdev; + return I_BDEV(bdev_file->f_mapping->host); } EXPORT_SYMBOL(file_bdev); @@ -891,7 +890,6 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, if (unblock_events) disk_unblock_events(disk); - handle->bdev = bdev; handle->holder = holder; handle->mode = mode; @@ -899,7 +897,7 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; if (bdev_nowait(bdev)) bdev_file->f_mode |= FMODE_NOWAIT; - bdev_file->f_mapping = handle->bdev->bd_inode->i_mapping; + bdev_file->f_mapping = bdev->bd_inode->i_mapping; bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping); bdev_file->private_data = handle; @@ -985,7 +983,7 @@ struct file *bdev_file_open_by_path(const char *path, 
blk_mode_t mode, void *holder, const struct blk_holder_ops *hops) { - struct file *bdev_file; + struct file *file; dev_t dev; int error; @@ -993,22 +991,22 @@ struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, if (error) return ERR_PTR(error); - bdev_file = bdev_file_open_by_dev(dev, mode, holder, hops); - if (!IS_ERR(bdev_file) && (mode & BLK_OPEN_WRITE)) { - struct bdev_handle *handle = bdev_file->private_data; - if (bdev_read_only(handle->bdev)) { - fput(bdev_file); - bdev_file = ERR_PTR(-EACCES); + file = bdev_file_open_by_dev(dev, mode, holder, hops); + if (!IS_ERR(file) && (mode & BLK_OPEN_WRITE)) { + if (bdev_read_only(file_bdev(file))) { + fput(file); + file = ERR_PTR(-EACCES); } } - return bdev_file; + return file; } EXPORT_SYMBOL(bdev_file_open_by_path); -void bdev_release(struct bdev_handle *handle) +void bdev_release(struct file *bdev_file) { - struct block_device *bdev = handle->bdev; + struct block_device *bdev = file_bdev(bdev_file); + struct bdev_handle *handle = bdev_file->private_data; struct gendisk *disk = bdev->bd_disk; /* diff --git a/block/blk.h b/block/blk.h index 19b15870284f..7ca24814f3a0 100644 --- a/block/blk.h +++ b/block/blk.h @@ -26,7 +26,6 @@ struct blk_flush_queue { }; struct bdev_handle { - struct block_device *bdev; void *holder; blk_mode_t mode; }; @@ -522,7 +521,7 @@ static inline int req_ref_read(struct request *req) return atomic_read(&req->ref); } -void bdev_release(struct bdev_handle *handle); +void bdev_release(struct file *bdev_file); int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops, struct file *bdev_file); int bdev_permission(dev_t dev, blk_mode_t mode, void *holder); diff --git a/block/fops.c b/block/fops.c index a1ba1a50ae77..aab9b89e4c77 100644 --- a/block/fops.c +++ b/block/fops.c @@ -623,7 +623,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) static int blkdev_release(struct inode *inode, struct file *filp) { if 
(filp->private_data) - bdev_release(filp->private_data); + bdev_release(filp); return 0; } -- cgit v1.2.3 From 321de651fa565dcf76c017b257bdf15ec7fff45d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:48 +0100 Subject: block: don't rely on BLK_OPEN_RESTRICT_WRITES when yielding write access Make it possible to detect a block device that was opened with restricted write access based only on BLK_OPEN_WRITE and bdev->bd_writers < 0 so we won't have to claim another FMODE_* flag. Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-31-adbd023e19cc@kernel.org Signed-off-by: Christian Brauner --- block/bdev.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index 4e4527c5df00..efecc9b97e1e 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -799,16 +799,21 @@ static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode) bdev->bd_writers++; } -static void bdev_yield_write_access(struct block_device *bdev, blk_mode_t mode) +static void bdev_yield_write_access(struct file *bdev_file, blk_mode_t mode) { + struct block_device *bdev; + if (bdev_allow_write_mounted) return; + bdev = file_bdev(bdev_file); /* Yield exclusive or shared write access. 
*/ - if (mode & BLK_OPEN_RESTRICT_WRITES) - bdev_unblock_writes(bdev); - else if (mode & BLK_OPEN_WRITE) - bdev->bd_writers--; + if (mode & BLK_OPEN_WRITE) { + if (bdev_writes_blocked(bdev)) + bdev_unblock_writes(bdev); + else + bdev->bd_writers--; + } } /** @@ -1020,7 +1025,7 @@ void bdev_release(struct file *bdev_file) sync_blockdev(bdev); mutex_lock(&disk->open_mutex); - bdev_yield_write_access(bdev, handle->mode); + bdev_yield_write_access(bdev_file, handle->mode); if (handle->holder) bd_end_claim(bdev, handle->holder); -- cgit v1.2.3 From ab838b3fd9a442a62f36ea7eeb93e77259f787ce Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:49 +0100 Subject: block: remove bdev_handle completely We just need to use the holder to indicate whether a block device open was exclusive or not. We did use to do that before but had to give that up once we switched to struct bdev_handle. Before struct bdev_handle we only stashed stuff in file->private_data if this was an exclusive open but after struct bdev_handle we always set file->private_data to a struct bdev_handle and so we had to use bdev_handle->mode or bdev_handle->holder. Now that we don't use struct bdev_handle anymore we can revert back to the old behavior. Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-32-adbd023e19cc@kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- block/bdev.c | 47 ++++++++++++++++++++++++----------------------- block/blk.h | 5 ----- block/fops.c | 21 ++++++++++----------- 3 files changed, 34 insertions(+), 39 deletions(-) (limited to 'block') diff --git a/block/bdev.c b/block/bdev.c index efecc9b97e1e..140093c99bdc 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -717,6 +717,13 @@ int bdev_permission(dev_t dev, blk_mode_t mode, void *holder) if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) return -EINVAL; + /* + * We're using error pointers to indicate to ->release() when we + * failed to open that block device. 
Also this doesn't make sense. + */ + if (WARN_ON_ONCE(IS_ERR(holder))) + return -EINVAL; + return 0; } @@ -799,7 +806,7 @@ static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode) bdev->bd_writers++; } -static void bdev_yield_write_access(struct file *bdev_file, blk_mode_t mode) +static void bdev_yield_write_access(struct file *bdev_file) { struct block_device *bdev; @@ -808,7 +815,7 @@ static void bdev_yield_write_access(struct file *bdev_file, blk_mode_t mode) bdev = file_bdev(bdev_file); /* Yield exclusive or shared write access. */ - if (mode & BLK_OPEN_WRITE) { + if (bdev_file->f_mode & FMODE_WRITE) { if (bdev_writes_blocked(bdev)) bdev_unblock_writes(bdev); else @@ -836,25 +843,18 @@ static void bdev_yield_write_access(struct file *bdev_file, blk_mode_t mode) int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops, struct file *bdev_file) { - struct bdev_handle *handle; bool unblock_events = true; struct gendisk *disk = bdev->bd_disk; int ret; - handle = kmalloc(sizeof(struct bdev_handle), GFP_KERNEL); - if (!handle) - return -ENOMEM; - if (holder) { mode |= BLK_OPEN_EXCL; ret = bd_prepare_to_claim(bdev, holder, hops); if (ret) - goto free_handle; + return ret; } else { - if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL)) { - ret = -EIO; - goto free_handle; - } + if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL)) + return -EIO; } disk_block_events(disk); @@ -895,8 +895,6 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, if (unblock_events) disk_unblock_events(disk); - handle->holder = holder; - handle->mode = mode; bdev_file->f_flags |= O_LARGEFILE; bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; @@ -904,7 +902,7 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, bdev_file->f_mode |= FMODE_NOWAIT; bdev_file->f_mapping = bdev->bd_inode->i_mapping; bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping); - bdev_file->private_data = handle; + 
bdev_file->private_data = holder; return 0; put_module: @@ -914,8 +912,6 @@ abort_claiming: bd_abort_claiming(bdev, holder); mutex_unlock(&disk->open_mutex); disk_unblock_events(disk); -free_handle: - kfree(handle); return ret; } @@ -976,7 +972,8 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, ret = bdev_open(bdev, mode, holder, hops, bdev_file); if (ret) { - blkdev_put_no_open(bdev); + /* We failed to open the block device. Let ->release() know. */ + bdev_file->private_data = ERR_PTR(ret); fput(bdev_file); return ERR_PTR(ret); } @@ -1011,9 +1008,13 @@ EXPORT_SYMBOL(bdev_file_open_by_path); void bdev_release(struct file *bdev_file) { struct block_device *bdev = file_bdev(bdev_file); - struct bdev_handle *handle = bdev_file->private_data; + void *holder = bdev_file->private_data; struct gendisk *disk = bdev->bd_disk; + /* We failed to open that block device. */ + if (IS_ERR(holder)) + goto put_no_open; + /* * Sync early if it looks like we're the last one. 
If someone else * opens the block device between now and the decrement of bd_openers @@ -1025,10 +1026,10 @@ void bdev_release(struct file *bdev_file) sync_blockdev(bdev); mutex_lock(&disk->open_mutex); - bdev_yield_write_access(bdev_file, handle->mode); + bdev_yield_write_access(bdev_file); - if (handle->holder) - bd_end_claim(bdev, handle->holder); + if (holder) + bd_end_claim(bdev, holder); /* * Trigger event checking and tell drivers to flush MEDIA_CHANGE @@ -1044,8 +1045,8 @@ void bdev_release(struct file *bdev_file) mutex_unlock(&disk->open_mutex); module_put(disk->fops->owner); +put_no_open: blkdev_put_no_open(bdev); - kfree(handle); } /** diff --git a/block/blk.h b/block/blk.h index 7ca24814f3a0..f02b25f22e8b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -25,11 +25,6 @@ struct blk_flush_queue { struct request *flush_rq; }; -struct bdev_handle { - void *holder; - blk_mode_t mode; -}; - bool is_flush_rq(struct request *req); struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, diff --git a/block/fops.c b/block/fops.c index aab9b89e4c77..029e787f0119 100644 --- a/block/fops.c +++ b/block/fops.c @@ -569,18 +569,17 @@ static int blkdev_fsync(struct file *filp, loff_t start, loff_t end, blk_mode_t file_to_blk_mode(struct file *file) { blk_mode_t mode = 0; - struct bdev_handle *handle = file->private_data; if (file->f_mode & FMODE_READ) mode |= BLK_OPEN_READ; if (file->f_mode & FMODE_WRITE) mode |= BLK_OPEN_WRITE; /* - * do_dentry_open() clears O_EXCL from f_flags, use handle->mode to - * determine whether the open was exclusive for already open files. + * do_dentry_open() clears O_EXCL from f_flags, use file->private_data + * to determine whether the open was exclusive for already open files. 
*/ - if (handle) - mode |= handle->mode & BLK_OPEN_EXCL; + if (file->private_data) + mode |= BLK_OPEN_EXCL; else if (file->f_flags & O_EXCL) mode |= BLK_OPEN_EXCL; if (file->f_flags & O_NDELAY) @@ -601,12 +600,13 @@ static int blkdev_open(struct inode *inode, struct file *filp) { struct block_device *bdev; blk_mode_t mode; - void *holder; int ret; mode = file_to_blk_mode(filp); - holder = mode & BLK_OPEN_EXCL ? filp : NULL; - ret = bdev_permission(inode->i_rdev, mode, holder); + /* Use the file as the holder. */ + if (mode & BLK_OPEN_EXCL) + filp->private_data = filp; + ret = bdev_permission(inode->i_rdev, mode, filp->private_data); if (ret) return ret; @@ -614,7 +614,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) if (!bdev) return -ENXIO; - ret = bdev_open(bdev, mode, holder, NULL, filp); + ret = bdev_open(bdev, mode, filp->private_data, NULL, filp); if (ret) blkdev_put_no_open(bdev); return ret; @@ -622,8 +622,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) static int blkdev_release(struct inode *inode, struct file *filp) { - if (filp->private_data) - bdev_release(filp); + bdev_release(filp); return 0; } -- cgit v1.2.3