summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2020-11-23 15:38:40 +0300
committerJens Axboe <axboe@kernel.dk>2020-12-02 00:53:39 +0300
commit4e7b5671c6a883d94b5428e1a9c141bbd56cb2a6 (patch)
tree862980514dedf92e87aa3ec651f5be306f207390 /fs
parent7918f0f6fdafa1e52c2d77c537cb55ef25fb69a3 (diff)
downloadlinux-4e7b5671c6a883d94b5428e1a9c141bbd56cb2a6.tar.xz
block: remove i_bdev
Switch the block device lookup interfaces to directly work with a dev_t so that struct block_device references are only acquired by the blkdev_get variants (and the blk-cgroup special case). This means that we now don't need an extra reference in the inode and can generally simplify handling of struct block_device to keep the lookups contained in the core block layer code. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Jan Kara <jack@suse.cz> Reviewed-by: Hannes Reinecke <hare@suse.de> Acked-by: Tejun Heo <tj@kernel.org> Acked-by: Coly Li <colyli@suse.de> [bcache] Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'fs')
-rw-r--r--fs/block_dev.c196
-rw-r--r--fs/btrfs/volumes.c13
-rw-r--r--fs/inode.c3
-rw-r--r--fs/internal.h7
-rw-r--r--fs/io_uring.c10
-rw-r--r--fs/pipe.c5
-rw-r--r--fs/quota/quota.c19
-rw-r--r--fs/statfs.c2
-rw-r--r--fs/super.c44
9 files changed, 91 insertions, 208 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2b8c0586314f..6d6e4d50834c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -883,7 +883,6 @@ static struct block_device *bdget(dev_t dev)
bdev->bd_dev = dev;
inode->i_mode = S_IFBLK;
inode->i_rdev = dev;
- inode->i_bdev = bdev;
inode->i_data.a_ops = &def_blk_aops;
mapping_set_gfp_mask(&inode->i_data, GFP_USER);
unlock_new_inode(inode);
@@ -928,67 +927,8 @@ void bdput(struct block_device *bdev)
{
iput(bdev->bd_inode);
}
-
EXPORT_SYMBOL(bdput);
-static struct block_device *bd_acquire(struct inode *inode)
-{
- struct block_device *bdev;
-
- spin_lock(&bdev_lock);
- bdev = inode->i_bdev;
- if (bdev && !inode_unhashed(bdev->bd_inode)) {
- bdgrab(bdev);
- spin_unlock(&bdev_lock);
- return bdev;
- }
- spin_unlock(&bdev_lock);
-
- /*
- * i_bdev references block device inode that was already shut down
- * (corresponding device got removed). Remove the reference and look
- * up block device inode again just in case new device got
- * reestablished under the same device number.
- */
- if (bdev)
- bd_forget(inode);
-
- bdev = bdget(inode->i_rdev);
- if (bdev) {
- spin_lock(&bdev_lock);
- if (!inode->i_bdev) {
- /*
- * We take an additional reference to bd_inode,
- * and it's released in clear_inode() of inode.
- * So, we can access it via ->i_mapping always
- * without igrab().
- */
- bdgrab(bdev);
- inode->i_bdev = bdev;
- inode->i_mapping = bdev->bd_inode->i_mapping;
- }
- spin_unlock(&bdev_lock);
- }
- return bdev;
-}
-
-/* Call when you free inode */
-
-void bd_forget(struct inode *inode)
-{
- struct block_device *bdev = NULL;
-
- spin_lock(&bdev_lock);
- if (!sb_is_blkdev_sb(inode->i_sb))
- bdev = inode->i_bdev;
- inode->i_bdev = NULL;
- inode->i_mapping = &inode->i_data;
- spin_unlock(&bdev_lock);
-
- if (bdev)
- bdput(bdev);
-}
-
/**
* bd_may_claim - test whether a block device can be claimed
* @bdev: block device of interest
@@ -1497,38 +1437,45 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
}
/**
- * blkdev_get - open a block device
- * @bdev: block_device to open
+ * blkdev_get_by_dev - open a block device by device number
+ * @dev: device number of block device to open
* @mode: FMODE_* mask
* @holder: exclusive holder identifier
*
- * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is
- * open with exclusive access. Specifying %FMODE_EXCL with %NULL
- * @holder is invalid. Exclusive opens may nest for the same @holder.
+ * Open the block device described by device number @dev. If @mode includes
+ * %FMODE_EXCL, the block device is opened with exclusive access. Specifying
+ * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for
+ * the same @holder.
*
- * On success, the reference count of @bdev is unchanged. On failure,
- * @bdev is put.
+ * Use this interface ONLY if you really do not have anything better - i.e. when
+ * you are behind a truly sucky interface and all you are given is a device
+ * number. Everything else should use blkdev_get_by_path().
*
* CONTEXT:
* Might sleep.
*
* RETURNS:
- * 0 on success, -errno on failure.
+ * Reference to the block_device on success, ERR_PTR(-errno) on failure.
*/
-static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
+struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
struct block_device *claiming;
bool unblock_events = true;
+ struct block_device *bdev;
struct gendisk *disk;
int partno;
int ret;
ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
- imajor(bdev->bd_inode), iminor(bdev->bd_inode),
+ MAJOR(dev), MINOR(dev),
((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
if (ret)
- goto bdput;
+ return ERR_PTR(ret);
+
+ bdev = bdget(dev);
+ if (!bdev)
+ return ERR_PTR(-ENOMEM);
/*
* If we lost a race with 'disk' being deleted, try again. See md.c.
@@ -1589,10 +1536,13 @@ put_disk:
if (ret == -ERESTARTSYS)
goto retry;
bdput:
- if (ret)
+ if (ret) {
bdput(bdev);
- return ret;
+ return ERR_PTR(ret);
+ }
+ return bdev;
}
+EXPORT_SYMBOL(blkdev_get_by_dev);
/**
* blkdev_get_by_path - open a block device by name
@@ -1600,32 +1550,30 @@ bdput:
* @mode: FMODE_* mask
* @holder: exclusive holder identifier
*
- * Open the blockdevice described by the device file at @path. @mode
- * and @holder are identical to blkdev_get().
- *
- * On success, the returned block_device has reference count of one.
+ * Open the block device described by the device file at @path. If @mode
+ * includes %FMODE_EXCL, the block device is opened with exclusive access.
+ * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may
+ * nest for the same @holder.
*
* CONTEXT:
* Might sleep.
*
* RETURNS:
- * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+ * Reference to the block_device on success, ERR_PTR(-errno) on failure.
*/
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
void *holder)
{
struct block_device *bdev;
- int err;
-
- bdev = lookup_bdev(path);
- if (IS_ERR(bdev))
- return bdev;
+ dev_t dev;
+ int error;
- err = blkdev_get(bdev, mode, holder);
- if (err)
- return ERR_PTR(err);
+ error = lookup_bdev(path, &dev);
+ if (error)
+ return ERR_PTR(error);
- if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+ bdev = blkdev_get_by_dev(dev, mode, holder);
+ if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
blkdev_put(bdev, mode);
return ERR_PTR(-EACCES);
}
@@ -1634,45 +1582,6 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
}
EXPORT_SYMBOL(blkdev_get_by_path);
-/**
- * blkdev_get_by_dev - open a block device by device number
- * @dev: device number of block device to open
- * @mode: FMODE_* mask
- * @holder: exclusive holder identifier
- *
- * Open the blockdevice described by device number @dev. @mode and
- * @holder are identical to blkdev_get().
- *
- * Use it ONLY if you really do not have anything better - i.e. when
- * you are behind a truly sucky interface and all you are given is a
- * device number. _Never_ to be used for internal purposes. If you
- * ever need it - reconsider your API.
- *
- * On success, the returned block_device has reference count of one.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * Pointer to block_device on success, ERR_PTR(-errno) on failure.
- */
-struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
-{
- struct block_device *bdev;
- int err;
-
- bdev = bdget(dev);
- if (!bdev)
- return ERR_PTR(-ENOMEM);
-
- err = blkdev_get(bdev, mode, holder);
- if (err)
- return ERR_PTR(err);
-
- return bdev;
-}
-EXPORT_SYMBOL(blkdev_get_by_dev);
-
static int blkdev_open(struct inode * inode, struct file * filp)
{
struct block_device *bdev;
@@ -1694,14 +1603,12 @@ static int blkdev_open(struct inode * inode, struct file * filp)
if ((filp->f_flags & O_ACCMODE) == 3)
filp->f_mode |= FMODE_WRITE_IOCTL;
- bdev = bd_acquire(inode);
- if (bdev == NULL)
- return -ENOMEM;
-
+ bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
filp->f_mapping = bdev->bd_inode->i_mapping;
filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
-
- return blkdev_get(bdev, filp->f_mode, filp);
+ return 0;
}
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
@@ -2010,37 +1917,32 @@ const struct file_operations def_blk_fops = {
* namespace if possible and return it. Return ERR_PTR(error)
* otherwise.
*/
-struct block_device *lookup_bdev(const char *pathname)
+int lookup_bdev(const char *pathname, dev_t *dev)
{
- struct block_device *bdev;
struct inode *inode;
struct path path;
int error;
if (!pathname || !*pathname)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
error = kern_path(pathname, LOOKUP_FOLLOW, &path);
if (error)
- return ERR_PTR(error);
+ return error;
inode = d_backing_inode(path.dentry);
error = -ENOTBLK;
if (!S_ISBLK(inode->i_mode))
- goto fail;
+ goto out_path_put;
error = -EACCES;
if (!may_open_dev(&path))
- goto fail;
- error = -ENOMEM;
- bdev = bd_acquire(inode);
- if (!bdev)
- goto fail;
-out:
+ goto out_path_put;
+
+ *dev = inode->i_rdev;
+ error = 0;
+out_path_put:
path_put(&path);
- return bdev;
-fail:
- bdev = ERR_PTR(error);
- goto out;
+ return error;
}
EXPORT_SYMBOL(lookup_bdev);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a6406b3b8c2b..fbc4b58228f7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -929,16 +929,16 @@ static noinline struct btrfs_device *device_list_add(const char *path,
* make sure it's the same device if the device is mounted
*/
if (device->bdev) {
- struct block_device *path_bdev;
+ int error;
+ dev_t path_dev;
- path_bdev = lookup_bdev(path);
- if (IS_ERR(path_bdev)) {
+ error = lookup_bdev(path, &path_dev);
+ if (error) {
mutex_unlock(&fs_devices->device_list_mutex);
- return ERR_CAST(path_bdev);
+ return ERR_PTR(error);
}
- if (device->bdev != path_bdev) {
- bdput(path_bdev);
+ if (device->bdev->bd_dev != path_dev) {
mutex_unlock(&fs_devices->device_list_mutex);
btrfs_warn_in_rcu(device->fs_info,
"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
@@ -947,7 +947,6 @@ static noinline struct btrfs_device *device_list_add(const char *path,
task_pid_nr(current));
return ERR_PTR(-EEXIST);
}
- bdput(path_bdev);
btrfs_info_in_rcu(device->fs_info,
"devid %llu device path %s changed to %s scanned by %s (%d)",
devid, rcu_str_deref(device->name),
diff --git a/fs/inode.c b/fs/inode.c
index 9d78c37b00b8..cb008acf0efd 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -155,7 +155,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_bytes = 0;
inode->i_generation = 0;
inode->i_pipe = NULL;
- inode->i_bdev = NULL;
inode->i_cdev = NULL;
inode->i_link = NULL;
inode->i_dir_seq = 0;
@@ -580,8 +579,6 @@ static void evict(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
}
- if (S_ISBLK(inode->i_mode) && inode->i_bdev)
- bd_forget(inode);
if (S_ISCHR(inode->i_mode) && inode->i_cdev)
cd_forget(inode);
diff --git a/fs/internal.h b/fs/internal.h
index 47be21dfeebe..53f890446e75 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -25,7 +25,6 @@ extern void __init bdev_cache_init(void);
extern int __sync_blockdev(struct block_device *bdev, int wait);
void iterate_bdevs(void (*)(struct block_device *, void *), void *);
void emergency_thaw_bdev(struct super_block *sb);
-void bd_forget(struct inode *inode);
#else
static inline void bdev_cache_init(void)
{
@@ -43,9 +42,6 @@ static inline int emergency_thaw_bdev(struct super_block *sb)
{
return 0;
}
-static inline void bd_forget(struct inode *inode)
-{
-}
#endif /* CONFIG_BLOCK */
/*
@@ -114,8 +110,7 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
*/
extern int reconfigure_super(struct fs_context *);
extern bool trylock_super(struct super_block *sb);
-struct super_block *__get_super(struct block_device *bdev, bool excl);
-extern struct super_block *user_get_super(dev_t);
+struct super_block *user_get_super(dev_t, bool excl);
void put_super(struct super_block *sb);
extern bool mount_capable(struct fs_context *);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4ead291b2976..8f13c0417f94 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2716,11 +2716,7 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd)
static bool io_bdev_nowait(struct block_device *bdev)
{
-#ifdef CONFIG_BLOCK
return !bdev || blk_queue_nowait(bdev_get_queue(bdev));
-#else
- return true;
-#endif
}
/*
@@ -2733,14 +2729,16 @@ static bool io_file_supports_async(struct file *file, int rw)
umode_t mode = file_inode(file)->i_mode;
if (S_ISBLK(mode)) {
- if (io_bdev_nowait(file->f_inode->i_bdev))
+ if (IS_ENABLED(CONFIG_BLOCK) &&
+ io_bdev_nowait(I_BDEV(file->f_mapping->host)))
return true;
return false;
}
if (S_ISCHR(mode) || S_ISSOCK(mode))
return true;
if (S_ISREG(mode)) {
- if (io_bdev_nowait(file->f_inode->i_sb->s_bdev) &&
+ if (IS_ENABLED(CONFIG_BLOCK) &&
+ io_bdev_nowait(file->f_inode->i_sb->s_bdev) &&
file->f_op != &io_uring_fops)
return true;
return false;
diff --git a/fs/pipe.c b/fs/pipe.c
index 0ac197658a2d..c5989cfd564d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1342,9 +1342,8 @@ out_revert_acct:
}
/*
- * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
- * location, so checking ->i_pipe is not enough to verify that this is a
- * pipe.
+ * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is
+ * not enough to verify that this is a pipe.
*/
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
{
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index f3d32b0d9008..6d16b2be5ac4 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -866,17 +866,18 @@ static bool quotactl_cmd_onoff(int cmd)
static struct super_block *quotactl_block(const char __user *special, int cmd)
{
#ifdef CONFIG_BLOCK
- struct block_device *bdev;
struct super_block *sb;
struct filename *tmp = getname(special);
bool excl = false, thawed = false;
+ int error;
+ dev_t dev;
if (IS_ERR(tmp))
return ERR_CAST(tmp);
- bdev = lookup_bdev(tmp->name);
+ error = lookup_bdev(tmp->name, &dev);
putname(tmp);
- if (IS_ERR(bdev))
- return ERR_CAST(bdev);
+ if (error)
+ return ERR_PTR(error);
if (quotactl_cmd_onoff(cmd)) {
excl = true;
@@ -886,8 +887,10 @@ static struct super_block *quotactl_block(const char __user *special, int cmd)
}
retry:
- sb = __get_super(bdev, excl);
- if (thawed && sb && sb->s_writers.frozen != SB_UNFROZEN) {
+ sb = user_get_super(dev, excl);
+ if (!sb)
+ return ERR_PTR(-ENODEV);
+ if (thawed && sb->s_writers.frozen != SB_UNFROZEN) {
if (excl)
up_write(&sb->s_umount);
else
@@ -897,10 +900,6 @@ retry:
put_super(sb);
goto retry;
}
-
- bdput(bdev);
- if (!sb)
- return ERR_PTR(-ENODEV);
return sb;
#else
diff --git a/fs/statfs.c b/fs/statfs.c
index 59f33752c131..68cb07788750 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -235,7 +235,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user
static int vfs_ustat(dev_t dev, struct kstatfs *sbuf)
{
- struct super_block *s = user_get_super(dev);
+ struct super_block *s = user_get_super(dev, false);
int err;
if (!s)
return -EINVAL;
diff --git a/fs/super.c b/fs/super.c
index 343e5c1e538d..2c6cdea2ab2d 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -740,7 +740,14 @@ void iterate_supers_type(struct file_system_type *type,
EXPORT_SYMBOL(iterate_supers_type);
-struct super_block *__get_super(struct block_device *bdev, bool excl)
+/**
+ * get_super - get the superblock of a device
+ * @bdev: device to get the superblock for
+ *
+ * Scans the superblock list and finds the superblock of the file system
+ * mounted on the device given. %NULL is returned if no match is found.
+ */
+struct super_block *get_super(struct block_device *bdev)
{
struct super_block *sb;
@@ -755,17 +762,11 @@ rescan:
if (sb->s_bdev == bdev) {
sb->s_count++;
spin_unlock(&sb_lock);
- if (!excl)
- down_read(&sb->s_umount);
- else
- down_write(&sb->s_umount);
+ down_read(&sb->s_umount);
/* still alive? */
if (sb->s_root && (sb->s_flags & SB_BORN))
return sb;
- if (!excl)
- up_read(&sb->s_umount);
- else
- up_write(&sb->s_umount);
+ up_read(&sb->s_umount);
/* nope, got unmounted */
spin_lock(&sb_lock);
__put_super(sb);
@@ -777,19 +778,6 @@ rescan:
}
/**
- * get_super - get the superblock of a device
- * @bdev: device to get the superblock for
- *
- * Scans the superblock list and finds the superblock of the file system
- * mounted on the device given. %NULL is returned if no match is found.
- */
-struct super_block *get_super(struct block_device *bdev)
-{
- return __get_super(bdev, false);
-}
-EXPORT_SYMBOL(get_super);
-
-/**
* get_active_super - get an active reference to the superblock of a device
* @bdev: device to get the superblock for
*
@@ -820,7 +808,7 @@ restart:
return NULL;
}
-struct super_block *user_get_super(dev_t dev)
+struct super_block *user_get_super(dev_t dev, bool excl)
{
struct super_block *sb;
@@ -832,11 +820,17 @@ rescan:
if (sb->s_dev == dev) {
sb->s_count++;
spin_unlock(&sb_lock);
- down_read(&sb->s_umount);
+ if (excl)
+ down_write(&sb->s_umount);
+ else
+ down_read(&sb->s_umount);
/* still alive? */
if (sb->s_root && (sb->s_flags & SB_BORN))
return sb;
- up_read(&sb->s_umount);
+ if (excl)
+ up_write(&sb->s_umount);
+ else
+ up_read(&sb->s_umount);
/* nope, got unmounted */
spin_lock(&sb_lock);
__put_super(sb);