diff options
Diffstat (limited to 'drivers/block/loop.c')
-rw-r--r-- | drivers/block/loop.c | 178 |
1 files changed, 97 insertions, 81 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 8f6761c27c68..1ec7417c7f00 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -68,7 +68,6 @@ struct loop_device { struct list_head idle_worker_list; struct rb_root worker_tree; struct timer_list timer; - bool use_dio; bool sysfs_inited; struct request_queue *lo_queue; @@ -182,41 +181,44 @@ static bool lo_bdev_can_use_dio(struct loop_device *lo, return true; } -static void __loop_update_dio(struct loop_device *lo, bool dio) +static bool lo_can_use_dio(struct loop_device *lo) { - struct file *file = lo->lo_backing_file; - struct inode *inode = file->f_mapping->host; - struct block_device *backing_bdev = NULL; - bool use_dio; + struct inode *inode = lo->lo_backing_file->f_mapping->host; - if (S_ISBLK(inode->i_mode)) - backing_bdev = I_BDEV(inode); - else if (inode->i_sb->s_bdev) - backing_bdev = inode->i_sb->s_bdev; + if (!(lo->lo_backing_file->f_mode & FMODE_CAN_ODIRECT)) + return false; - use_dio = dio && (file->f_mode & FMODE_CAN_ODIRECT) && - (!backing_bdev || lo_bdev_can_use_dio(lo, backing_bdev)); + if (S_ISBLK(inode->i_mode)) + return lo_bdev_can_use_dio(lo, I_BDEV(inode)); + if (inode->i_sb->s_bdev) + return lo_bdev_can_use_dio(lo, inode->i_sb->s_bdev); + return true; +} - if (lo->use_dio == use_dio) - return; +/* + * Direct I/O can be enabled either by using an O_DIRECT file descriptor, or by + * passing in the LO_FLAGS_DIRECT_IO flag from userspace. It will be silently + * disabled when the device block size is too small or the offset is unaligned. + * + * loop_get_status will always report the effective LO_FLAGS_DIRECT_IO flag and + * not the originally passed in one. + */ +static inline void loop_update_dio(struct loop_device *lo) +{ + bool dio_in_use = lo->lo_flags & LO_FLAGS_DIRECT_IO; - /* flush dirty pages before changing direct IO */ - vfs_fsync(file, 0); + lockdep_assert_held(&lo->lo_mutex); + WARN_ON_ONCE(lo->lo_state == Lo_bound && + lo->lo_queue->mq_freeze_depth == 0); - /* - * The flag of LO_FLAGS_DIRECT_IO is handled similarly with - * LO_FLAGS_READ_ONLY, both are set from kernel, and losetup - * will get updated by ioctl(LOOP_GET_STATUS) - */ - if (lo->lo_state == Lo_bound) - blk_mq_freeze_queue(lo->lo_queue); - lo->use_dio = use_dio; - if (use_dio) + if (lo->lo_backing_file->f_flags & O_DIRECT) lo->lo_flags |= LO_FLAGS_DIRECT_IO; - else + if ((lo->lo_flags & LO_FLAGS_DIRECT_IO) && !lo_can_use_dio(lo)) lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; - if (lo->lo_state == Lo_bound) - blk_mq_unfreeze_queue(lo->lo_queue); + + /* flush dirty pages before starting to issue direct I/O */ + if ((lo->lo_flags & LO_FLAGS_DIRECT_IO) && !dio_in_use) + vfs_fsync(lo->lo_backing_file, 0); } /** @@ -311,6 +313,13 @@ static void loop_clear_limits(struct loop_device *lo, int mode) lim.discard_granularity = 0; } + /* + * XXX: this updates the queue limits without freezing the queue, which + * is against the locking protocol and dangerous. But we can't just + * freeze the queue as we're inside the ->queue_rq method here. So this + * should move out into a workqueue unless we get the file operations to + * advertise if they support specific fallocate operations. + */ queue_limits_commit_update(lo->lo_queue, &lim); } @@ -520,12 +529,6 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) } } -static inline void loop_update_dio(struct loop_device *lo) -{ - __loop_update_dio(lo, (lo->lo_backing_file->f_flags & O_DIRECT) | - lo->use_dio); -} - static void loop_reread_partitions(struct loop_device *lo) { int rc; @@ -964,7 +967,6 @@ loop_set_status_from_info(struct loop_device *lo, memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); lo->lo_file_name[LO_NAME_SIZE-1] = 0; - lo->lo_flags = info->lo_flags; return 0; } @@ -977,12 +979,12 @@ static unsigned int loop_default_blocksize(struct loop_device *lo, return SECTOR_SIZE; } -static int loop_reconfigure_limits(struct loop_device *lo, unsigned int bsize) +static void loop_update_limits(struct loop_device *lo, struct queue_limits *lim, + unsigned int bsize) { struct file *file = lo->lo_backing_file; struct inode *inode = file->f_mapping->host; struct block_device *backing_bdev = NULL; - struct queue_limits lim; u32 granularity = 0, max_discard_sectors = 0; if (S_ISBLK(inode->i_mode)) @@ -995,22 +997,20 @@ static int loop_reconfigure_limits(struct loop_device *lo, unsigned int bsize) loop_get_discard_config(lo, &granularity, &max_discard_sectors); - lim = queue_limits_start_update(lo->lo_queue); - lim.logical_block_size = bsize; - lim.physical_block_size = bsize; - lim.io_min = bsize; - lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_ROTATIONAL); + lim->logical_block_size = bsize; + lim->physical_block_size = bsize; + lim->io_min = bsize; + lim->features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_ROTATIONAL); if (file->f_op->fsync && !(lo->lo_flags & LO_FLAGS_READ_ONLY)) - lim.features |= BLK_FEAT_WRITE_CACHE; + lim->features |= BLK_FEAT_WRITE_CACHE; if (backing_bdev && !bdev_nonrot(backing_bdev)) - lim.features |= BLK_FEAT_ROTATIONAL; - lim.max_hw_discard_sectors = max_discard_sectors; - lim.max_write_zeroes_sectors = max_discard_sectors; + lim->features |= BLK_FEAT_ROTATIONAL; + lim->max_hw_discard_sectors = max_discard_sectors; + lim->max_write_zeroes_sectors = max_discard_sectors; if (max_discard_sectors) - lim.discard_granularity = granularity; + lim->discard_granularity = granularity; else - lim.discard_granularity = 0; - return queue_limits_commit_update(lo->lo_queue, &lim); + lim->discard_granularity = 0; } static int loop_configure(struct loop_device *lo, blk_mode_t mode, @@ -1019,6 +1019,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, { struct file *file = fget(config->fd); struct address_space *mapping; + struct queue_limits lim; int error; loff_t size; bool partscan; @@ -1063,6 +1064,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, error = loop_set_status_from_info(lo, &config->info); if (error) goto out_unlock; + lo->lo_flags = config->info.lo_flags; if (!(file->f_mode & FMODE_WRITE) || !(mode & BLK_OPEN_WRITE) || !file->f_op->write_iter) @@ -1084,13 +1086,15 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, disk_force_media_change(lo->lo_disk); set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); - lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO; lo->lo_device = bdev; lo->lo_backing_file = file; lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); - error = loop_reconfigure_limits(lo, config->block_size); + lim = queue_limits_start_update(lo->lo_queue); + loop_update_limits(lo, &lim, config->block_size); + /* No need to freeze the queue as the device isn't bound yet. */ + error = queue_limits_commit_update(lo->lo_queue, &lim); if (error) goto out_unlock; @@ -1150,7 +1154,12 @@ static void __loop_clr_fd(struct loop_device *lo) lo->lo_sizelimit = 0; memset(lo->lo_file_name, 0, LO_NAME_SIZE); - /* reset the block size to the default */ + /* + * Reset the block size to the default. + * + * No queue freezing needed because this is called from the final + * ->release call only, so there can't be any outstanding I/O. + */ lim = queue_limits_start_update(lo->lo_queue); lim.logical_block_size = SECTOR_SIZE; lim.physical_block_size = SECTOR_SIZE; @@ -1244,7 +1253,6 @@ static int loop_set_status(struct loop_device *lo, const struct loop_info64 *info) { int err; - int prev_lo_flags; bool partscan = false; bool size_changed = false; @@ -1263,21 +1271,19 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) invalidate_bdev(lo->lo_device); } - /* I/O need to be drained during transfer transition */ + /* I/O needs to be drained before changing lo_offset or lo_sizelimit */ blk_mq_freeze_queue(lo->lo_queue); - prev_lo_flags = lo->lo_flags; - err = loop_set_status_from_info(lo, info); if (err) goto out_unfreeze; - /* Mask out flags that can't be set using LOOP_SET_STATUS. */ - lo->lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS; - /* For those flags, use the previous values instead */ - lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_SETTABLE_FLAGS; - /* For flags that can't be cleared, use previous values too */ - lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_CLEARABLE_FLAGS; + partscan = !(lo->lo_flags & LO_FLAGS_PARTSCAN) && + (info->lo_flags & LO_FLAGS_PARTSCAN); + + lo->lo_flags &= ~(LOOP_SET_STATUS_SETTABLE_FLAGS | + LOOP_SET_STATUS_CLEARABLE_FLAGS); + lo->lo_flags |= (info->lo_flags & LOOP_SET_STATUS_SETTABLE_FLAGS); if (size_changed) { loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, @@ -1285,17 +1291,13 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) loop_set_size(lo, new_size); } - /* update dio if lo_offset or transfer is changed */ - __loop_update_dio(lo, lo->use_dio); + /* update the direct I/O flag if lo_offset changed */ + loop_update_dio(lo); out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue); - - if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) && - !(prev_lo_flags & LO_FLAGS_PARTSCAN)) { + if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); - partscan = true; - } out_unlock: mutex_unlock(&lo->lo_mutex); if (partscan) @@ -1444,20 +1446,32 @@ static int loop_set_capacity(struct loop_device *lo) static int loop_set_dio(struct loop_device *lo, unsigned long arg) { - int error = -ENXIO; - if (lo->lo_state != Lo_bound) - goto out; + bool use_dio = !!arg; - __loop_update_dio(lo, !!arg); - if (lo->use_dio == !!arg) + if (lo->lo_state != Lo_bound) + return -ENXIO; + if (use_dio == !!(lo->lo_flags & LO_FLAGS_DIRECT_IO)) return 0; - error = -EINVAL; - out: - return error; + + if (use_dio) { + if (!lo_can_use_dio(lo)) + return -EINVAL; + /* flush dirty pages before starting to use direct I/O */ + vfs_fsync(lo->lo_backing_file, 0); + } + + blk_mq_freeze_queue(lo->lo_queue); + if (use_dio) + lo->lo_flags |= LO_FLAGS_DIRECT_IO; + else + lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; + blk_mq_unfreeze_queue(lo->lo_queue); + return 0; } static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { + struct queue_limits lim; int err = 0; if (lo->lo_state != Lo_bound) @@ -1469,8 +1483,11 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) sync_blockdev(lo->lo_device); invalidate_bdev(lo->lo_device); + lim = queue_limits_start_update(lo->lo_queue); + loop_update_limits(lo, &lim, arg); + blk_mq_freeze_queue(lo->lo_queue); - err = loop_reconfigure_limits(lo, arg); + err = queue_limits_commit_update(lo->lo_queue, &lim); loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); @@ -1854,7 +1871,7 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, cmd->use_aio = false; break; default: - cmd->use_aio = lo->use_dio; + cmd->use_aio = lo->lo_flags & LO_FLAGS_DIRECT_IO; break; } @@ -2023,8 +2040,7 @@ static int loop_add(int i) lo->tag_set.queue_depth = hw_queue_depth; lo->tag_set.numa_node = NUMA_NO_NODE; lo->tag_set.cmd_size = sizeof(struct loop_cmd); - lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING | - BLK_MQ_F_NO_SCHED_BY_DEFAULT; + lo->tag_set.flags = BLK_MQ_F_STACKING | BLK_MQ_F_NO_SCHED_BY_DEFAULT; lo->tag_set.driver_data = lo; err = blk_mq_alloc_tag_set(&lo->tag_set); |