diff options
Diffstat (limited to 'drivers/block/ublk_drv.c')
-rw-r--r-- | drivers/block/ublk_drv.c | 197 |
1 files changed, 140 insertions, 57 deletions
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 458ac54e7b20..defcc964ecab 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -484,15 +484,17 @@ static wait_queue_head_t ublk_idr_wq; /* wait until one idr is freed */ static DEFINE_MUTEX(ublk_ctl_mutex); + +#define UBLK_MAX_UBLKS UBLK_MINORS + /* - * Max ublk devices allowed to add + * Max unprivileged ublk devices allowed to add * * It can be extended to one per-user limit in future or even controlled * by cgroup. */ -#define UBLK_MAX_UBLKS UBLK_MINORS -static unsigned int ublks_max = 64; -static unsigned int ublks_added; /* protected by ublk_ctl_mutex */ +static unsigned int unprivileged_ublks_max = 64; +static unsigned int unprivileged_ublks_added; /* protected by ublk_ctl_mutex */ static struct miscdevice ublk_misc; @@ -681,22 +683,44 @@ static int ublk_max_cmd_buf_size(void) return __ublk_queue_cmd_buf_size(UBLK_MAX_QUEUE_DEPTH); } -static inline bool ublk_queue_can_use_recovery_reissue( - struct ublk_queue *ubq) +/* + * Should I/O outstanding to the ublk server when it exits be reissued? + * If not, outstanding I/O will get errors. + */ +static inline bool ublk_nosrv_should_reissue_outstanding(struct ublk_device *ub) { - return (ubq->flags & UBLK_F_USER_RECOVERY) && - (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE); + return (ub->dev_info.flags & UBLK_F_USER_RECOVERY) && + (ub->dev_info.flags & UBLK_F_USER_RECOVERY_REISSUE); } -static inline bool ublk_queue_can_use_recovery( - struct ublk_queue *ubq) +/* + * Should I/O issued while there is no ublk server queue? If not, I/O + * issued while there is no ublk server will get errors. + */ +static inline bool ublk_nosrv_dev_should_queue_io(struct ublk_device *ub) +{ + return ub->dev_info.flags & UBLK_F_USER_RECOVERY; +} + +/* + * Same as ublk_nosrv_dev_should_queue_io, but uses a queue-local copy + * of the device flags for smaller cache footprint - better for fast + * paths. + */ +static inline bool ublk_nosrv_should_queue_io(struct ublk_queue *ubq) { return ubq->flags & UBLK_F_USER_RECOVERY; } -static inline bool ublk_can_use_recovery(struct ublk_device *ub) +/* + * Should ublk devices be stopped (i.e. no recovery possible) when the + * ublk server exits? If not, devices can be used again by a future + * incarnation of a ublk server via the start_recovery/end_recovery + * commands. + */ +static inline bool ublk_nosrv_should_stop_dev(struct ublk_device *ub) { - return ub->dev_info.flags & UBLK_F_USER_RECOVERY; + return !(ub->dev_info.flags & UBLK_F_USER_RECOVERY); } static void ublk_free_disk(struct gendisk *disk) @@ -1059,6 +1083,25 @@ static void ublk_complete_rq(struct kref *ref) __ublk_complete_rq(req); } +static void ublk_do_fail_rq(struct request *req) +{ + struct ublk_queue *ubq = req->mq_hctx->driver_data; + + if (ublk_nosrv_should_reissue_outstanding(ubq->dev)) + blk_mq_requeue_request(req, false); + else + __ublk_complete_rq(req); +} + +static void ublk_fail_rq_fn(struct kref *ref) +{ + struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data, + ref); + struct request *req = blk_mq_rq_from_pdu(data); + + ublk_do_fail_rq(req); +} + /* * Since __ublk_rq_task_work always fails requests immediately during * exiting, __ublk_fail_req() is only called from abort context during @@ -1072,10 +1115,13 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, { WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE); - if (ublk_queue_can_use_recovery_reissue(ubq)) - blk_mq_requeue_request(req, false); - else - ublk_put_req_ref(ubq, req); + if (ublk_need_req_ref(ubq)) { + struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); + + kref_put(&data->ref, ublk_fail_rq_fn); + } else { + ublk_do_fail_rq(req); + } } static void ubq_complete_io_cmd(struct ublk_io *io, int res, @@ -1100,7 +1146,7 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, struct request *rq) { /* We cannot process this rq so just requeue it. */ - if (ublk_queue_can_use_recovery(ubq)) + if (ublk_nosrv_dev_should_queue_io(ubq->dev)) blk_mq_requeue_request(rq, false); else blk_mq_end_request(rq, BLK_STS_IOERR); @@ -1245,10 +1291,10 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq) struct ublk_device *ub = ubq->dev; if (ublk_abort_requests(ub, ubq)) { - if (ublk_can_use_recovery(ub)) - schedule_work(&ub->quiesce_work); - else + if (ublk_nosrv_should_stop_dev(ub)) schedule_work(&ub->stop_work); + else + schedule_work(&ub->quiesce_work); } return BLK_EH_DONE; } @@ -1277,7 +1323,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, * Note: force_abort is guaranteed to be seen because it is set * before request queue is unqiuesced. */ - if (ublk_queue_can_use_recovery(ubq) && unlikely(ubq->force_abort)) + if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) return BLK_STS_IOERR; if (unlikely(ubq->canceling)) { @@ -1416,17 +1462,27 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) } } +/* Must be called when queue is frozen */ +static bool ublk_mark_queue_canceling(struct ublk_queue *ubq) +{ + bool canceled; + + spin_lock(&ubq->cancel_lock); + canceled = ubq->canceling; + if (!canceled) + ubq->canceling = true; + spin_unlock(&ubq->cancel_lock); + + return canceled; +} + static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) { + bool was_canceled = ubq->canceling; struct gendisk *disk; - spin_lock(&ubq->cancel_lock); - if (ubq->canceling) { - spin_unlock(&ubq->cancel_lock); + if (was_canceled) return false; - } - ubq->canceling = true; - spin_unlock(&ubq->cancel_lock); spin_lock(&ub->lock); disk = ub->ub_disk; @@ -1438,14 +1494,23 @@ static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) if (!disk) return false; - /* Now we are serialized with ublk_queue_rq() */ + /* + * Now we are serialized with ublk_queue_rq() + * + * Make sure that ubq->canceling is set when queue is frozen, + * because ublk_queue_rq() has to rely on this flag for avoiding to + * touch completed uring_cmd + */ blk_mq_quiesce_queue(disk->queue); - /* abort queue is for making forward progress */ - ublk_abort_queue(ub, ubq); + was_canceled = ublk_mark_queue_canceling(ubq); + if (!was_canceled) { + /* abort queue is for making forward progress */ + ublk_abort_queue(ub, ubq); + } blk_mq_unquiesce_queue(disk->queue); put_device(disk_to_dev(disk)); - return true; + return !was_canceled; } static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, @@ -1498,10 +1563,10 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, ublk_cancel_cmd(ubq, io, issue_flags); if (need_schedule) { - if (ublk_can_use_recovery(ub)) - schedule_work(&ub->quiesce_work); - else + if (ublk_nosrv_should_stop_dev(ub)) schedule_work(&ub->stop_work); + else + schedule_work(&ub->quiesce_work); } } @@ -1621,7 +1686,7 @@ static void ublk_stop_dev(struct ublk_device *ub) mutex_lock(&ub->mutex); if (ub->dev_info.state == UBLK_S_DEV_DEAD) goto unlock; - if (ublk_can_use_recovery(ub)) { + if (ublk_nosrv_dev_should_queue_io(ub)) { if (ub->dev_info.state == UBLK_S_DEV_LIVE) __ublk_quiesce_dev(ub); ublk_unquiesce_dev(ub); @@ -1816,10 +1881,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, return -EIOCBQUEUED; out: - io_uring_cmd_done(cmd, ret, 0, issue_flags); pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n", __func__, cmd_op, tag, ret, io->flags); - return -EIOCBQUEUED; + return ret; } static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, @@ -1875,7 +1939,10 @@ static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd, static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd, unsigned int issue_flags) { - ublk_ch_uring_cmd_local(cmd, issue_flags); + int ret = ublk_ch_uring_cmd_local(cmd, issue_flags); + + if (ret != -EIOCBQUEUED) + io_uring_cmd_done(cmd, ret, 0, issue_flags); } static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) @@ -2049,7 +2116,7 @@ static void ublk_deinit_queues(struct ublk_device *ub) for (i = 0; i < nr_queues; i++) ublk_deinit_queue(ub, i); - kfree(ub->__queues); + kvfree(ub->__queues); } static int ublk_init_queues(struct ublk_device *ub) @@ -2060,7 +2127,7 @@ static int ublk_init_queues(struct ublk_device *ub) int i, ret = -ENOMEM; ub->queue_size = ubq_size; - ub->__queues = kcalloc(nr_queues, ubq_size, GFP_KERNEL); + ub->__queues = kvcalloc(nr_queues, ubq_size, GFP_KERNEL); if (!ub->__queues) return ret; @@ -2140,7 +2207,8 @@ static int ublk_add_chdev(struct ublk_device *ub) if (ret) goto fail; - ublks_added++; + if (ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV) + unprivileged_ublks_added++; return 0; fail: put_device(dev); @@ -2178,12 +2246,17 @@ static int ublk_add_tag_set(struct ublk_device *ub) static void ublk_remove(struct ublk_device *ub) { + bool unprivileged; + ublk_stop_dev(ub); cancel_work_sync(&ub->stop_work); cancel_work_sync(&ub->quiesce_work); cdev_device_del(&ub->cdev, &ub->cdev_dev); + unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV; ublk_put_device(ub); - ublks_added--; + + if (unprivileged) + unprivileged_ublks_added--; } static struct ublk_device *ublk_get_device_from_id(int idx) @@ -2384,6 +2457,10 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) if (copy_from_user(&info, argp, sizeof(info))) return -EFAULT; + if (info.queue_depth > UBLK_MAX_QUEUE_DEPTH || !info.queue_depth || + info.nr_hw_queues > UBLK_MAX_NR_QUEUES || !info.nr_hw_queues) + return -EINVAL; + if (capable(CAP_SYS_ADMIN)) info.flags &= ~UBLK_F_UNPRIVILEGED_DEV; else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV)) @@ -2432,7 +2509,8 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) return ret; ret = -EACCES; - if (ublks_added >= ublks_max) + if ((info.flags & UBLK_F_UNPRIVILEGED_DEV) && + unprivileged_ublks_added >= unprivileged_ublks_max) goto out_unlock; ret = -ENOMEM; @@ -2665,9 +2743,12 @@ static int ublk_ctrl_set_params(struct ublk_device *ub, if (ph.len > sizeof(struct ublk_params)) ph.len = sizeof(struct ublk_params); - /* parameters can only be changed when device isn't live */ mutex_lock(&ub->mutex); - if (ub->dev_info.state == UBLK_S_DEV_LIVE) { + if (test_bit(UB_STATE_USED, &ub->state)) { + /* + * Parameters can only be changed when device hasn't + * been started yet + */ ret = -EACCES; } else if (copy_from_user(&ub->params, argp, ph.len)) { ret = -EFAULT; @@ -2716,7 +2797,7 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub, int i; mutex_lock(&ub->mutex); - if (!ublk_can_use_recovery(ub)) + if (ublk_nosrv_should_stop_dev(ub)) goto out_unlock; if (!ub->nr_queues_ready) goto out_unlock; @@ -2769,7 +2850,7 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub, __func__, ub->dev_info.nr_hw_queues, header->dev_id); mutex_lock(&ub->mutex); - if (!ublk_can_use_recovery(ub)) + if (ublk_nosrv_should_stop_dev(ub)) goto out_unlock; if (ub->dev_info.state != UBLK_S_DEV_QUIESCED) { @@ -2990,10 +3071,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, if (ub) ublk_put_device(ub); out: - io_uring_cmd_done(cmd, ret, 0, issue_flags); pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n", __func__, ret, cmd->cmd_op, header->dev_id, header->queue_id); - return -EIOCBQUEUED; + return ret; } static const struct file_operations ublk_ctl_fops = { @@ -3057,23 +3137,26 @@ static void __exit ublk_exit(void) module_init(ublk_init); module_exit(ublk_exit); -static int ublk_set_max_ublks(const char *buf, const struct kernel_param *kp) +static int ublk_set_max_unprivileged_ublks(const char *buf, + const struct kernel_param *kp) { return param_set_uint_minmax(buf, kp, 0, UBLK_MAX_UBLKS); } -static int ublk_get_max_ublks(char *buf, const struct kernel_param *kp) +static int ublk_get_max_unprivileged_ublks(char *buf, + const struct kernel_param *kp) { - return sysfs_emit(buf, "%u\n", ublks_max); + return sysfs_emit(buf, "%u\n", unprivileged_ublks_max); } -static const struct kernel_param_ops ublk_max_ublks_ops = { - .set = ublk_set_max_ublks, - .get = ublk_get_max_ublks, +static const struct kernel_param_ops ublk_max_unprivileged_ublks_ops = { + .set = ublk_set_max_unprivileged_ublks, + .get = ublk_get_max_unprivileged_ublks, }; -module_param_cb(ublks_max, &ublk_max_ublks_ops, &ublks_max, 0644); -MODULE_PARM_DESC(ublks_max, "max number of ublk devices allowed to add(default: 64)"); +module_param_cb(ublks_max, &ublk_max_unprivileged_ublks_ops, + &unprivileged_ublks_max, 0644); +MODULE_PARM_DESC(ublks_max, "max number of unprivileged ublk devices allowed to add(default: 64)"); MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>"); MODULE_DESCRIPTION("Userspace block device"); |