summaryrefslogtreecommitdiff
path: root/drivers/block/ublk_drv.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/ublk_drv.c')
-rw-r--r--drivers/block/ublk_drv.c197
1 files changed, 140 insertions, 57 deletions
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 458ac54e7b20..defcc964ecab 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -484,15 +484,17 @@ static wait_queue_head_t ublk_idr_wq; /* wait until one idr is freed */
static DEFINE_MUTEX(ublk_ctl_mutex);
+
+#define UBLK_MAX_UBLKS UBLK_MINORS
+
/*
- * Max ublk devices allowed to add
+ * Max unprivileged ublk devices allowed to add
*
* It can be extended to one per-user limit in future or even controlled
* by cgroup.
*/
-#define UBLK_MAX_UBLKS UBLK_MINORS
-static unsigned int ublks_max = 64;
-static unsigned int ublks_added; /* protected by ublk_ctl_mutex */
+static unsigned int unprivileged_ublks_max = 64;
+static unsigned int unprivileged_ublks_added; /* protected by ublk_ctl_mutex */
static struct miscdevice ublk_misc;
@@ -681,22 +683,44 @@ static int ublk_max_cmd_buf_size(void)
return __ublk_queue_cmd_buf_size(UBLK_MAX_QUEUE_DEPTH);
}
-static inline bool ublk_queue_can_use_recovery_reissue(
- struct ublk_queue *ubq)
+/*
+ * Should I/O outstanding to the ublk server when it exits be reissued?
+ * If not, outstanding I/O will get errors.
+ */
+static inline bool ublk_nosrv_should_reissue_outstanding(struct ublk_device *ub)
{
- return (ubq->flags & UBLK_F_USER_RECOVERY) &&
- (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE);
+ return (ub->dev_info.flags & UBLK_F_USER_RECOVERY) &&
+ (ub->dev_info.flags & UBLK_F_USER_RECOVERY_REISSUE);
}
-static inline bool ublk_queue_can_use_recovery(
- struct ublk_queue *ubq)
+/*
+ * Should I/O issued while there is no ublk server queue? If not, I/O
+ * issued while there is no ublk server will get errors.
+ */
+static inline bool ublk_nosrv_dev_should_queue_io(struct ublk_device *ub)
+{
+ return ub->dev_info.flags & UBLK_F_USER_RECOVERY;
+}
+
+/*
+ * Same as ublk_nosrv_dev_should_queue_io, but uses a queue-local copy
+ * of the device flags for smaller cache footprint - better for fast
+ * paths.
+ */
+static inline bool ublk_nosrv_should_queue_io(struct ublk_queue *ubq)
{
return ubq->flags & UBLK_F_USER_RECOVERY;
}
-static inline bool ublk_can_use_recovery(struct ublk_device *ub)
+/*
+ * Should ublk devices be stopped (i.e. no recovery possible) when the
+ * ublk server exits? If not, devices can be used again by a future
+ * incarnation of a ublk server via the start_recovery/end_recovery
+ * commands.
+ */
+static inline bool ublk_nosrv_should_stop_dev(struct ublk_device *ub)
{
- return ub->dev_info.flags & UBLK_F_USER_RECOVERY;
+ return !(ub->dev_info.flags & UBLK_F_USER_RECOVERY);
}
static void ublk_free_disk(struct gendisk *disk)
@@ -1059,6 +1083,25 @@ static void ublk_complete_rq(struct kref *ref)
__ublk_complete_rq(req);
}
+static void ublk_do_fail_rq(struct request *req)
+{
+ struct ublk_queue *ubq = req->mq_hctx->driver_data;
+
+ if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
+ blk_mq_requeue_request(req, false);
+ else
+ __ublk_complete_rq(req);
+}
+
+static void ublk_fail_rq_fn(struct kref *ref)
+{
+ struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data,
+ ref);
+ struct request *req = blk_mq_rq_from_pdu(data);
+
+ ublk_do_fail_rq(req);
+}
+
/*
* Since __ublk_rq_task_work always fails requests immediately during
* exiting, __ublk_fail_req() is only called from abort context during
@@ -1072,10 +1115,13 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
{
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
- if (ublk_queue_can_use_recovery_reissue(ubq))
- blk_mq_requeue_request(req, false);
- else
- ublk_put_req_ref(ubq, req);
+ if (ublk_need_req_ref(ubq)) {
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ kref_put(&data->ref, ublk_fail_rq_fn);
+ } else {
+ ublk_do_fail_rq(req);
+ }
}
static void ubq_complete_io_cmd(struct ublk_io *io, int res,
@@ -1100,7 +1146,7 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
struct request *rq)
{
/* We cannot process this rq so just requeue it. */
- if (ublk_queue_can_use_recovery(ubq))
+ if (ublk_nosrv_dev_should_queue_io(ubq->dev))
blk_mq_requeue_request(rq, false);
else
blk_mq_end_request(rq, BLK_STS_IOERR);
@@ -1245,10 +1291,10 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
struct ublk_device *ub = ubq->dev;
if (ublk_abort_requests(ub, ubq)) {
- if (ublk_can_use_recovery(ub))
- schedule_work(&ub->quiesce_work);
- else
+ if (ublk_nosrv_should_stop_dev(ub))
schedule_work(&ub->stop_work);
+ else
+ schedule_work(&ub->quiesce_work);
}
return BLK_EH_DONE;
}
@@ -1277,7 +1323,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
* Note: force_abort is guaranteed to be seen because it is set
* before request queue is unqiuesced.
*/
- if (ublk_queue_can_use_recovery(ubq) && unlikely(ubq->force_abort))
+ if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort))
return BLK_STS_IOERR;
if (unlikely(ubq->canceling)) {
@@ -1416,17 +1462,27 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
}
}
+/* Must be called when queue is frozen */
+static bool ublk_mark_queue_canceling(struct ublk_queue *ubq)
+{
+ bool canceled;
+
+ spin_lock(&ubq->cancel_lock);
+ canceled = ubq->canceling;
+ if (!canceled)
+ ubq->canceling = true;
+ spin_unlock(&ubq->cancel_lock);
+
+ return canceled;
+}
+
static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)
{
+ bool was_canceled = ubq->canceling;
struct gendisk *disk;
- spin_lock(&ubq->cancel_lock);
- if (ubq->canceling) {
- spin_unlock(&ubq->cancel_lock);
+ if (was_canceled)
return false;
- }
- ubq->canceling = true;
- spin_unlock(&ubq->cancel_lock);
spin_lock(&ub->lock);
disk = ub->ub_disk;
@@ -1438,14 +1494,23 @@ static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)
if (!disk)
return false;
- /* Now we are serialized with ublk_queue_rq() */
+ /*
+ * Now we are serialized with ublk_queue_rq()
+ *
+ * Make sure that ubq->canceling is set when queue is frozen,
+ * because ublk_queue_rq() has to rely on this flag for avoiding to
+ * touch completed uring_cmd
+ */
blk_mq_quiesce_queue(disk->queue);
- /* abort queue is for making forward progress */
- ublk_abort_queue(ub, ubq);
+ was_canceled = ublk_mark_queue_canceling(ubq);
+ if (!was_canceled) {
+ /* abort queue is for making forward progress */
+ ublk_abort_queue(ub, ubq);
+ }
blk_mq_unquiesce_queue(disk->queue);
put_device(disk_to_dev(disk));
- return true;
+ return !was_canceled;
}
static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
@@ -1498,10 +1563,10 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
ublk_cancel_cmd(ubq, io, issue_flags);
if (need_schedule) {
- if (ublk_can_use_recovery(ub))
- schedule_work(&ub->quiesce_work);
- else
+ if (ublk_nosrv_should_stop_dev(ub))
schedule_work(&ub->stop_work);
+ else
+ schedule_work(&ub->quiesce_work);
}
}
@@ -1621,7 +1686,7 @@ static void ublk_stop_dev(struct ublk_device *ub)
mutex_lock(&ub->mutex);
if (ub->dev_info.state == UBLK_S_DEV_DEAD)
goto unlock;
- if (ublk_can_use_recovery(ub)) {
+ if (ublk_nosrv_dev_should_queue_io(ub)) {
if (ub->dev_info.state == UBLK_S_DEV_LIVE)
__ublk_quiesce_dev(ub);
ublk_unquiesce_dev(ub);
@@ -1816,10 +1881,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
return -EIOCBQUEUED;
out:
- io_uring_cmd_done(cmd, ret, 0, issue_flags);
pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n",
__func__, cmd_op, tag, ret, io->flags);
- return -EIOCBQUEUED;
+ return ret;
}
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
@@ -1875,7 +1939,10 @@ static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
- ublk_ch_uring_cmd_local(cmd, issue_flags);
+ int ret = ublk_ch_uring_cmd_local(cmd, issue_flags);
+
+ if (ret != -EIOCBQUEUED)
+ io_uring_cmd_done(cmd, ret, 0, issue_flags);
}
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
@@ -2049,7 +2116,7 @@ static void ublk_deinit_queues(struct ublk_device *ub)
for (i = 0; i < nr_queues; i++)
ublk_deinit_queue(ub, i);
- kfree(ub->__queues);
+ kvfree(ub->__queues);
}
static int ublk_init_queues(struct ublk_device *ub)
@@ -2060,7 +2127,7 @@ static int ublk_init_queues(struct ublk_device *ub)
int i, ret = -ENOMEM;
ub->queue_size = ubq_size;
- ub->__queues = kcalloc(nr_queues, ubq_size, GFP_KERNEL);
+ ub->__queues = kvcalloc(nr_queues, ubq_size, GFP_KERNEL);
if (!ub->__queues)
return ret;
@@ -2140,7 +2207,8 @@ static int ublk_add_chdev(struct ublk_device *ub)
if (ret)
goto fail;
- ublks_added++;
+ if (ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV)
+ unprivileged_ublks_added++;
return 0;
fail:
put_device(dev);
@@ -2178,12 +2246,17 @@ static int ublk_add_tag_set(struct ublk_device *ub)
static void ublk_remove(struct ublk_device *ub)
{
+ bool unprivileged;
+
ublk_stop_dev(ub);
cancel_work_sync(&ub->stop_work);
cancel_work_sync(&ub->quiesce_work);
cdev_device_del(&ub->cdev, &ub->cdev_dev);
+ unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV;
ublk_put_device(ub);
- ublks_added--;
+
+ if (unprivileged)
+ unprivileged_ublks_added--;
}
static struct ublk_device *ublk_get_device_from_id(int idx)
@@ -2384,6 +2457,10 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
if (copy_from_user(&info, argp, sizeof(info)))
return -EFAULT;
+ if (info.queue_depth > UBLK_MAX_QUEUE_DEPTH || !info.queue_depth ||
+ info.nr_hw_queues > UBLK_MAX_NR_QUEUES || !info.nr_hw_queues)
+ return -EINVAL;
+
if (capable(CAP_SYS_ADMIN))
info.flags &= ~UBLK_F_UNPRIVILEGED_DEV;
else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV))
@@ -2432,7 +2509,8 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
return ret;
ret = -EACCES;
- if (ublks_added >= ublks_max)
+ if ((info.flags & UBLK_F_UNPRIVILEGED_DEV) &&
+ unprivileged_ublks_added >= unprivileged_ublks_max)
goto out_unlock;
ret = -ENOMEM;
@@ -2665,9 +2743,12 @@ static int ublk_ctrl_set_params(struct ublk_device *ub,
if (ph.len > sizeof(struct ublk_params))
ph.len = sizeof(struct ublk_params);
- /* parameters can only be changed when device isn't live */
mutex_lock(&ub->mutex);
- if (ub->dev_info.state == UBLK_S_DEV_LIVE) {
+ if (test_bit(UB_STATE_USED, &ub->state)) {
+ /*
+ * Parameters can only be changed when device hasn't
+ * been started yet
+ */
ret = -EACCES;
} else if (copy_from_user(&ub->params, argp, ph.len)) {
ret = -EFAULT;
@@ -2716,7 +2797,7 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
int i;
mutex_lock(&ub->mutex);
- if (!ublk_can_use_recovery(ub))
+ if (ublk_nosrv_should_stop_dev(ub))
goto out_unlock;
if (!ub->nr_queues_ready)
goto out_unlock;
@@ -2769,7 +2850,7 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
__func__, ub->dev_info.nr_hw_queues, header->dev_id);
mutex_lock(&ub->mutex);
- if (!ublk_can_use_recovery(ub))
+ if (ublk_nosrv_should_stop_dev(ub))
goto out_unlock;
if (ub->dev_info.state != UBLK_S_DEV_QUIESCED) {
@@ -2990,10 +3071,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
if (ub)
ublk_put_device(ub);
out:
- io_uring_cmd_done(cmd, ret, 0, issue_flags);
pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n",
__func__, ret, cmd->cmd_op, header->dev_id, header->queue_id);
- return -EIOCBQUEUED;
+ return ret;
}
static const struct file_operations ublk_ctl_fops = {
@@ -3057,23 +3137,26 @@ static void __exit ublk_exit(void)
module_init(ublk_init);
module_exit(ublk_exit);
-static int ublk_set_max_ublks(const char *buf, const struct kernel_param *kp)
+static int ublk_set_max_unprivileged_ublks(const char *buf,
+ const struct kernel_param *kp)
{
return param_set_uint_minmax(buf, kp, 0, UBLK_MAX_UBLKS);
}
-static int ublk_get_max_ublks(char *buf, const struct kernel_param *kp)
+static int ublk_get_max_unprivileged_ublks(char *buf,
+ const struct kernel_param *kp)
{
- return sysfs_emit(buf, "%u\n", ublks_max);
+ return sysfs_emit(buf, "%u\n", unprivileged_ublks_max);
}
-static const struct kernel_param_ops ublk_max_ublks_ops = {
- .set = ublk_set_max_ublks,
- .get = ublk_get_max_ublks,
+static const struct kernel_param_ops ublk_max_unprivileged_ublks_ops = {
+ .set = ublk_set_max_unprivileged_ublks,
+ .get = ublk_get_max_unprivileged_ublks,
};
-module_param_cb(ublks_max, &ublk_max_ublks_ops, &ublks_max, 0644);
-MODULE_PARM_DESC(ublks_max, "max number of ublk devices allowed to add(default: 64)");
+module_param_cb(ublks_max, &ublk_max_unprivileged_ublks_ops,
+ &unprivileged_ublks_max, 0644);
+MODULE_PARM_DESC(ublks_max, "max number of unprivileged ublk devices allowed to add(default: 64)");
MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>");
MODULE_DESCRIPTION("Userspace block device");