diff options
Diffstat (limited to 'drivers/block/ublk_drv.c')
-rw-r--r-- | drivers/block/ublk_drv.c | 748 |
1 files changed, 454 insertions, 294 deletions
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6f51072776f1..99abd67b708b 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -48,6 +48,8 @@ #define UBLK_MINORS (1U << MINORBITS) +#define UBLK_INVALID_BUF_IDX ((u16)-1) + /* private ioctl command mirror */ #define UBLK_CMD_DEL_DEV_ASYNC _IOC_NR(UBLK_U_CMD_DEL_DEV_ASYNC) #define UBLK_CMD_UPDATE_SIZE _IOC_NR(UBLK_U_CMD_UPDATE_SIZE) @@ -69,7 +71,9 @@ | UBLK_F_USER_RECOVERY_FAIL_IO \ | UBLK_F_UPDATE_SIZE \ | UBLK_F_AUTO_BUF_REG \ - | UBLK_F_QUIESCE) + | UBLK_F_QUIESCE \ + | UBLK_F_PER_IO_DAEMON \ + | UBLK_F_BUF_REG_OFF_DAEMON) #define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \ | UBLK_F_USER_RECOVERY_REISSUE \ @@ -81,14 +85,6 @@ UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED | \ UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT) -struct ublk_rq_data { - refcount_t ref; - - /* for auto-unregister buffer in case of UBLK_F_AUTO_BUF_REG */ - u16 buf_index; - void *buf_ctx_handle; -}; - struct ublk_uring_cmd_pdu { /* * Store requests in same batch temporarily for queuing them to @@ -109,8 +105,6 @@ struct ublk_uring_cmd_pdu { */ struct ublk_queue *ubq; - struct ublk_auto_buf_reg buf; - u16 tag; }; @@ -154,9 +148,19 @@ struct ublk_uring_cmd_pdu { /* atomic RW with ubq->cancel_lock */ #define UBLK_IO_FLAG_CANCELED 0x80000000 +/* + * Initialize refcount to a large number to include any registered buffers. + * UBLK_IO_COMMIT_AND_FETCH_REQ will release these references minus those for + * any buffers registered on the io daemon task. + */ +#define UBLK_REFCOUNT_INIT (REFCOUNT_MAX / 2) + struct ublk_io { /* userspace buffer address from io cmd */ - __u64 addr; + union { + __u64 addr; + struct ublk_auto_buf_reg buf; + }; unsigned int flags; int res; @@ -166,18 +170,35 @@ struct ublk_io { /* valid if UBLK_IO_FLAG_OWNED_BY_SRV is set */ struct request *req; }; -}; + + struct task_struct *task; + + /* + * The number of uses of this I/O by the ublk server + * if user copy or zero copy are enabled: + * - UBLK_REFCOUNT_INIT from dispatch to the server + * until UBLK_IO_COMMIT_AND_FETCH_REQ + * - 1 for each inflight ublk_ch_{read,write}_iter() call + * - 1 for each io_uring registered buffer not registered on task + * The I/O can only be completed once all references are dropped. + * User copy and buffer registration operations are only permitted + * if the reference count is nonzero. + */ + refcount_t ref; + /* Count of buffers registered on task and not yet unregistered */ + unsigned task_registered_buffers; + + void *buf_ctx_handle; +} ____cacheline_aligned_in_smp; struct ublk_queue { int q_id; int q_depth; unsigned long flags; - struct task_struct *ubq_daemon; struct ublksrv_io_desc *io_cmd_buf; bool force_abort; - bool timeout; bool canceling; bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */ unsigned short nr_io_ready; /* how many ios setup */ @@ -214,7 +235,10 @@ struct ublk_device { struct completion completion; unsigned int nr_queues_ready; - unsigned int nr_privileged_daemon; + bool unprivileged_daemons; + struct mutex cancel_mutex; + bool canceling; + pid_t ublksrv_tgid; }; /* header of ublk_params */ @@ -227,7 +251,8 @@ static void ublk_io_release(void *priv); static void ublk_stop_dev_unlocked(struct ublk_device *ub); static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - const struct ublk_queue *ubq, int tag, size_t offset); + const struct ublk_queue *ubq, struct ublk_io *io, + size_t offset); static inline unsigned int ublk_req_build_flags(struct request *req); static inline struct ublksrv_io_desc * @@ -672,38 +697,29 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) } static inline void ublk_init_req_ref(const struct ublk_queue *ubq, - struct request *req) + struct ublk_io *io) { - if (ublk_need_req_ref(ubq)) { - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); - - refcount_set(&data->ref, 1); - } + if (ublk_need_req_ref(ubq)) + refcount_set(&io->ref, UBLK_REFCOUNT_INIT); } -static inline bool ublk_get_req_ref(const struct ublk_queue *ubq, - struct request *req) +static inline bool ublk_get_req_ref(struct ublk_io *io) { - if (ublk_need_req_ref(ubq)) { - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); - - return refcount_inc_not_zero(&data->ref); - } + return refcount_inc_not_zero(&io->ref); +} - return true; +static inline void ublk_put_req_ref(struct ublk_io *io, struct request *req) +{ + if (refcount_dec_and_test(&io->ref)) + __ublk_complete_rq(req); } -static inline void ublk_put_req_ref(const struct ublk_queue *ubq, - struct request *req) +static inline bool ublk_sub_req_ref(struct ublk_io *io) { - if (ublk_need_req_ref(ubq)) { - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); + unsigned sub_refs = UBLK_REFCOUNT_INIT - io->task_registered_buffers; - if (refcount_dec_and_test(&data->ref)) - __ublk_complete_rq(req); - } else { - __ublk_complete_rq(req); - } + io->task_registered_buffers = 0; + return refcount_sub_and_test(sub_refs, &io->ref); } static inline bool ublk_need_get_data(const struct ublk_queue *ubq) @@ -980,7 +996,7 @@ static inline bool ublk_need_unmap_req(const struct request *req) } static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, - struct ublk_io *io) + const struct ublk_io *io) { const unsigned int rq_bytes = blk_rq_bytes(req); @@ -1004,7 +1020,7 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, static int ublk_unmap_io(const struct ublk_queue *ubq, const struct request *req, - struct ublk_io *io) + const struct ublk_io *io) { const unsigned int rq_bytes = blk_rq_bytes(req); @@ -1099,11 +1115,6 @@ static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu( return io_uring_cmd_to_pdu(ioucmd, struct ublk_uring_cmd_pdu); } -static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq) -{ - return !ubq->ubq_daemon || ubq->ubq_daemon->flags & PF_EXITING; -} - /* todo: handle partial completion */ static inline void __ublk_complete_rq(struct request *req) { @@ -1144,7 +1155,7 @@ static inline void __ublk_complete_rq(struct request *req) if (blk_update_request(req, BLK_STS_OK, io->res)) blk_mq_requeue_request(req, true); - else + else if (likely(!blk_should_fake_timeout(req->q))) __blk_mq_end_request(req, BLK_STS_OK); return; @@ -1152,8 +1163,8 @@ exit: blk_mq_end_request(req, res); } -static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, - int res, unsigned issue_flags) +static struct io_uring_cmd *__ublk_prep_compl_io_cmd(struct ublk_io *io, + struct request *req) { /* read cmd first because req will overwrite it */ struct io_uring_cmd *cmd = io->cmd; @@ -1168,6 +1179,13 @@ static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, io->flags &= ~UBLK_IO_FLAG_ACTIVE; io->req = req; + return cmd; +} + +static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, + int res, unsigned issue_flags) +{ + struct io_uring_cmd *cmd = __ublk_prep_compl_io_cmd(io, req); /* tell ublksrv one io request is coming */ io_uring_cmd_done(cmd, res, 0, issue_flags); @@ -1185,39 +1203,33 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, blk_mq_end_request(rq, BLK_STS_IOERR); } -static void ublk_auto_buf_reg_fallback(struct request *req) +static void +ublk_auto_buf_reg_fallback(const struct ublk_queue *ubq, struct ublk_io *io) { - const struct ublk_queue *ubq = req->mq_hctx->driver_data; - struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag); - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); + unsigned tag = io - ubq->ios; + struct ublksrv_io_desc *iod = ublk_get_iod(ubq, tag); iod->op_flags |= UBLK_IO_F_NEED_REG_BUF; - refcount_set(&data->ref, 1); } -static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io, - unsigned int issue_flags) +static bool ublk_auto_buf_reg(const struct ublk_queue *ubq, struct request *req, + struct ublk_io *io, unsigned int issue_flags) { - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd); - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); int ret; ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release, - pdu->buf.index, issue_flags); + io->buf.index, issue_flags); if (ret) { - if (pdu->buf.flags & UBLK_AUTO_BUF_REG_FALLBACK) { - ublk_auto_buf_reg_fallback(req); + if (io->buf.flags & UBLK_AUTO_BUF_REG_FALLBACK) { + ublk_auto_buf_reg_fallback(ubq, io); return true; } blk_mq_end_request(req, BLK_STS_IOERR); return false; } - /* one extra reference is dropped by ublk_io_release */ - refcount_set(&data->ref, 2); - data->buf_ctx_handle = io_uring_cmd_ctx_handle(io->cmd); - /* store buffer index in request payload */ - data->buf_index = pdu->buf.index; + io->task_registered_buffers = 1; + io->buf_ctx_handle = io_uring_cmd_ctx_handle(io->cmd); io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG; return true; } @@ -1226,10 +1238,10 @@ static bool ublk_prep_auto_buf_reg(struct ublk_queue *ubq, struct request *req, struct ublk_io *io, unsigned int issue_flags) { + ublk_init_req_ref(ubq, io); if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req)) - return ublk_auto_buf_reg(req, io, issue_flags); + return ublk_auto_buf_reg(ubq, req, io, issue_flags); - ublk_init_req_ref(ubq, req); return true; } @@ -1275,13 +1287,13 @@ static void ublk_dispatch_req(struct ublk_queue *ubq, /* * Task is exiting if either: * - * (1) current != ubq_daemon. + * (1) current != io->task. * io_uring_cmd_complete_in_task() tries to run task_work - * in a workqueue if ubq_daemon(cmd's task) is PF_EXITING. + * in a workqueue if cmd's task is PF_EXITING. * * (2) current->flags & PF_EXITING. */ - if (unlikely(current != ubq->ubq_daemon || current->flags & PF_EXITING)) { + if (unlikely(current != io->task || current->flags & PF_EXITING)) { __ublk_abort_rq(ubq, req); return; } @@ -1330,24 +1342,22 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct request *rq = pdu->req_list; - struct ublk_queue *ubq = pdu->ubq; struct request *next; do { next = rq->rq_next; rq->rq_next = NULL; - ublk_dispatch_req(ubq, rq, issue_flags); + ublk_dispatch_req(rq->mq_hctx->driver_data, rq, issue_flags); rq = next; } while (rq); } -static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) +static void ublk_queue_cmd_list(struct ublk_io *io, struct rq_list *l) { - struct request *rq = rq_list_peek(l); - struct io_uring_cmd *cmd = ubq->ios[rq->tag].cmd; + struct io_uring_cmd *cmd = io->cmd; struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - pdu->req_list = rq; + pdu->req_list = rq_list_peek(l); rq_list_init(l); io_uring_cmd_complete_in_task(cmd, ublk_cmd_list_tw_cb); } @@ -1355,17 +1365,23 @@ static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) static enum blk_eh_timer_return ublk_timeout(struct request *rq) { struct ublk_queue *ubq = rq->mq_hctx->driver_data; + pid_t tgid = ubq->dev->ublksrv_tgid; + struct task_struct *p; + struct pid *pid; - if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) { - if (!ubq->timeout) { - send_sig(SIGKILL, ubq->ubq_daemon, 0); - ubq->timeout = true; - } + if (!(ubq->flags & UBLK_F_UNPRIVILEGED_DEV)) + return BLK_EH_RESET_TIMER; - return BLK_EH_DONE; - } + if (unlikely(!tgid)) + return BLK_EH_RESET_TIMER; - return BLK_EH_RESET_TIMER; + rcu_read_lock(); + pid = find_vpid(tgid); + p = pid_task(pid, PIDTYPE_PID); + if (p) + send_sig(SIGKILL, p, 0); + rcu_read_unlock(); + return BLK_EH_DONE; } static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq, @@ -1373,7 +1389,7 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq, { blk_status_t res; - if (unlikely(ubq->fail_io)) + if (unlikely(READ_ONCE(ubq->fail_io))) return BLK_STS_TARGET; /* With recovery feature enabled, force_abort is set in @@ -1385,7 +1401,8 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq, * Note: force_abort is guaranteed to be seen because it is set * before request queue is unqiuesced. */ - if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) + if (ublk_nosrv_should_queue_io(ubq) && + unlikely(READ_ONCE(ubq->force_abort))) return BLK_STS_IOERR; if (check_cancel && unlikely(ubq->canceling)) @@ -1425,28 +1442,39 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } +static inline bool ublk_belong_to_same_batch(const struct ublk_io *io, + const struct ublk_io *io2) +{ + return (io_uring_cmd_ctx_handle(io->cmd) == + io_uring_cmd_ctx_handle(io2->cmd)) && + (io->task == io2->task); +} + static void ublk_queue_rqs(struct rq_list *rqlist) { struct rq_list requeue_list = { }; struct rq_list submit_list = { }; - struct ublk_queue *ubq = NULL; + struct ublk_io *io = NULL; struct request *req; while ((req = rq_list_pop(rqlist))) { struct ublk_queue *this_q = req->mq_hctx->driver_data; + struct ublk_io *this_io = &this_q->ios[req->tag]; - if (ubq && ubq != this_q && !rq_list_empty(&submit_list)) - ublk_queue_cmd_list(ubq, &submit_list); - ubq = this_q; - - if (ublk_prep_req(ubq, req, true) == BLK_STS_OK) - rq_list_add_tail(&submit_list, req); - else + if (ublk_prep_req(this_q, req, true) != BLK_STS_OK) { rq_list_add_tail(&requeue_list, req); + continue; + } + + if (io && !ublk_belong_to_same_batch(io, this_io) && + !rq_list_empty(&submit_list)) + ublk_queue_cmd_list(io, &submit_list); + io = this_io; + rq_list_add_tail(&submit_list, req); } - if (ubq && !rq_list_empty(&submit_list)) - ublk_queue_cmd_list(ubq, &submit_list); + if (!rq_list_empty(&submit_list)) + ublk_queue_cmd_list(io, &submit_list); *rqlist = requeue_list; } @@ -1474,17 +1502,6 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) /* All old ioucmds have to be completed */ ubq->nr_io_ready = 0; - /* - * old daemon is PF_EXITING, put it now - * - * It could be NULL in case of closing one quisced device. - */ - if (ubq->ubq_daemon) - put_task_struct(ubq->ubq_daemon); - /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */ - ubq->ubq_daemon = NULL; - ubq->timeout = false; - for (i = 0; i < ubq->q_depth; i++) { struct ublk_io *io = &ubq->ios[i]; @@ -1495,6 +1512,20 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) io->flags &= UBLK_IO_FLAG_CANCELED; io->cmd = NULL; io->addr = 0; + + /* + * old task is PF_EXITING, put it now + * + * It could be NULL in case of closing one quiesced + * device. + */ + if (io->task) { + put_task_struct(io->task); + io->task = NULL; + } + + WARN_ON_ONCE(refcount_read(&io->ref)); + WARN_ON_ONCE(io->task_registered_buffers); } } @@ -1506,6 +1537,7 @@ static int ublk_ch_open(struct inode *inode, struct file *filp) if (test_and_set_bit(UB_STATE_OPEN, &ub->state)) return -EBUSY; filp->private_data = ub; + ub->ublksrv_tgid = current->tgid; return 0; } @@ -1516,10 +1548,11 @@ static void ublk_reset_ch_dev(struct ublk_device *ub) for (i = 0; i < ub->dev_info.nr_hw_queues; i++) ublk_queue_reinit(ub, ublk_get_queue(ub, i)); - /* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */ + /* set to NULL, otherwise new tasks cannot mmap io_cmd_buf */ ub->mm = NULL; ub->nr_queues_ready = 0; - ub->nr_privileged_daemon = 0; + ub->unprivileged_daemons = false; + ub->ublksrv_tgid = -1; } static struct gendisk *ublk_get_disk(struct ublk_device *ub) @@ -1541,6 +1574,27 @@ static void ublk_put_disk(struct gendisk *disk) put_device(disk_to_dev(disk)); } +/* + * Use this function to ensure that ->canceling is consistently set for + * the device and all queues. Do not set these flags directly. + * + * Caller must ensure that: + * - cancel_mutex is held. This ensures that there is no concurrent + * access to ub->canceling and no concurrent writes to ubq->canceling. + * - there are no concurrent reads of ubq->canceling from the queue_rq + * path. This can be done by quiescing the queue, or through other + * means. + */ +static void ublk_set_canceling(struct ublk_device *ub, bool canceling) + __must_hold(&ub->cancel_mutex) +{ + int i; + + ub->canceling = canceling; + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) + ublk_get_queue(ub, i)->canceling = canceling; +} + static int ublk_ch_release(struct inode *inode, struct file *filp) { struct ublk_device *ub = filp->private_data; @@ -1569,12 +1623,11 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) * All requests may be inflight, so ->canceling may not be set, set * it now. */ - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { - struct ublk_queue *ubq = ublk_get_queue(ub, i); - - ubq->canceling = true; - ublk_abort_queue(ub, ubq); - } + mutex_lock(&ub->cancel_mutex); + ublk_set_canceling(ub, true); + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) + ublk_abort_queue(ub, ublk_get_queue(ub, i)); + mutex_unlock(&ub->cancel_mutex); blk_mq_kick_requeue_list(disk->queue); /* @@ -1592,7 +1645,6 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) * Transition the device to the nosrv state. What exactly this * means depends on the recovery flags */ - blk_mq_quiesce_queue(disk->queue); if (ublk_nosrv_should_stop_dev(ub)) { /* * Allow any pending/future I/O to pass through quickly @@ -1600,8 +1652,7 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) * waits for all pending I/O to complete */ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_get_queue(ub, i)->force_abort = true; - blk_mq_unquiesce_queue(disk->queue); + WRITE_ONCE(ublk_get_queue(ub, i)->force_abort, true); ublk_stop_dev_unlocked(ub); } else { @@ -1611,9 +1662,8 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) } else { ub->dev_info.state = UBLK_S_DEV_FAIL_IO; for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_get_queue(ub, i)->fail_io = true; + WRITE_ONCE(ublk_get_queue(ub, i)->fail_io, true); } - blk_mq_unquiesce_queue(disk->queue); } unlock: mutex_unlock(&ub->mutex); @@ -1697,23 +1747,17 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) } } -/* Must be called when queue is frozen */ -static void ublk_mark_queue_canceling(struct ublk_queue *ubq) +static void ublk_start_cancel(struct ublk_device *ub) { - spin_lock(&ubq->cancel_lock); - if (!ubq->canceling) - ubq->canceling = true; - spin_unlock(&ubq->cancel_lock); -} - -static void ublk_start_cancel(struct ublk_queue *ubq) -{ - struct ublk_device *ub = ubq->dev; struct gendisk *disk = ublk_get_disk(ub); /* Our disk has been dead */ if (!disk) return; + + mutex_lock(&ub->cancel_mutex); + if (ub->canceling) + goto out; /* * Now we are serialized with ublk_queue_rq() * @@ -1722,8 +1766,10 @@ static void ublk_start_cancel(struct ublk_queue *ubq) * touch completed uring_cmd */ blk_mq_quiesce_queue(disk->queue); - ublk_mark_queue_canceling(ubq); + ublk_set_canceling(ub, true); blk_mq_unquiesce_queue(disk->queue); +out: + mutex_unlock(&ub->cancel_mutex); ublk_put_disk(disk); } @@ -1783,6 +1829,7 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; struct task_struct *task; + struct ublk_io *io; if (WARN_ON_ONCE(!ubq)) return; @@ -1791,13 +1838,13 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, return; task = io_uring_cmd_get_task(cmd); - if (WARN_ON_ONCE(task && task != ubq->ubq_daemon)) + io = &ubq->ios[pdu->tag]; + if (WARN_ON_ONCE(task && task != io->task)) return; - if (!ubq->canceling) - ublk_start_cancel(ubq); + ublk_start_cancel(ubq->dev); - WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd); + WARN_ON_ONCE(io->cmd != cmd); ublk_cancel_cmd(ubq, pdu->tag, issue_flags); } @@ -1919,9 +1966,11 @@ static void ublk_reset_io_flags(struct ublk_device *ub) for (j = 0; j < ubq->q_depth; j++) ubq->ios[j].flags &= ~UBLK_IO_FLAG_CANCELED; spin_unlock(&ubq->cancel_lock); - ubq->canceling = false; ubq->fail_io = false; } + mutex_lock(&ub->cancel_mutex); + ublk_set_canceling(ub, false); + mutex_unlock(&ub->cancel_mutex); } /* device can only be started after all IOs are ready */ @@ -1929,14 +1978,10 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) __must_hold(&ub->mutex) { ubq->nr_io_ready++; - if (ublk_queue_ready(ubq)) { - ubq->ubq_daemon = current; - get_task_struct(ubq->ubq_daemon); + if (ublk_queue_ready(ubq)) ub->nr_queues_ready++; - - if (capable(CAP_SYS_ADMIN)) - ub->nr_privileged_daemon++; - } + if (!ub->unprivileged_daemons && !capable(CAP_SYS_ADMIN)) + ub->unprivileged_daemons = true; if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) { /* now we are ready for handling ublk io request */ @@ -1958,12 +2003,66 @@ static inline int ublk_check_cmd_op(u32 cmd_op) return 0; } -static inline void ublk_fill_io_cmd(struct ublk_io *io, - struct io_uring_cmd *cmd, unsigned long buf_addr) +static inline int ublk_set_auto_buf_reg(struct ublk_io *io, struct io_uring_cmd *cmd) +{ + io->buf = ublk_sqe_addr_to_auto_buf_reg(READ_ONCE(cmd->sqe->addr)); + + if (io->buf.reserved0 || io->buf.reserved1) + return -EINVAL; + + if (io->buf.flags & ~UBLK_AUTO_BUF_REG_F_MASK) + return -EINVAL; + return 0; +} + +static int ublk_handle_auto_buf_reg(struct ublk_io *io, + struct io_uring_cmd *cmd, + u16 *buf_idx) { + if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) { + io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG; + + /* + * `UBLK_F_AUTO_BUF_REG` only works iff `UBLK_IO_FETCH_REQ` + * and `UBLK_IO_COMMIT_AND_FETCH_REQ` are issued from same + * `io_ring_ctx`. + * + * If this uring_cmd's io_ring_ctx isn't same with the + * one for registering the buffer, it is ublk server's + * responsibility for unregistering the buffer, otherwise + * this ublk request gets stuck. + */ + if (io->buf_ctx_handle == io_uring_cmd_ctx_handle(cmd)) + *buf_idx = io->buf.index; + } + + return ublk_set_auto_buf_reg(io, cmd); +} + +/* Once we return, `io->req` can't be used any more */ +static inline struct request * +ublk_fill_io_cmd(struct ublk_io *io, struct io_uring_cmd *cmd) +{ + struct request *req = io->req; + io->cmd = cmd; io->flags |= UBLK_IO_FLAG_ACTIVE; + /* now this cmd slot is owned by ublk driver */ + io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV; + + return req; +} + +static inline int +ublk_config_io_buf(const struct ublk_queue *ubq, struct ublk_io *io, + struct io_uring_cmd *cmd, unsigned long buf_addr, + u16 *buf_idx) +{ + if (ublk_support_auto_buf_reg(ubq)) + return ublk_handle_auto_buf_reg(io, cmd, buf_idx); + io->addr = buf_addr; + return 0; } static inline void ublk_prep_cancel(struct io_uring_cmd *cmd, @@ -1981,30 +2080,25 @@ static inline void ublk_prep_cancel(struct io_uring_cmd *cmd, io_uring_cmd_mark_cancelable(cmd, issue_flags); } -static inline int ublk_set_auto_buf_reg(struct io_uring_cmd *cmd) -{ - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - - pdu->buf = ublk_sqe_addr_to_auto_buf_reg(READ_ONCE(cmd->sqe->addr)); - - if (pdu->buf.reserved0 || pdu->buf.reserved1) - return -EINVAL; - - if (pdu->buf.flags & ~UBLK_AUTO_BUF_REG_F_MASK) - return -EINVAL; - return 0; -} - static void ublk_io_release(void *priv) { struct request *rq = priv; struct ublk_queue *ubq = rq->mq_hctx->driver_data; + struct ublk_io *io = &ubq->ios[rq->tag]; - ublk_put_req_ref(ubq, rq); + /* + * task_registered_buffers may be 0 if buffers were registered off task + * but unregistered on task. Or after UBLK_IO_COMMIT_AND_FETCH_REQ. + */ + if (current == io->task && io->task_registered_buffers) + io->task_registered_buffers--; + else + ublk_put_req_ref(io, rq); } static int ublk_register_io_buf(struct io_uring_cmd *cmd, - const struct ublk_queue *ubq, unsigned int tag, + const struct ublk_queue *ubq, + struct ublk_io *io, unsigned int index, unsigned int issue_flags) { struct ublk_device *ub = cmd->file->private_data; @@ -2014,30 +2108,75 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd, if (!ublk_support_zero_copy(ubq)) return -EINVAL; - req = __ublk_check_and_get_req(ub, ubq, tag, 0); + req = __ublk_check_and_get_req(ub, ubq, io, 0); if (!req) return -EINVAL; ret = io_buffer_register_bvec(cmd, req, ublk_io_release, index, issue_flags); if (ret) { - ublk_put_req_ref(ubq, req); + ublk_put_req_ref(io, req); return ret; } return 0; } +static int +ublk_daemon_register_io_buf(struct io_uring_cmd *cmd, + const struct ublk_queue *ubq, struct ublk_io *io, + unsigned index, unsigned issue_flags) +{ + unsigned new_registered_buffers; + struct request *req = io->req; + int ret; + + /* + * Ensure there are still references for ublk_sub_req_ref() to release. + * If not, fall back on the thread-safe buffer registration. + */ + new_registered_buffers = io->task_registered_buffers + 1; + if (unlikely(new_registered_buffers >= UBLK_REFCOUNT_INIT)) + return ublk_register_io_buf(cmd, ubq, io, index, issue_flags); + + if (!ublk_support_zero_copy(ubq) || !ublk_rq_has_data(req)) + return -EINVAL; + + ret = io_buffer_register_bvec(cmd, req, ublk_io_release, index, + issue_flags); + if (ret) + return ret; + + io->task_registered_buffers = new_registered_buffers; + return 0; +} + static int ublk_unregister_io_buf(struct io_uring_cmd *cmd, - const struct ublk_queue *ubq, + const struct ublk_device *ub, unsigned int index, unsigned int issue_flags) { - if (!ublk_support_zero_copy(ubq)) + if (!(ub->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)) return -EINVAL; return io_buffer_unregister_bvec(cmd, index, issue_flags); } +static int ublk_check_fetch_buf(const struct ublk_queue *ubq, __u64 buf_addr) +{ + if (ublk_need_map_io(ubq)) { + /* + * FETCH_RQ has to provide IO buffer if NEED GET + * DATA is not enabled + */ + if (!buf_addr && !ublk_need_get_data(ubq)) + return -EINVAL; + } else if (buf_addr) { + /* User copy requires addr to be unset */ + return -EINVAL; + } + return 0; +} + static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, struct ublk_io *io, __u64 buf_addr) { @@ -2064,36 +2203,20 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV); - if (ublk_need_map_io(ubq)) { - /* - * FETCH_RQ has to provide IO buffer if NEED GET - * DATA is not enabled - */ - if (!buf_addr && !ublk_need_get_data(ubq)) - goto out; - } else if (buf_addr) { - /* User copy requires addr to be unset */ - ret = -EINVAL; + ublk_fill_io_cmd(io, cmd); + ret = ublk_config_io_buf(ubq, io, cmd, buf_addr, NULL); + if (ret) goto out; - } - if (ublk_support_auto_buf_reg(ubq)) { - ret = ublk_set_auto_buf_reg(cmd); - if (ret) - goto out; - } - - ublk_fill_io_cmd(io, cmd, buf_addr); + WRITE_ONCE(io->task, get_task_struct(current)); ublk_mark_io_ready(ub, ubq); out: mutex_unlock(&ub->mutex); return ret; } -static int ublk_commit_and_fetch(const struct ublk_queue *ubq, - struct ublk_io *io, struct io_uring_cmd *cmd, - const struct ublksrv_io_cmd *ub_cmd, - unsigned int issue_flags) +static int ublk_check_commit_and_fetch(const struct ublk_queue *ubq, + struct ublk_io *io, __u64 buf_addr) { struct request *req = io->req; @@ -2102,10 +2225,10 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq, * COMMIT_AND_FETCH_REQ has to provide IO buffer if * NEED GET DATA is not enabled or it is Read IO. */ - if (!ub_cmd->addr && (!ublk_need_get_data(ubq) || + if (!buf_addr && (!ublk_need_get_data(ubq) || req_op(req) == REQ_OP_READ)) return -EINVAL; - } else if (req_op(req) != REQ_OP_ZONE_APPEND && ub_cmd->addr) { + } else if (req_op(req) != REQ_OP_ZONE_APPEND && buf_addr) { /* * User copy requires addr to be unset when command is * not zone append @@ -2113,52 +2236,20 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq, return -EINVAL; } - if (ublk_support_auto_buf_reg(ubq)) { - int ret; - - /* - * `UBLK_F_AUTO_BUF_REG` only works iff `UBLK_IO_FETCH_REQ` - * and `UBLK_IO_COMMIT_AND_FETCH_REQ` are issued from same - * `io_ring_ctx`. - * - * If this uring_cmd's io_ring_ctx isn't same with the - * one for registering the buffer, it is ublk server's - * responsibility for unregistering the buffer, otherwise - * this ublk request gets stuck. - */ - if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) { - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); - - if (data->buf_ctx_handle == io_uring_cmd_ctx_handle(cmd)) - io_buffer_unregister_bvec(cmd, data->buf_index, - issue_flags); - io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG; - } - - ret = ublk_set_auto_buf_reg(cmd); - if (ret) - return ret; - } - - ublk_fill_io_cmd(io, cmd, ub_cmd->addr); - - /* now this cmd slot is owned by ublk driver */ - io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV; - io->res = ub_cmd->result; - - if (req_op(req) == REQ_OP_ZONE_APPEND) - req->__sector = ub_cmd->zone_append_lba; - - if (likely(!blk_should_fake_timeout(req->q))) - ublk_put_req_ref(ubq, req); - return 0; } -static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io) +static bool ublk_need_complete_req(const struct ublk_queue *ubq, + struct ublk_io *io) { - struct request *req = io->req; + if (ublk_need_req_ref(ubq)) + return ublk_sub_req_ref(io); + return true; +} +static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io, + struct request *req) +{ /* * We have handled UBLK_IO_NEED_GET_DATA command, * so clear UBLK_IO_FLAG_NEED_GET_DATA now and just @@ -2178,39 +2269,72 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags, const struct ublksrv_io_cmd *ub_cmd) { + u16 buf_idx = UBLK_INVALID_BUF_IDX; struct ublk_device *ub = cmd->file->private_data; struct ublk_queue *ubq; struct ublk_io *io; u32 cmd_op = cmd->cmd_op; unsigned tag = ub_cmd->tag; - int ret = -EINVAL; + struct request *req; + int ret; + bool compl; pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n", __func__, cmd->cmd_op, ub_cmd->q_id, tag, ub_cmd->result); + ret = ublk_check_cmd_op(cmd_op); + if (ret) + goto out; + + /* + * io_buffer_unregister_bvec() doesn't access the ubq or io, + * so no need to validate the q_id, tag, or task + */ + if (_IOC_NR(cmd_op) == UBLK_IO_UNREGISTER_IO_BUF) + return ublk_unregister_io_buf(cmd, ub, ub_cmd->addr, + issue_flags); + + ret = -EINVAL; if (ub_cmd->q_id >= ub->dev_info.nr_hw_queues) goto out; ubq = ublk_get_queue(ub, ub_cmd->q_id); - if (ubq->ubq_daemon && ubq->ubq_daemon != current) - goto out; if (tag >= ubq->q_depth) goto out; io = &ubq->ios[tag]; + /* UBLK_IO_FETCH_REQ can be handled on any task, which sets io->task */ + if (unlikely(_IOC_NR(cmd_op) == UBLK_IO_FETCH_REQ)) { + ret = ublk_check_fetch_buf(ubq, ub_cmd->addr); + if (ret) + goto out; + ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr); + if (ret) + goto out; + + ublk_prep_cancel(cmd, issue_flags, ubq, tag); + return -EIOCBQUEUED; + } + + if (READ_ONCE(io->task) != current) { + /* + * ublk_register_io_buf() accesses only the io's refcount, + * so can be handled on any task + */ + if (_IOC_NR(cmd_op) == UBLK_IO_REGISTER_IO_BUF) + return ublk_register_io_buf(cmd, ubq, io, ub_cmd->addr, + issue_flags); - /* there is pending io cmd, something must be wrong */ - if (io->flags & UBLK_IO_FLAG_ACTIVE) { - ret = -EBUSY; goto out; } - /* only UBLK_IO_FETCH_REQ is allowed if io is not OWNED_BY_SRV */ - if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) && - _IOC_NR(cmd_op) != UBLK_IO_FETCH_REQ) + /* there is pending io cmd, something must be wrong */ + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) { + ret = -EBUSY; goto out; + } /* * ensure that the user issues UBLK_IO_NEED_GET_DATA @@ -2220,32 +2344,44 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, ^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA)) goto out; - ret = ublk_check_cmd_op(cmd_op); - if (ret) - goto out; - - ret = -EINVAL; switch (_IOC_NR(cmd_op)) { case UBLK_IO_REGISTER_IO_BUF: - return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); - case UBLK_IO_UNREGISTER_IO_BUF: - return ublk_unregister_io_buf(cmd, ubq, ub_cmd->addr, issue_flags); - case UBLK_IO_FETCH_REQ: - ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr); + return ublk_daemon_register_io_buf(cmd, ubq, io, ub_cmd->addr, + issue_flags); + case UBLK_IO_COMMIT_AND_FETCH_REQ: + ret = ublk_check_commit_and_fetch(ubq, io, ub_cmd->addr); if (ret) goto out; - break; - case UBLK_IO_COMMIT_AND_FETCH_REQ: - ret = ublk_commit_and_fetch(ubq, io, cmd, ub_cmd, issue_flags); + io->res = ub_cmd->result; + req = ublk_fill_io_cmd(io, cmd); + ret = ublk_config_io_buf(ubq, io, cmd, ub_cmd->addr, &buf_idx); + compl = ublk_need_complete_req(ubq, io); + + /* can't touch 'ublk_io' any more */ + if (buf_idx != UBLK_INVALID_BUF_IDX) + io_buffer_unregister_bvec(cmd, buf_idx, issue_flags); + if (req_op(req) == REQ_OP_ZONE_APPEND) + req->__sector = ub_cmd->zone_append_lba; + if (compl) + __ublk_complete_rq(req); + if (ret) goto out; break; case UBLK_IO_NEED_GET_DATA: - io->addr = ub_cmd->addr; - if (!ublk_get_data(ubq, io)) - return -EIOCBQUEUED; - - return UBLK_IO_RES_OK; + /* + * ublk_get_data() may fail and fallback to requeue, so keep + * uring_cmd active first and prepare for handling new requeued + * request + */ + req = ublk_fill_io_cmd(io, cmd); + ret = ublk_config_io_buf(ubq, io, cmd, ub_cmd->addr, NULL); + WARN_ON_ONCE(ret); + if (likely(ublk_get_data(ubq, io, req))) { + __ublk_prep_compl_io_cmd(io, req); + return UBLK_IO_RES_OK; + } + break; default: goto out; } @@ -2259,15 +2395,20 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, } static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - const struct ublk_queue *ubq, int tag, size_t offset) + const struct ublk_queue *ubq, struct ublk_io *io, size_t offset) { + unsigned tag = io - ubq->ios; struct request *req; + /* + * can't use io->req in case of concurrent UBLK_IO_COMMIT_AND_FETCH_REQ, + * which would overwrite it with io->cmd + */ req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); if (!req) return NULL; - if (!ublk_get_req_ref(ubq, req)) + if (!ublk_get_req_ref(io)) return NULL; if (unlikely(!blk_mq_request_started(req) || req->tag != tag)) @@ -2281,7 +2422,7 @@ static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, return req; fail_put: - ublk_put_req_ref(ubq, req); + ublk_put_req_ref(io, req); return NULL; } @@ -2348,7 +2489,8 @@ static inline bool ublk_check_ubuf_dir(const struct request *req, } static struct request *ublk_check_and_get_req(struct kiocb *iocb, - struct iov_iter *iter, size_t *off, int dir) + struct iov_iter *iter, size_t *off, int dir, + struct ublk_io **io) { struct ublk_device *ub = iocb->ki_filp->private_data; struct ublk_queue *ubq; @@ -2382,7 +2524,8 @@ static struct request *ublk_check_and_get_req(struct kiocb *iocb, if (tag >= ubq->q_depth) return ERR_PTR(-EINVAL); - req = __ublk_check_and_get_req(ub, ubq, tag, buf_off); + *io = &ubq->ios[tag]; + req = __ublk_check_and_get_req(ub, ubq, *io, buf_off); if (!req) return ERR_PTR(-EINVAL); @@ -2395,42 +2538,40 @@ static struct request *ublk_check_and_get_req(struct kiocb *iocb, *off = buf_off; return req; fail: - ublk_put_req_ref(ubq, req); + ublk_put_req_ref(*io, req); return ERR_PTR(-EACCES); } static ssize_t ublk_ch_read_iter(struct kiocb *iocb, struct iov_iter *to) { - struct ublk_queue *ubq; struct request *req; + struct ublk_io *io; size_t buf_off; size_t ret; - req = ublk_check_and_get_req(iocb, to, &buf_off, ITER_DEST); + req = ublk_check_and_get_req(iocb, to, &buf_off, ITER_DEST, &io); if (IS_ERR(req)) return PTR_ERR(req); ret = ublk_copy_user_pages(req, buf_off, to, ITER_DEST); - ubq = req->mq_hctx->driver_data; - ublk_put_req_ref(ubq, req); + ublk_put_req_ref(io, req); return ret; } static ssize_t ublk_ch_write_iter(struct kiocb *iocb, struct iov_iter *from) { - struct ublk_queue *ubq; struct request *req; + struct ublk_io *io; size_t buf_off; size_t ret; - req = ublk_check_and_get_req(iocb, from, &buf_off, ITER_SOURCE); + req = ublk_check_and_get_req(iocb, from, &buf_off, ITER_SOURCE, &io); if (IS_ERR(req)) return PTR_ERR(req); ret = ublk_copy_user_pages(req, buf_off, from, ITER_SOURCE); - ubq = req->mq_hctx->driver_data; - ublk_put_req_ref(ubq, req); + ublk_put_req_ref(io, req); return ret; } @@ -2449,9 +2590,16 @@ static void ublk_deinit_queue(struct ublk_device *ub, int q_id) { int size = ublk_queue_cmd_buf_size(ub, q_id); struct ublk_queue *ubq = ublk_get_queue(ub, q_id); + int i; + + for (i = 0; i < ubq->q_depth; i++) { + struct ublk_io *io = &ubq->ios[i]; + if (io->task) + put_task_struct(io->task); + WARN_ON_ONCE(refcount_read(&io->ref)); + WARN_ON_ONCE(io->task_registered_buffers); + } - if (ubq->ubq_daemon) - put_task_struct(ubq->ubq_daemon); if (ubq->io_cmd_buf) free_pages((unsigned long)ubq->io_cmd_buf, get_order(size)); } @@ -2488,7 +2636,7 @@ static void ublk_deinit_queues(struct ublk_device *ub) for (i = 0; i < nr_queues; i++) ublk_deinit_queue(ub, i); - kfree(ub->__queues); + kvfree(ub->__queues); } static int ublk_init_queues(struct ublk_device *ub) @@ -2499,7 +2647,7 @@ static int ublk_init_queues(struct ublk_device *ub) int i, ret = -ENOMEM; ub->queue_size = ubq_size; - ub->__queues = kcalloc(nr_queues, ubq_size, GFP_KERNEL); + ub->__queues = kvcalloc(nr_queues, ubq_size, GFP_KERNEL); if (!ub->__queues) return ret; @@ -2555,6 +2703,7 @@ static void ublk_cdev_rel(struct device *dev) ublk_deinit_queues(ub); ublk_free_dev_number(ub); mutex_destroy(&ub->mutex); + mutex_destroy(&ub->cancel_mutex); kfree(ub); } @@ -2602,7 +2751,6 @@ static int ublk_add_tag_set(struct ublk_device *ub) ub->tag_set.nr_hw_queues = ub->dev_info.nr_hw_queues; ub->tag_set.queue_depth = ub->dev_info.queue_depth; ub->tag_set.numa_node = NUMA_NO_NODE; - ub->tag_set.cmd_size = sizeof(struct ublk_rq_data); ub->tag_set.driver_data = ub; return blk_mq_alloc_tag_set(&ub->tag_set); } @@ -2704,6 +2852,9 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, if (wait_for_completion_interruptible(&ub->completion) != 0) return -EINTR; + if (ub->ublksrv_tgid != ublksrv_pid) + return -EINVAL; + mutex_lock(&ub->mutex); if (ub->dev_info.state == UBLK_S_DEV_LIVE || test_bit(UB_STATE_USED, &ub->state)) { @@ -2725,8 +2876,8 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, ublk_apply_params(ub); - /* don't probe partitions if any one ubq daemon is un-trusted */ - if (ub->nr_privileged_daemon != ub->nr_queues_ready) + /* don't probe partitions if any daemon task is un-trusted */ + if (ub->unprivileged_daemons) set_bit(GD_SUPPRESS_PART_SCAN, &disk->state); ublk_get_device(ub); @@ -2825,6 +2976,10 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) if (copy_from_user(&info, argp, sizeof(info))) return -EFAULT; + if (info.queue_depth > UBLK_MAX_QUEUE_DEPTH || !info.queue_depth || + info.nr_hw_queues > UBLK_MAX_NR_QUEUES || !info.nr_hw_queues) + return -EINVAL; + if (capable(CAP_SYS_ADMIN)) info.flags &= ~UBLK_F_UNPRIVILEGED_DEV; else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV)) @@ -2904,6 +3059,7 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) goto out_unlock; mutex_init(&ub->mutex); spin_lock_init(&ub->lock); + mutex_init(&ub->cancel_mutex); ret = ublk_alloc_dev_number(ub, header->dev_id); if (ret < 0) @@ -2923,7 +3079,9 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) ub->dev_info.flags &= UBLK_F_ALL; ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE | - UBLK_F_URING_CMD_COMP_IN_TASK; + UBLK_F_URING_CMD_COMP_IN_TASK | + UBLK_F_PER_IO_DAEMON | + UBLK_F_BUF_REG_OFF_DAEMON; /* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */ if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY | @@ -2973,6 +3131,7 @@ out_free_dev_number: ublk_free_dev_number(ub); out_free_ub: mutex_destroy(&ub->mutex); + mutex_destroy(&ub->cancel_mutex); kfree(ub); out_unlock: mutex_unlock(&ublk_ctl_mutex); @@ -3188,14 +3347,17 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub, int ublksrv_pid = (int)header->data[0]; int ret = -EINVAL; - pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n", - __func__, ub->dev_info.nr_hw_queues, header->dev_id); - /* wait until new ubq_daemon sending all FETCH_REQ */ + pr_devel("%s: Waiting for all FETCH_REQs, dev id %d...\n", __func__, + header->dev_id); + if (wait_for_completion_interruptible(&ub->completion)) return -EINTR; - pr_devel("%s: All new ubq_daemons(nr: %d) are ready, dev id %d\n", - __func__, ub->dev_info.nr_hw_queues, header->dev_id); + pr_devel("%s: All FETCH_REQs received, dev id %d\n", __func__, + header->dev_id); + + if (ub->ublksrv_tgid != ublksrv_pid) + return -EINVAL; mutex_lock(&ub->mutex); if (ublk_nosrv_should_stop_dev(ub)) @@ -3310,7 +3472,7 @@ static int ublk_ctrl_quiesce_dev(struct ublk_device *ub, /* zero means wait forever */ u64 timeout_ms = header->data[0]; struct gendisk *disk; - int i, ret = -ENODEV; + int ret = -ENODEV; if (!(ub->dev_info.flags & UBLK_F_QUIESCE)) return -EOPNOTSUPP; @@ -3327,14 +3489,12 @@ static int ublk_ctrl_quiesce_dev(struct ublk_device *ub, if (ub->dev_info.state != UBLK_S_DEV_LIVE) goto put_disk; - /* Mark all queues as canceling */ + /* Mark the device as canceling */ + mutex_lock(&ub->cancel_mutex); blk_mq_quiesce_queue(disk->queue); - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { - struct ublk_queue *ubq = ublk_get_queue(ub, i); - - ubq->canceling = true; - } + ublk_set_canceling(ub, true); blk_mq_unquiesce_queue(disk->queue); + mutex_unlock(&ub->cancel_mutex); if (!timeout_ms) timeout_ms = UINT_MAX; |