diff options
Diffstat (limited to 'drivers/nvme/host/ioctl.c')
-rw-r--r-- | drivers/nvme/host/ioctl.c | 317 |
1 files changed, 231 insertions, 86 deletions
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 27614bee7380..81f5550b670d 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -20,19 +20,20 @@ static void __user *nvme_to_user_ptr(uintptr_t ptrval) return (void __user *)ptrval; } -static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf, - unsigned len, u32 seed, bool write) +static void *nvme_add_user_metadata(struct request *req, void __user *ubuf, + unsigned len, u32 seed) { struct bio_integrity_payload *bip; int ret = -ENOMEM; void *buf; + struct bio *bio = req->bio; buf = kmalloc(len, GFP_KERNEL); if (!buf) goto out; ret = -EFAULT; - if (write && copy_from_user(buf, ubuf, len)) + if ((req_op(req) == REQ_OP_DRV_OUT) && copy_from_user(buf, ubuf, len)) goto out_free_meta; bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); @@ -45,9 +46,13 @@ static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf, bip->bip_iter.bi_sector = seed; ret = bio_integrity_add_page(bio, virt_to_page(buf), len, offset_in_page(buf)); - if (ret == len) - return buf; - ret = -ENOMEM; + if (ret != len) { + ret = -ENOMEM; + goto out_free_meta; + } + + req->cmd_flags |= REQ_INTEGRITY; + return buf; out_free_meta: kfree(buf); out: @@ -65,90 +70,102 @@ static int nvme_finish_user_metadata(struct request *req, void __user *ubuf, } static struct request *nvme_alloc_user_request(struct request_queue *q, - struct nvme_command *cmd, void __user *ubuffer, - unsigned bufflen, void __user *meta_buffer, unsigned meta_len, - u32 meta_seed, void **metap, unsigned timeout, bool vec, - blk_opf_t rq_flags, blk_mq_req_flags_t blk_flags) + struct nvme_command *cmd, blk_opf_t rq_flags, + blk_mq_req_flags_t blk_flags) { - bool write = nvme_is_write(cmd); - struct nvme_ns *ns = q->queuedata; - struct block_device *bdev = ns ? ns->disk->part0 : NULL; struct request *req; - struct bio *bio = NULL; - void *meta = NULL; - int ret; req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); if (IS_ERR(req)) return req; nvme_init_request(req, cmd); - - if (timeout) - req->timeout = timeout; nvme_req(req)->flags |= NVME_REQ_USERCMD; + return req; +} - if (ubuffer && bufflen) { - if (!vec) - ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, - GFP_KERNEL); - else { - struct iovec fast_iov[UIO_FASTIOV]; - struct iovec *iov = fast_iov; - struct iov_iter iter; - - ret = import_iovec(rq_data_dir(req), ubuffer, bufflen, - UIO_FASTIOV, &iov, &iter); - if (ret < 0) - goto out; - ret = blk_rq_map_user_iov(q, req, NULL, &iter, - GFP_KERNEL); - kfree(iov); - } - if (ret) +static int nvme_map_user_request(struct request *req, u64 ubuffer, + unsigned bufflen, void __user *meta_buffer, unsigned meta_len, + u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd, + bool vec) +{ + struct request_queue *q = req->q; + struct nvme_ns *ns = q->queuedata; + struct block_device *bdev = ns ? ns->disk->part0 : NULL; + struct bio *bio = NULL; + void *meta = NULL; + int ret; + + if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { + struct iov_iter iter; + + /* fixedbufs is only for non-vectored io */ + if (WARN_ON_ONCE(vec)) + return -EINVAL; + ret = io_uring_cmd_import_fixed(ubuffer, bufflen, + rq_data_dir(req), &iter, ioucmd); + if (ret < 0) goto out; - bio = req->bio; - if (bdev) - bio_set_dev(bio, bdev); - if (bdev && meta_buffer && meta_len) { - meta = nvme_add_user_metadata(bio, meta_buffer, meta_len, - meta_seed, write); - if (IS_ERR(meta)) { - ret = PTR_ERR(meta); - goto out_unmap; - } - req->cmd_flags |= REQ_INTEGRITY; - *metap = meta; + ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); + } else { + ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), + bufflen, GFP_KERNEL, vec, 0, 0, + rq_data_dir(req)); + } + + if (ret) + goto out; + bio = req->bio; + if (bdev) + bio_set_dev(bio, bdev); + + if (bdev && meta_buffer && meta_len) { + meta = nvme_add_user_metadata(req, meta_buffer, meta_len, + meta_seed); + if (IS_ERR(meta)) { + ret = PTR_ERR(meta); + goto out_unmap; } + *metap = meta; } - return req; + return ret; out_unmap: if (bio) blk_rq_unmap_user(bio); out: blk_mq_free_request(req); - return ERR_PTR(ret); + return ret; } static int nvme_submit_user_cmd(struct request_queue *q, - struct nvme_command *cmd, void __user *ubuffer, + struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, u64 *result, unsigned timeout, bool vec) { + struct nvme_ctrl *ctrl; struct request *req; void *meta = NULL; struct bio *bio; + u32 effects; int ret; - req = nvme_alloc_user_request(q, cmd, ubuffer, bufflen, meta_buffer, - meta_len, meta_seed, &meta, timeout, vec, 0, 0); + req = nvme_alloc_user_request(q, cmd, 0, 0); if (IS_ERR(req)) return PTR_ERR(req); + req->timeout = timeout; + if (ubuffer && bufflen) { + ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, + meta_len, meta_seed, &meta, NULL, vec); + if (ret) + return ret; + } + bio = req->bio; + ctrl = nvme_req(req)->ctrl; - ret = nvme_execute_passthru_rq(req); + ret = nvme_execute_passthru_rq(req, &effects); if (result) *result = le64_to_cpu(nvme_req(req)->result.u64); @@ -158,6 +175,10 @@ static int nvme_submit_user_cmd(struct request_queue *q, if (bio) blk_rq_unmap_user(bio); blk_mq_free_request(req); + + if (effects) + nvme_passthru_end(ctrl, effects, cmd, ret); + return ret; } @@ -220,7 +241,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.appmask = cpu_to_le16(io.appmask); return nvme_submit_user_cmd(ns->queue, &c, - nvme_to_user_ptr(io.addr), length, + io.addr, length, metadata, meta_len, lower_32_bits(io.slba), NULL, 0, false); } @@ -274,7 +295,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, timeout = msecs_to_jiffies(cmd.timeout_ms); status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, - nvme_to_user_ptr(cmd.addr), cmd.data_len, + cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, 0, &result, timeout, false); @@ -320,7 +341,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, timeout = msecs_to_jiffies(cmd.timeout_ms); status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, - nvme_to_user_ptr(cmd.addr), cmd.data_len, + cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, 0, &cmd.result, timeout, vec); @@ -349,9 +370,15 @@ struct nvme_uring_cmd_pdu { struct bio *bio; struct request *req; }; - void *meta; /* kernel-resident buffer */ - void __user *meta_buffer; u32 meta_len; + u32 nvme_status; + union { + struct { + void *meta; /* kernel-resident buffer */ + void __user *meta_buffer; + }; + u64 result; + } u; }; static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( @@ -360,11 +387,10 @@ static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; } -static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) +static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd) { struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); struct request *req = pdu->req; - struct bio *bio = req->bio; int status; u64 result; @@ -375,27 +401,72 @@ static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) result = le64_to_cpu(nvme_req(req)->result.u64); - if (pdu->meta) - status = nvme_finish_user_metadata(req, pdu->meta_buffer, - pdu->meta, pdu->meta_len, status); - if (bio) - blk_rq_unmap_user(bio); + if (pdu->meta_len) + status = nvme_finish_user_metadata(req, pdu->u.meta_buffer, + pdu->u.meta, pdu->meta_len, status); + if (req->bio) + blk_rq_unmap_user(req->bio); blk_mq_free_request(req); io_uring_cmd_done(ioucmd, status, result); } -static void nvme_uring_cmd_end_io(struct request *req, blk_status_t err) +static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) +{ + struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); + + if (pdu->bio) + blk_rq_unmap_user(pdu->bio); + + io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result); +} + +static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, + blk_status_t err) { struct io_uring_cmd *ioucmd = req->end_io_data; struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); - /* extract bio before reusing the same field for request */ - struct bio *bio = pdu->bio; + void *cookie = READ_ONCE(ioucmd->cookie); + req->bio = pdu->bio; + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + pdu->nvme_status = -EINTR; + else + pdu->nvme_status = nvme_req(req)->status; + pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64); + + /* + * For iopoll, complete it directly. + * Otherwise, move the completion to task work. + */ + if (cookie != NULL && blk_rq_is_poll(req)) + nvme_uring_task_cb(ioucmd); + else + io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb); + + return RQ_END_IO_FREE; +} + +static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req, + blk_status_t err) +{ + struct io_uring_cmd *ioucmd = req->end_io_data; + struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); + void *cookie = READ_ONCE(ioucmd->cookie); + + req->bio = pdu->bio; pdu->req = req; - req->bio = bio; - /* this takes care of moving rest of completion-work to task context */ - io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb); + + /* + * For iopoll, complete it directly. + * Otherwise, move the completion to task work. + */ + if (cookie != NULL && blk_rq_is_poll(req)) + nvme_uring_task_meta_cb(ioucmd); + else + io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb); + + return RQ_END_IO_NONE; } static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, @@ -410,6 +481,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, blk_opf_t rq_flags = 0; blk_mq_req_flags_t blk_flags = 0; void *meta = NULL; + int ret; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -445,23 +517,45 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, rq_flags = REQ_NOWAIT; blk_flags = BLK_MQ_REQ_NOWAIT; } + if (issue_flags & IO_URING_F_IOPOLL) + rq_flags |= REQ_POLLED; - req = nvme_alloc_user_request(q, &c, nvme_to_user_ptr(d.addr), - d.data_len, nvme_to_user_ptr(d.metadata), - d.metadata_len, 0, &meta, d.timeout_ms ? - msecs_to_jiffies(d.timeout_ms) : 0, vec, rq_flags, - blk_flags); +retry: + req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); if (IS_ERR(req)) return PTR_ERR(req); - req->end_io = nvme_uring_cmd_end_io; - req->end_io_data = ioucmd; + req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; + + if (d.addr && d.data_len) { + ret = nvme_map_user_request(req, d.addr, + d.data_len, nvme_to_user_ptr(d.metadata), + d.metadata_len, 0, &meta, ioucmd, vec); + if (ret) + return ret; + } + if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) { + if (unlikely(!req->bio)) { + /* we can't poll this, so alloc regular req instead */ + blk_mq_free_request(req); + rq_flags &= ~REQ_POLLED; + goto retry; + } else { + WRITE_ONCE(ioucmd->cookie, req->bio); + req->bio->bi_opf |= REQ_POLLED; + } + } /* to free bio on completion, as req->bio will be null at that time */ pdu->bio = req->bio; - pdu->meta = meta; - pdu->meta_buffer = nvme_to_user_ptr(d.metadata); pdu->meta_len = d.metadata_len; - + req->end_io_data = ioucmd; + if (pdu->meta_len) { + pdu->u.meta = meta; + pdu->u.meta_buffer = nvme_to_user_ptr(d.metadata); + req->end_io = nvme_uring_cmd_end_io_meta; + } else { + req->end_io = nvme_uring_cmd_end_io; + } blk_execute_rq_nowait(req, false); return -EIOCBQUEUED; } @@ -559,9 +653,6 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static int nvme_uring_cmd_checks(unsigned int issue_flags) { - /* IOPOLL not supported yet */ - if (issue_flags & IO_URING_F_IOPOLL) - return -EOPNOTSUPP; /* NVMe passthrough requires big SQE/CQE support */ if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != @@ -604,6 +695,25 @@ int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); } +int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, + struct io_comp_batch *iob, + unsigned int poll_flags) +{ + struct bio *bio; + int ret = 0; + struct nvme_ns *ns; + struct request_queue *q; + + rcu_read_lock(); + bio = READ_ONCE(ioucmd->cookie); + ns = container_of(file_inode(ioucmd->file)->i_cdev, + struct nvme_ns, cdev); + q = ns->queue; + if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio && bio->bi_bdev) + ret = bio_poll(bio, iob, poll_flags); + rcu_read_unlock(); + return ret; +} #ifdef CONFIG_NVME_MULTIPATH static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp, struct nvme_ns_head *head, int srcu_idx) @@ -685,6 +795,31 @@ int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, srcu_read_unlock(&head->srcu, srcu_idx); return ret; } + +int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, + struct io_comp_batch *iob, + unsigned int poll_flags) +{ + struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; + struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); + int srcu_idx = srcu_read_lock(&head->srcu); + struct nvme_ns *ns = nvme_find_path(head); + struct bio *bio; + int ret = 0; + struct request_queue *q; + + if (ns) { + rcu_read_lock(); + bio = READ_ONCE(ioucmd->cookie); + q = ns->queue; + if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio + && bio->bi_bdev) + ret = bio_poll(bio, iob, poll_flags); + rcu_read_unlock(); + } + srcu_read_unlock(&head->srcu, srcu_idx); + return ret; +} #endif /* CONFIG_NVME_MULTIPATH */ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) @@ -692,6 +827,10 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) struct nvme_ctrl *ctrl = ioucmd->file->private_data; int ret; + /* IOPOLL not supported yet */ + if (issue_flags & IO_URING_F_IOPOLL) + return -EOPNOTSUPP; + ret = nvme_uring_cmd_checks(issue_flags); if (ret) return ret; @@ -757,11 +896,17 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; dev_warn(ctrl->device, "resetting controller\n"); return nvme_reset_ctrl_sync(ctrl); case NVME_IOCTL_SUBSYS_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; return nvme_reset_subsystem(ctrl); case NVME_IOCTL_RESCAN: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; nvme_queue_scan(ctrl); return 0; default: |