diff options
-rw-r--r-- | fs/aio.c | 9 | ||||
-rw-r--r-- | fs/io_uring.c | 338 | ||||
-rw-r--r-- | fs/splice.c | 8 | ||||
-rw-r--r-- | include/linux/socket.h | 7 | ||||
-rw-r--r-- | include/linux/uio.h | 4 | ||||
-rw-r--r-- | include/uapi/linux/io_uring.h | 4 | ||||
-rw-r--r-- | lib/iov_iter.c | 15 | ||||
-rw-r--r-- | net/compat.c | 3 | ||||
-rw-r--r-- | net/socket.c | 18 |
9 files changed, 330 insertions, 76 deletions
@@ -1479,8 +1479,9 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) return 0; } -static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec, - bool vectored, bool compat, struct iov_iter *iter) +static ssize_t aio_setup_rw(int rw, const struct iocb *iocb, + struct iovec **iovec, bool vectored, bool compat, + struct iov_iter *iter) { void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf; size_t len = iocb->aio_nbytes; @@ -1537,7 +1538,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb, return -EINVAL; ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter); - if (ret) + if (ret < 0) return ret; ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) @@ -1565,7 +1566,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, return -EINVAL; ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter); - if (ret) + if (ret < 0) return ret; ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) { diff --git a/fs/io_uring.c b/fs/io_uring.c index 4ed4b110a154..3fd884b4e0be 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -231,6 +231,7 @@ struct io_ring_ctx { struct task_struct *sqo_thread; /* if using sq thread polling */ struct mm_struct *sqo_mm; wait_queue_head_t sqo_wait; + struct completion sqo_thread_started; struct { /* CQ ring */ @@ -322,6 +323,7 @@ struct io_kiocb { struct io_ring_ctx *ctx; struct list_head list; + struct list_head link_list; unsigned int flags; refcount_t refs; #define REQ_F_NOWAIT 1 /* must not punt to workers */ @@ -330,8 +332,10 @@ struct io_kiocb { #define REQ_F_SEQ_PREV 8 /* sequential with previous */ #define REQ_F_IO_DRAIN 16 /* drain existing IO first */ #define REQ_F_IO_DRAINED 32 /* drain done */ +#define REQ_F_LINK 64 /* linked sqes */ +#define REQ_F_FAIL_LINK 128 /* fail rest of links */ u64 user_data; - u32 error; /* iopoll result from callback */ + u32 result; u32 sequence; struct work_struct work; @@ -403,6 +407,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ctx->flags = p->flags; init_waitqueue_head(&ctx->cq_wait); init_completion(&ctx->ctx_done); + init_completion(&ctx->sqo_thread_started); mutex_init(&ctx->uring_lock); init_waitqueue_head(&ctx->wait); for (i = 0; i < ARRAY_SIZE(ctx->pending_async); i++) { @@ -584,6 +589,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, req->flags = 0; /* one is dropped after submission, the other at completion */ refcount_set(&req->refs, 2); + req->result = 0; return req; out: io_ring_drop_ctx_refs(ctx, 1); @@ -599,7 +605,7 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr) } } -static void io_free_req(struct io_kiocb *req) +static void __io_free_req(struct io_kiocb *req) { if (req->file && !(req->flags & REQ_F_FIXED_FILE)) fput(req->file); @@ -607,6 +613,63 @@ static void io_free_req(struct io_kiocb *req) kmem_cache_free(req_cachep, req); } +static void io_req_link_next(struct io_kiocb *req) +{ + struct io_kiocb *nxt; + + /* + * The list should never be empty when we are called here. But could + * potentially happen if the chain is messed up, check to be on the + * safe side. + */ + nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, list); + if (nxt) { + list_del(&nxt->list); + if (!list_empty(&req->link_list)) { + INIT_LIST_HEAD(&nxt->link_list); + list_splice(&req->link_list, &nxt->link_list); + nxt->flags |= REQ_F_LINK; + } + + INIT_WORK(&nxt->work, io_sq_wq_submit_work); + queue_work(req->ctx->sqo_wq, &nxt->work); + } +} + +/* + * Called if REQ_F_LINK is set, and we fail the head request + */ +static void io_fail_links(struct io_kiocb *req) +{ + struct io_kiocb *link; + + while (!list_empty(&req->link_list)) { + link = list_first_entry(&req->link_list, struct io_kiocb, list); + list_del(&link->list); + + io_cqring_add_event(req->ctx, link->user_data, -ECANCELED); + __io_free_req(link); + } +} + +static void io_free_req(struct io_kiocb *req) +{ + /* + * If LINK is set, we have dependent requests in this chain. If we + * didn't fail this request, queue the first one up, moving any other + * dependencies to the next request. In case of failure, fail the rest + * of the chain. + */ + if (req->flags & REQ_F_LINK) { + if (req->flags & REQ_F_FAIL_LINK) + io_fail_links(req); + else + io_req_link_next(req); + } + + __io_free_req(req); +} + static void io_put_req(struct io_kiocb *req) { if (refcount_dec_and_test(&req->refs)) @@ -628,16 +691,17 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, req = list_first_entry(done, struct io_kiocb, list); list_del(&req->list); - io_cqring_fill_event(ctx, req->user_data, req->error); + io_cqring_fill_event(ctx, req->user_data, req->result); (*nr_events)++; if (refcount_dec_and_test(&req->refs)) { /* If we're not using fixed files, we have to pair the * completion part with the file put. Use regular * completions for those, only batch free for fixed - * file. + * file and non-linked commands. */ - if (req->flags & REQ_F_FIXED_FILE) { + if ((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) == + REQ_F_FIXED_FILE) { reqs[to_free++] = req; if (to_free == ARRAY_SIZE(reqs)) io_free_req_many(ctx, reqs, &to_free); @@ -776,6 +840,8 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) kiocb_end_write(kiocb); + if ((req->flags & REQ_F_LINK) && res != req->result) + req->flags |= REQ_F_FAIL_LINK; io_cqring_add_event(req->ctx, req->user_data, res); io_put_req(req); } @@ -786,7 +852,9 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) kiocb_end_write(kiocb); - req->error = res; + if ((req->flags & REQ_F_LINK) && res != req->result) + req->flags |= REQ_F_FAIL_LINK; + req->result = res; if (res != -EAGAIN) req->flags |= REQ_F_IOPOLL_COMPLETED; } @@ -929,7 +997,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, !kiocb->ki_filp->f_op->iopoll) return -EOPNOTSUPP; - req->error = 0; kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_complete = io_complete_rw_iopoll; } else { @@ -1001,9 +1068,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, return 0; } -static int io_import_iovec(struct io_ring_ctx *ctx, int rw, - const struct sqe_submit *s, struct iovec **iovec, - struct iov_iter *iter) +static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw, + const struct sqe_submit *s, struct iovec **iovec, + struct iov_iter *iter) { const struct io_uring_sqe *sqe = s->sqe; void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); @@ -1021,7 +1088,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw, opcode = READ_ONCE(sqe->opcode); if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { - int ret = io_import_fixed(ctx, rw, sqe, iter); + ssize_t ret = io_import_fixed(ctx, rw, sqe, iter); *iovec = NULL; return ret; } @@ -1087,7 +1154,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, struct iov_iter iter; struct file *file; size_t iov_count; - int ret; + ssize_t read_size, ret; ret = io_prep_rw(req, s, force_nonblock); if (ret) @@ -1100,16 +1167,30 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, return -EINVAL; ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter); - if (ret) + if (ret < 0) return ret; + read_size = ret; + if (req->flags & REQ_F_LINK) + req->result = read_size; + iov_count = iov_iter_count(&iter); ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count); if (!ret) { ssize_t ret2; - /* Catch -EAGAIN return for forced non-blocking submission */ ret2 = call_read_iter(file, kiocb, &iter); + /* + * In case of a short read, punt to async. This can happen + * if we have data partially cached. Alternatively we can + * return the short read, in which case the application will + * need to issue another SQE and wait for it. That SQE will + * need async punt anyway, so it's more efficient to do it + * here. + */ + if (force_nonblock && ret2 > 0 && ret2 < read_size) + ret2 = -EAGAIN; + /* Catch -EAGAIN return for forced non-blocking submission */ if (!force_nonblock || ret2 != -EAGAIN) { io_rw_done(kiocb, ret2); } else { @@ -1134,7 +1215,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, struct iov_iter iter; struct file *file; size_t iov_count; - int ret; + ssize_t ret; ret = io_prep_rw(req, s, force_nonblock); if (ret) @@ -1147,9 +1228,12 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, return -EINVAL; ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter); - if (ret) + if (ret < 0) return ret; + if (req->flags & REQ_F_LINK) + req->result = ret; + iov_count = iov_iter_count(&iter); ret = -EAGAIN; @@ -1253,6 +1337,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, end > 0 ? end : LLONG_MAX, fsync_flags & IORING_FSYNC_DATASYNC); + if (ret < 0 && (req->flags & REQ_F_LINK)) + req->flags |= REQ_F_FAIL_LINK; io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; @@ -1297,11 +1383,70 @@ static int io_sync_file_range(struct io_kiocb *req, ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags); + if (ret < 0 && (req->flags & REQ_F_LINK)) + req->flags |= REQ_F_FAIL_LINK; io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; } +#if defined(CONFIG_NET) +static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, + bool force_nonblock, + long (*fn)(struct socket *, struct user_msghdr __user *, + unsigned int)) +{ + struct socket *sock; + int ret; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + + sock = sock_from_file(req->file, &ret); + if (sock) { + struct user_msghdr __user *msg; + unsigned flags; + + flags = READ_ONCE(sqe->msg_flags); + if (flags & MSG_DONTWAIT) + req->flags |= REQ_F_NOWAIT; + else if (force_nonblock) + flags |= MSG_DONTWAIT; + + msg = (struct user_msghdr __user *) (unsigned long) + READ_ONCE(sqe->addr); + + ret = fn(sock, msg, flags); + if (force_nonblock && ret == -EAGAIN) + return ret; + } + + io_cqring_add_event(req->ctx, sqe->user_data, ret); + io_put_req(req); + return 0; +} +#endif + +static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, + bool force_nonblock) +{ +#if defined(CONFIG_NET) + return io_send_recvmsg(req, sqe, force_nonblock, __sys_sendmsg_sock); +#else + return -EOPNOTSUPP; +#endif +} + +static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, + bool force_nonblock) +{ +#if defined(CONFIG_NET) + return io_send_recvmsg(req, sqe, force_nonblock, __sys_recvmsg_sock); +#else + return -EOPNOTSUPP; +#endif +} + static void io_poll_remove_one(struct io_kiocb *req) { struct io_poll_iocb *poll = &req->poll; @@ -1549,9 +1694,10 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, { int ret, opcode; + req->user_data = READ_ONCE(s->sqe->user_data); + if (unlikely(s->index >= ctx->sq_entries)) return -EINVAL; - req->user_data = READ_ONCE(s->sqe->user_data); opcode = READ_ONCE(s->sqe->opcode); switch (opcode) { @@ -1586,6 +1732,12 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, case IORING_OP_SYNC_FILE_RANGE: ret = io_sync_file_range(req, s->sqe, force_nonblock); break; + case IORING_OP_SENDMSG: + ret = io_sendmsg(req, s->sqe, force_nonblock); + break; + case IORING_OP_RECVMSG: + ret = io_recvmsg(req, s->sqe, force_nonblock); + break; default: ret = -EINVAL; break; @@ -1595,7 +1747,7 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, return ret; if (ctx->flags & IORING_SETUP_IOPOLL) { - if (req->error == -EAGAIN) + if (req->result == -EAGAIN) return -EAGAIN; /* workqueue context doesn't hold uring_lock, grab it now */ @@ -1819,31 +1971,11 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, return 0; } -static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, - struct io_submit_state *state) +static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, + struct sqe_submit *s) { - struct io_kiocb *req; int ret; - /* enforce forwards compatibility on users */ - if (unlikely(s->sqe->flags & ~(IOSQE_FIXED_FILE | IOSQE_IO_DRAIN))) - return -EINVAL; - - req = io_get_req(ctx, state); - if (unlikely(!req)) - return -EAGAIN; - - ret = io_req_set_file(ctx, s, state, req); - if (unlikely(ret)) - goto out; - - ret = io_req_defer(ctx, req, s->sqe); - if (ret) { - if (ret == -EIOCBQUEUED) - ret = 0; - return ret; - } - ret = __io_submit_sqe(ctx, req, s, true); if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { struct io_uring_sqe *sqe_copy; @@ -1866,24 +1998,93 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, /* * Queued up for async execution, worker will release - * submit reference when the iocb is actually - * submitted. + * submit reference when the iocb is actually submitted. */ return 0; } } -out: /* drop submission reference */ io_put_req(req); /* and drop final reference, if we failed */ - if (ret) + if (ret) { + io_cqring_add_event(ctx, req->user_data, ret); + if (req->flags & REQ_F_LINK) + req->flags |= REQ_F_FAIL_LINK; io_put_req(req); + } return ret; } +#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK) + +static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, + struct io_submit_state *state, struct io_kiocb **link) +{ + struct io_uring_sqe *sqe_copy; + struct io_kiocb *req; + int ret; + + /* enforce forwards compatibility on users */ + if (unlikely(s->sqe->flags & ~SQE_VALID_FLAGS)) { + ret = -EINVAL; + goto err; + } + + req = io_get_req(ctx, state); + if (unlikely(!req)) { + ret = -EAGAIN; + goto err; + } + + ret = io_req_set_file(ctx, s, state, req); + if (unlikely(ret)) { +err_req: + io_free_req(req); +err: + io_cqring_add_event(ctx, s->sqe->user_data, ret); + return; + } + + ret = io_req_defer(ctx, req, s->sqe); + if (ret) { + if (ret != -EIOCBQUEUED) + goto err_req; + return; + } + + /* + * If we already have a head request, queue this one for async + * submittal once the head completes. If we don't have a head but + * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be + * submitted sync once the chain is complete. If none of those + * conditions are true (normal request), then just queue it. + */ + if (*link) { + struct io_kiocb *prev = *link; + + sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL); + if (!sqe_copy) { + ret = -EAGAIN; + goto err_req; + } + + s->sqe = sqe_copy; + memcpy(&req->submit, s, sizeof(*s)); + list_add_tail(&req->list, &prev->link_list); + } else if (s->sqe->flags & IOSQE_IO_LINK) { + req->flags |= REQ_F_LINK; + + memcpy(&req->submit, s, sizeof(*s)); + INIT_LIST_HEAD(&req->link_list); + *link = req; + } else { + io_queue_sqe(ctx, req, s); + } +} + /* * Batched submission is done, ensure local IO is flushed out. */ @@ -1966,7 +2167,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, unsigned int nr, bool has_user, bool mm_fault) { struct io_submit_state state, *statep = NULL; - int ret, i, submitted = 0; + struct io_kiocb *link = NULL; + bool prev_was_link = false; + int i, submitted = 0; if (nr > IO_PLUG_THRESHOLD) { io_submit_state_start(&state, ctx, nr); @@ -1974,22 +2177,30 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, } for (i = 0; i < nr; i++) { + /* + * If previous wasn't linked and we have a linked command, + * that's the end of the chain. Submit the previous link. + */ + if (!prev_was_link && link) { + io_queue_sqe(ctx, link, &link->submit); + link = NULL; + } + prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0; + if (unlikely(mm_fault)) { - ret = -EFAULT; + io_cqring_add_event(ctx, sqes[i].sqe->user_data, + -EFAULT); } else { sqes[i].has_user = has_user; sqes[i].needs_lock = true; sqes[i].needs_fixed_file = true; - ret = io_submit_sqe(ctx, &sqes[i], statep); - } - if (!ret) { + io_submit_sqe(ctx, &sqes[i], statep, &link); submitted++; - continue; } - - io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret); } + if (link) + io_queue_sqe(ctx, link, &link->submit); if (statep) io_submit_state_end(&state); @@ -2006,6 +2217,8 @@ static int io_sq_thread(void *data) unsigned inflight; unsigned long timeout; + complete(&ctx->sqo_thread_started); + old_fs = get_fs(); set_fs(USER_DS); @@ -2130,6 +2343,8 @@ static int io_sq_thread(void *data) static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) { struct io_submit_state state, *statep = NULL; + struct io_kiocb *link = NULL; + bool prev_was_link = false; int i, submit = 0; if (to_submit > IO_PLUG_THRESHOLD) { @@ -2139,22 +2354,30 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) for (i = 0; i < to_submit; i++) { struct sqe_submit s; - int ret; if (!io_get_sqring(ctx, &s)) break; + /* + * If previous wasn't linked and we have a linked command, + * that's the end of the chain. Submit the previous link. + */ + if (!prev_was_link && link) { + io_queue_sqe(ctx, link, &link->submit); + link = NULL; + } + prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0; + s.has_user = true; s.needs_lock = false; s.needs_fixed_file = false; submit++; - - ret = io_submit_sqe(ctx, &s, statep); - if (ret) - io_cqring_add_event(ctx, s.sqe->user_data, ret); + io_submit_sqe(ctx, &s, statep, &link); } io_commit_sqring(ctx); + if (link) + io_queue_sqe(ctx, link, &link->submit); if (statep) io_submit_state_end(statep); @@ -2240,6 +2463,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) static void io_sq_thread_stop(struct io_ring_ctx *ctx) { if (ctx->sqo_thread) { + wait_for_completion(&ctx->sqo_thread_started); /* * The park is a bit of a work-around, without it we get * warning spews on shutdown with SQPOLL set and affinity diff --git a/fs/splice.c b/fs/splice.c index 14cb602d9a2f..98412721f056 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1356,7 +1356,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; - long error; + ssize_t error; struct fd f; int type; @@ -1367,7 +1367,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, error = import_iovec(type, uiov, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter); - if (!error) { + if (error >= 0) { error = do_vmsplice(f.file, &iter, flags); kfree(iov); } @@ -1382,7 +1382,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; - long error; + ssize_t error; struct fd f; int type; @@ -1393,7 +1393,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io error = compat_import_iovec(type, iov32, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter); - if (!error) { + if (error >= 0) { error = do_vmsplice(f.file, &iter, flags); kfree(iov); } diff --git a/include/linux/socket.h b/include/linux/socket.h index b57cd8bf96e2..97523818cb14 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -12,6 +12,7 @@ struct pid; struct cred; +struct socket; #define __sockaddr_check_size(size) \ BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage))) @@ -374,6 +375,12 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, bool forbid_cmsg_compat); +extern long __sys_sendmsg_sock(struct socket *sock, + struct user_msghdr __user *msg, + unsigned int flags); +extern long __sys_recvmsg_sock(struct socket *sock, + struct user_msghdr __user *msg, + unsigned int flags); /* helpers which do the actual work for syscalls */ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, diff --git a/include/linux/uio.h b/include/linux/uio.h index cea1761c5672..ab5f523bc0df 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -267,13 +267,13 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, struct iov_iter *i); -int import_iovec(int type, const struct iovec __user * uvector, +ssize_t import_iovec(int type, const struct iovec __user * uvector, unsigned nr_segs, unsigned fast_segs, struct iovec **iov, struct iov_iter *i); #ifdef CONFIG_COMPAT struct compat_iovec; -int compat_import_iovec(int type, const struct compat_iovec __user * uvector, +ssize_t compat_import_iovec(int type, const struct compat_iovec __user * uvector, unsigned nr_segs, unsigned fast_segs, struct iovec **iov, struct iov_iter *i); #endif diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index a0c460025036..1e1652f25cc1 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -27,6 +27,7 @@ struct io_uring_sqe { __u32 fsync_flags; __u16 poll_events; __u32 sync_range_flags; + __u32 msg_flags; }; __u64 user_data; /* data to be passed back at completion time */ union { @@ -40,6 +41,7 @@ struct io_uring_sqe { */ #define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */ #define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */ +#define IOSQE_IO_LINK (1U << 2) /* links next sqe */ /* * io_uring_setup() flags @@ -57,6 +59,8 @@ struct io_uring_sqe { #define IORING_OP_POLL_ADD 6 #define IORING_OP_POLL_REMOVE 7 #define IORING_OP_SYNC_FILE_RANGE 8 +#define IORING_OP_SENDMSG 9 +#define IORING_OP_RECVMSG 10 /* * sqe->fsync_flags diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f99c41d4eb54..f1e0569b4539 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1634,9 +1634,9 @@ EXPORT_SYMBOL(dup_iter); * on-stack array was used or not (and regardless of whether this function * returns an error or not). * - * Return: 0 on success or negative error code on error. + * Return: Negative error code on error, bytes imported on success */ -int import_iovec(int type, const struct iovec __user * uvector, +ssize_t import_iovec(int type, const struct iovec __user * uvector, unsigned nr_segs, unsigned fast_segs, struct iovec **iov, struct iov_iter *i) { @@ -1652,16 +1652,17 @@ int import_iovec(int type, const struct iovec __user * uvector, } iov_iter_init(i, type, p, nr_segs, n); *iov = p == *iov ? NULL : p; - return 0; + return n; } EXPORT_SYMBOL(import_iovec); #ifdef CONFIG_COMPAT #include <linux/compat.h> -int compat_import_iovec(int type, const struct compat_iovec __user * uvector, - unsigned nr_segs, unsigned fast_segs, - struct iovec **iov, struct iov_iter *i) +ssize_t compat_import_iovec(int type, + const struct compat_iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i) { ssize_t n; struct iovec *p; @@ -1675,7 +1676,7 @@ int compat_import_iovec(int type, const struct compat_iovec __user * uvector, } iov_iter_init(i, type, p, nr_segs, n); *iov = p == *iov ? NULL : p; - return 0; + return n; } #endif diff --git a/net/compat.c b/net/compat.c index 3f9ce609397f..0f7ded26059e 100644 --- a/net/compat.c +++ b/net/compat.c @@ -80,9 +80,10 @@ int get_compat_msghdr(struct msghdr *kmsg, kmsg->msg_iocb = NULL; - return compat_import_iovec(save_addr ? READ : WRITE, + err = compat_import_iovec(save_addr ? READ : WRITE, compat_ptr(msg.msg_iov), msg.msg_iovlen, UIO_FASTIOV, iov, &kmsg->msg_iter); + return err < 0 ? err : 0; } /* Bleech... */ diff --git a/net/socket.c b/net/socket.c index 16449d6daeca..293d56836f01 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2222,9 +2222,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, kmsg->msg_iocb = NULL; - return import_iovec(save_addr ? READ : WRITE, + err = import_iovec(save_addr ? READ : WRITE, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, iov, &kmsg->msg_iter); + return err < 0 ? err : 0; } static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, @@ -2326,6 +2327,13 @@ out_freeiov: /* * BSD sendmsg interface */ +long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *msg, + unsigned int flags) +{ + struct msghdr msg_sys; + + return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0); +} long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, bool forbid_cmsg_compat) @@ -2500,6 +2508,14 @@ out_freeiov: * BSD recvmsg interface */ +long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *msg, + unsigned int flags) +{ + struct msghdr msg_sys; + + return ___sys_recvmsg(sock, msg, &msg_sys, flags, 0); +} + long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, bool forbid_cmsg_compat) { |