summary | refs | log | tree | commit | diff
path: root/io_uring/net.c
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring/net.c')
-rw-r--r-- io_uring/net.c 46
1 files changed, 39 insertions, 7 deletions
diff --git a/io_uring/net.c b/io_uring/net.c
index 384915d931b7..0116cfaec848 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -76,12 +76,18 @@ struct io_sr_msg {
/* initialised and used only by !msg send variants */
u16 addr_len;
u16 buf_group;
+ unsigned short retry_flags;
void __user *addr;
void __user *msg_control;
/* used only for send zerocopy */
struct io_kiocb *notif;
};
+enum sr_retry_flags {
+ IO_SR_MSG_RETRY = 1,
+ IO_SR_MSG_PARTIAL_MAP = 2,
+};
+
/*
* Number of times we'll try and do receives if there's more data. If we
* exceed this limit, then add us to the back of the queue and retry from
@@ -203,6 +209,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
req->flags &= ~REQ_F_BL_EMPTY;
sr->done_io = 0;
+ sr->retry_flags = 0;
sr->len = 0; /* get from the provided buffer */
req->buf_index = sr->buf_group;
}
@@ -409,6 +416,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
sr->done_io = 0;
+ sr->retry_flags = 0;
if (req->opcode == IORING_OP_SEND) {
if (READ_ONCE(sqe->__pad3[0]))
@@ -780,6 +788,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
sr->done_io = 0;
+ sr->retry_flags = 0;
if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
@@ -828,6 +837,9 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_recvmsg_prep_setup(req);
}
+/* bits to clear in old and inherit in new cflags on bundle retry */
+#define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)
+
/*
* Finishes io_recv and io_recvmsg.
*
@@ -845,11 +857,27 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
if (sr->flags & IORING_RECVSEND_BUNDLE) {
- cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
+ size_t this_ret = *ret - sr->done_io;
+
+ cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
issue_flags);
+ if (sr->retry_flags & IO_SR_MSG_RETRY)
+ cflags = req->cqe.flags | (cflags & CQE_F_MASK);
/* bundle with no more immediate buffers, we're done */
if (req->flags & REQ_F_BL_EMPTY)
goto finish;
+ /*
+ * If more is available AND it was a full transfer, retry and
+ * append to this one
+ */
+ if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
+ !iov_iter_count(&kmsg->msg.msg_iter)) {
+ req->cqe.flags = cflags & ~CQE_F_MASK;
+ sr->len = kmsg->msg.msg_inq;
+ sr->done_io += this_ret;
+ sr->retry_flags |= IO_SR_MSG_RETRY;
+ return false;
+ }
} else {
cflags |= io_put_kbuf(req, *ret, issue_flags);
}
@@ -1088,13 +1116,21 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
arg.mode |= KBUF_MODE_FREE;
}
- if (kmsg->msg.msg_inq > 0)
+ if (kmsg->msg.msg_inq > 1)
arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);
ret = io_buffers_peek(req, &arg);
if (unlikely(ret < 0))
return ret;
+ if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
+ kmsg->free_iov_nr = ret;
+ kmsg->free_iov = arg.iovs;
+ req->flags |= REQ_F_NEED_CLEANUP;
+ }
+ if (arg.partial_map)
+ sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP;
+
/* special case 1 vec, can be a fast path */
if (ret == 1) {
sr->buf = arg.iovs[0].iov_base;
@@ -1103,11 +1139,6 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
}
iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
arg.out_len);
- if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
- kmsg->free_iov_nr = ret;
- kmsg->free_iov = arg.iovs;
- req->flags |= REQ_F_NEED_CLEANUP;
- }
} else {
void __user *buf;
@@ -1228,6 +1259,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_kiocb *notif;
zc->done_io = 0;
+ zc->retry_flags = 0;
req->flags |= REQ_F_POLL_NO_LAZY;
if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))