summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-23 22:51:04 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-23 22:51:04 +0300
commite1a8fde7203fa8a3e3f35d4f9df47477d23529c1 (patch)
treeb56c41f8e0e340679c40c09b3dab0544de8b54c5
parent368da430d04dbe31aded44e5f5c255ff0899ad1d (diff)
parentf548a12efd5ab97e6b1fb332e5634ce44b3d9328 (diff)
downloadlinux-e1a8fde7203fa8a3e3f35d4f9df47477d23529c1.tar.xz
Merge tag 'for-5.19/io_uring-net-2022-05-22' of git://git.kernel.dk/linux-block
Pull io_uring 'more data in socket' support from Jens Axboe: "To be able to fully utilize the 'poll first' support in the core io_uring branch, it's advantageous knowing if the socket was empty after a receive. This adds support for that" * tag 'for-5.19/io_uring-net-2022-05-22' of git://git.kernel.dk/linux-block: io_uring: return hint on whether more data is available after receive tcp: pass back data left in socket after receive
-rw-r--r--fs/io_uring.c18
-rw-r--r--include/linux/socket.h6
-rw-r--r--include/uapi/linux/io_uring.h2
-rw-r--r--net/ipv4/tcp.c16
4 files changed, 31 insertions, 11 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d9529275a030..1015dd49e7e5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6115,6 +6115,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
struct io_async_msghdr iomsg, *kmsg;
struct io_sr_msg *sr = &req->sr_msg;
struct socket *sock;
+ unsigned int cflags;
unsigned flags;
int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -6154,6 +6155,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+ kmsg->msg.msg_get_inq = 1;
ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock)
@@ -6178,7 +6180,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
ret += sr->done_io;
else if (sr->done_io)
ret = sr->done_io;
- __io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
+ cflags = io_put_kbuf(req, issue_flags);
+ if (kmsg->msg.msg_inq)
+ cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+ __io_req_complete(req, issue_flags, ret, cflags);
return 0;
}
@@ -6188,6 +6193,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
struct msghdr msg;
struct socket *sock;
struct iovec iov;
+ unsigned int cflags;
unsigned flags;
int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -6214,11 +6220,12 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
goto out_free;
msg.msg_name = NULL;
+ msg.msg_namelen = 0;
msg.msg_control = NULL;
+ msg.msg_get_inq = 1;
+ msg.msg_flags = 0;
msg.msg_controllen = 0;
- msg.msg_namelen = 0;
msg.msg_iocb = NULL;
- msg.msg_flags = 0;
flags = sr->msg_flags;
if (force_nonblock)
@@ -6249,7 +6256,10 @@ out_free:
ret += sr->done_io;
else if (sr->done_io)
ret = sr->done_io;
- __io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
+ cflags = io_put_kbuf(req, issue_flags);
+ if (msg.msg_inq)
+ cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+ __io_req_complete(req, issue_flags, ret, cflags);
return 0;
}
diff --git a/include/linux/socket.h b/include/linux/socket.h
index a1882e1e71d2..17311ad9f9af 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -50,6 +50,9 @@ struct linger {
struct msghdr {
void *msg_name; /* ptr to socket address structure */
int msg_namelen; /* size of socket address structure */
+
+ int msg_inq; /* output, data left in socket */
+
struct iov_iter msg_iter; /* data */
/*
@@ -62,8 +65,9 @@ struct msghdr {
void __user *msg_control_user;
};
bool msg_control_is_user : 1;
- __kernel_size_t msg_controllen; /* ancillary data buffer length */
+ bool msg_get_inq : 1;/* return INQ after receive */
unsigned int msg_flags; /* flags on received message */
+ __kernel_size_t msg_controllen; /* ancillary data buffer length */
struct kiocb *msg_iocb; /* ptr to iocb for async requests */
};
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 804916814e51..cc9544629eee 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -258,9 +258,11 @@ struct io_uring_cqe {
*
* IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
* IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
+ * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv
*/
#define IORING_CQE_F_BUFFER (1U << 0)
#define IORING_CQE_F_MORE (1U << 1)
+#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
enum {
IORING_CQE_BUFFER_SHIFT = 16,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cf18fbcbf123..bb7ef45408e1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2335,8 +2335,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
if (sk->sk_state == TCP_LISTEN)
goto out;
- if (tp->recvmsg_inq)
+ if (tp->recvmsg_inq) {
*cmsg_flags = TCP_CMSG_INQ;
+ msg->msg_get_inq = 1;
+ }
timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */
@@ -2559,7 +2561,7 @@ recv_sndq:
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len)
{
- int cmsg_flags = 0, ret, inq;
+ int cmsg_flags = 0, ret;
struct scm_timestamping_internal tss;
if (unlikely(flags & MSG_ERRQUEUE))
@@ -2576,12 +2578,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
release_sock(sk);
sk_defer_free_flush(sk);
- if (cmsg_flags && ret >= 0) {
+ if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
if (cmsg_flags & TCP_CMSG_TS)
tcp_recv_timestamp(msg, sk, &tss);
- if (cmsg_flags & TCP_CMSG_INQ) {
- inq = tcp_inq_hint(sk);
- put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
+ if (msg->msg_get_inq) {
+ msg->msg_inq = tcp_inq_hint(sk);
+ if (cmsg_flags & TCP_CMSG_INQ)
+ put_cmsg(msg, SOL_TCP, TCP_CM_INQ,
+ sizeof(msg->msg_inq), &msg->msg_inq);
}
}
return ret;