summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Paolo Abeni <pabeni@redhat.com> 2025-10-23 16:46:10 +0300
committer: Paolo Abeni <pabeni@redhat.com> 2025-10-23 16:46:10 +0300
commit: df890ceeb2e51af4c31c7b8c1c4c855dbbeba1e5 (patch)
tree: 304f33640c98a7fbb3f1b554049e52730816b619
parent: 10843e1492e474c02b91314963161731fa92af91 (diff)
parent: efd729408bc7d57e0c8d027b9ff514187fc1a05b (diff)
download: linux-df890ceeb2e51af4c31c7b8c1c4c855dbbeba1e5.tar.xz
Merge branch 'fix-poll-behaviour-for-tcp-based-tunnel-protocols'
Ralf Lici says: ==================== fix poll behaviour for TCP-based tunnel protocols This patch series introduces a polling function for datagram-style sockets that operates on custom skb queues, and updates ovpn (the OpenVPN data-channel offload module) and espintcp (the TCP Encapsulation of IKE and IPsec Packets implementation) to use it accordingly. Protocols like the aforementioned ones decapsulate packets received over TCP and deliver userspace-bound data through a separate skb queue, not the standard sk_receive_queue. Previously, both relied on datagram_poll(), which would signal readiness based on non-userspace packets, leading to misleading poll results and unnecessary recv attempts in userspace. Patch 1 introduces datagram_poll_queue(), a variant of datagram_poll() that accepts an explicit receive queue. This builds on the approach introduced in commit b50b058, which extended other skb-related functions to support custom queues. Patches 2 and 3 update espintcp_poll() and ovpn_tcp_poll() respectively to use this helper, ensuring readiness is only signaled when userspace data is available. Each patch is self-contained and the ovpn one includes rationale and lifecycle enforcement where appropriate. ==================== Link: https://patch.msgid.link/20251021100942.195010-1-ralf@mandelbit.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-rw-r--r-- drivers/net/ovpn/tcp.c 26
-rw-r--r-- include/linux/skbuff.h 3
-rw-r--r-- net/core/datagram.c 44
-rw-r--r-- net/xfrm/espintcp.c 6
4 files changed, 60 insertions, 19 deletions
diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c
index 289f62c5d2c7..0d7f30360d87 100644
--- a/drivers/net/ovpn/tcp.c
+++ b/drivers/net/ovpn/tcp.c
@@ -560,16 +560,34 @@ static void ovpn_tcp_close(struct sock *sk, long timeout)
static __poll_t ovpn_tcp_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
- __poll_t mask = datagram_poll(file, sock, wait);
+ struct sk_buff_head *queue = &sock->sk->sk_receive_queue;
struct ovpn_socket *ovpn_sock;
+ struct ovpn_peer *peer = NULL;
+ __poll_t mask;
rcu_read_lock();
ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
- if (ovpn_sock && ovpn_sock->peer &&
- !skb_queue_empty(&ovpn_sock->peer->tcp.user_queue))
- mask |= EPOLLIN | EPOLLRDNORM;
+ /* if we landed in this callback, we expect to have a
+ * meaningful state. The ovpn_socket lifecycle would
+ * prevent it otherwise.
+ */
+ if (WARN(!ovpn_sock || !ovpn_sock->peer,
+ "ovpn: null state in ovpn_tcp_poll!")) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ if (ovpn_peer_hold(ovpn_sock->peer)) {
+ peer = ovpn_sock->peer;
+ queue = &peer->tcp.user_queue;
+ }
rcu_read_unlock();
+ mask = datagram_poll_queue(file, sock, wait, queue);
+
+ if (peer)
+ ovpn_peer_put(peer);
+
return mask;
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fb3fec9affaa..a7cc3d1f4fd1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4204,6 +4204,9 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk,
struct sk_buff_head *sk_queue,
unsigned int flags, int *off, int *err);
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err);
+__poll_t datagram_poll_queue(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait,
+ struct sk_buff_head *rcv_queue);
__poll_t datagram_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index cb4b9ef2e4e3..c285c6465923 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -920,21 +920,22 @@ fault:
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
/**
- * datagram_poll - generic datagram poll
+ * datagram_poll_queue - same as datagram_poll, but on a specific receive
+ * queue
* @file: file struct
* @sock: socket
* @wait: poll table
+ * @rcv_queue: receive queue to poll
*
- * Datagram poll: Again totally generic. This also handles
- * sequenced packet sockets providing the socket receive queue
- * is only ever holding data ready to receive.
+ * Performs polling on the given receive queue, handling shutdown, error,
+ * and connection state. This is useful for protocols that deliver
+ * userspace-bound packets through a custom queue instead of
+ * sk->sk_receive_queue.
*
- * Note: when you *don't* use this routine for this protocol,
- * and you use a different write policy from sock_writeable()
- * then please supply your own write_space callback.
+ * Return: poll bitmask indicating the socket's current state
*/
-__poll_t datagram_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+__poll_t datagram_poll_queue(struct file *file, struct socket *sock,
+ poll_table *wait, struct sk_buff_head *rcv_queue)
{
struct sock *sk = sock->sk;
__poll_t mask;
@@ -956,7 +957,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
mask |= EPOLLHUP;
/* readable? */
- if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(rcv_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
@@ -978,4 +979,27 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
return mask;
}
+EXPORT_SYMBOL(datagram_poll_queue);
+
+/**
+ * datagram_poll - generic datagram poll
+ * @file: file struct
+ * @sock: socket
+ * @wait: poll table
+ *
+ * Datagram poll: Again totally generic. This also handles
+ * sequenced packet sockets providing the socket receive queue
+ * is only ever holding data ready to receive.
+ *
+ * Note: when you *don't* use this routine for this protocol,
+ * and you use a different write policy from sock_writeable()
+ * then please supply your own write_space callback.
+ *
+ * Return: poll bitmask indicating the socket's current state
+ */
+__poll_t datagram_poll(struct file *file, struct socket *sock, poll_table *wait)
+{
+ return datagram_poll_queue(file, sock, wait,
+ &sock->sk->sk_receive_queue);
+}
EXPORT_SYMBOL(datagram_poll);
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index fc7a603b04f1..bf744ac9d5a7 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -555,14 +555,10 @@ static void espintcp_close(struct sock *sk, long timeout)
static __poll_t espintcp_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
- __poll_t mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
struct espintcp_ctx *ctx = espintcp_getctx(sk);
- if (!skb_queue_empty(&ctx->ike_queue))
- mask |= EPOLLIN | EPOLLRDNORM;
-
- return mask;
+ return datagram_poll_queue(file, sock, wait, &ctx->ike_queue);
}
static void build_protos(struct proto *espintcp_prot,