summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2025-06-16 21:10:47 +0300
committerJakub Kicinski <kuba@kernel.org>2025-06-18 04:29:21 +0300
commitfd0406e5ca53b804353d4b1b60a980c13cbfbea3 (patch)
tree0c607c5a0e26fc63260ee9c3ad71e1bbea7ac76e /net/ipv4/tcp_output.c
parente15962ae74d9d093ecf3cf7f60dc145801d9273e (diff)
downloadlinux-fd0406e5ca53b804353d4b1b60a980c13cbfbea3.tar.xz
net: tcp: tsq: Convert from tasklet to BH workqueue
The only generic interface to execute asynchronously in the BH context is tasklet; however, it's marked deprecated and has some design flaws. To replace tasklets, BH workqueue support was recently added. A BH workqueue behaves similarly to regular workqueues except that the queued work items are executed in the BH context. This patch converts TCP Small Queues implementation from tasklet to BH workqueue. Semantically, this is an equivalent conversion and there shouldn't be any user-visible behavior changes. While workqueue's queueing and execution paths are a bit heavier than tasklet's, unless the work item is being queued every packet, the difference hopefully shouldn't matter. My experience with the networking stack is very limited and this patch definitely needs attention from someone who actually understands networking. Signed-off-by: Tejun Heo <tj@kernel.org> Reviewed-by: Jason Xing <kerneljasonxing@gmail.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Cc: David Ahern <dsahern@kernel.org> Link: https://patch.msgid.link/aFBeJ38AS1ZF3Dq5@slm.duckdns.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--net/ipv4/tcp_output.c36
1 files changed, 18 insertions, 18 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index eb50746dc482..28f840724fe8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1066,15 +1066,15 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
* needs to be reallocated in a driver.
* The invariant being skb->truesize subtracted from sk->sk_wmem_alloc
*
- * Since transmit from skb destructor is forbidden, we use a tasklet
+ * Since transmit from skb destructor is forbidden, we use a BH work item
* to process all sockets that eventually need to send more skbs.
- * We use one tasklet per cpu, with its own queue of sockets.
+ * We use one work item per cpu, with its own queue of sockets.
*/
-struct tsq_tasklet {
- struct tasklet_struct tasklet;
+struct tsq_work {
+ struct work_struct work;
struct list_head head; /* queue of tcp sockets */
};
-static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
+static DEFINE_PER_CPU(struct tsq_work, tsq_work);
static void tcp_tsq_write(struct sock *sk)
{
@@ -1104,14 +1104,14 @@ static void tcp_tsq_handler(struct sock *sk)
bh_unlock_sock(sk);
}
/*
- * One tasklet per cpu tries to send more skbs.
- * We run in tasklet context but need to disable irqs when
+ * One work item per cpu tries to send more skbs.
+ * We run in BH context but need to disable irqs when
* transferring tsq->head because tcp_wfree() might
* interrupt us (non NAPI drivers)
*/
-static void tcp_tasklet_func(struct tasklet_struct *t)
+static void tcp_tsq_workfn(struct work_struct *work)
{
- struct tsq_tasklet *tsq = from_tasklet(tsq, t, tasklet);
+ struct tsq_work *tsq = container_of(work, struct tsq_work, work);
LIST_HEAD(list);
unsigned long flags;
struct list_head *q, *n;
@@ -1181,15 +1181,15 @@ void tcp_release_cb(struct sock *sk)
}
EXPORT_IPV6_MOD(tcp_release_cb);
-void __init tcp_tasklet_init(void)
+void __init tcp_tsq_work_init(void)
{
int i;
for_each_possible_cpu(i) {
- struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
+ struct tsq_work *tsq = &per_cpu(tsq_work, i);
INIT_LIST_HEAD(&tsq->head);
- tasklet_setup(&tsq->tasklet, tcp_tasklet_func);
+ INIT_WORK(&tsq->work, tcp_tsq_workfn);
}
}
@@ -1203,11 +1203,11 @@ void tcp_wfree(struct sk_buff *skb)
struct sock *sk = skb->sk;
struct tcp_sock *tp = tcp_sk(sk);
unsigned long flags, nval, oval;
- struct tsq_tasklet *tsq;
+ struct tsq_work *tsq;
bool empty;
/* Keep one reference on sk_wmem_alloc.
- * Will be released by sk_free() from here or tcp_tasklet_func()
+ * Will be released by sk_free() from here or tcp_tsq_workfn()
*/
WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));
@@ -1229,13 +1229,13 @@ void tcp_wfree(struct sk_buff *skb)
nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
} while (!try_cmpxchg(&sk->sk_tsq_flags, &oval, nval));
- /* queue this socket to tasklet queue */
+ /* queue this socket to BH workqueue */
local_irq_save(flags);
- tsq = this_cpu_ptr(&tsq_tasklet);
+ tsq = this_cpu_ptr(&tsq_work);
empty = list_empty(&tsq->head);
list_add(&tp->tsq_node, &tsq->head);
if (empty)
- tasklet_schedule(&tsq->tasklet);
+ queue_work(system_bh_wq, &tsq->work);
local_irq_restore(flags);
return;
out:
@@ -2634,7 +2634,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
/* Always send skb if rtx queue is empty or has one skb.
* No need to wait for TX completion to call us back,
- * after softirq/tasklet schedule.
+ * after softirq schedule.
* This helps when TX completions are delayed too much.
*/
if (tcp_rtx_queue_empty_or_single_skb(sk))