diff options
| author | David S. Miller <davem@davemloft.net> | 2017-03-25 06:49:31 +0300 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2017-03-25 06:49:31 +0300 |
| commit | 2239cc634395ecce69dd047be9104f71edc417b4 (patch) | |
| tree | c80a83afc1d175816d95920e9ea185f4f5f96650 /net/core | |
| parent | dcb421f4279f362c3eef7479616c76588b74d782 (diff) | |
| parent | 6d4339028b350efbf87c61e6d9e113e5373545c9 (diff) | |
| download | linux-2239cc634395ecce69dd047be9104f71edc417b4.tar.xz | |
Merge branch 'epoll-busypoll'
Alexander Duyck says:
====================
Add busy poll support for epoll
This patch set adds support for using busy polling with epoll. The main
idea behind this is that we record the NAPI ID for the last event that is
moved onto the ready list for the epoll context and then when we no longer
have any events on the ready list we begin polling with that ID. If the
busy polling does not yield any events then we will reset the NAPI ID to 0
and wait until a new event is added to the ready list with a valid NAPI ID
before we will resume busy polling.
Most of the changes in this set authored by me are meant to be cleanup or
fixes for various things. For example, I am trying to make it so that we
don't perform hash look-ups for the NAPI instance when we are only working
with sender_cpu and the like.
At the heart of this set is the last 3 patches which enable epoll support
and add support for obtaining the NAPI ID of a given socket. With these it
becomes possible for an application to make use of epoll and get optimal
busy poll utilization by stacking multiple sockets with the same NAPI ID on
the same epoll context.
v1: The first version of this series only allowed epoll to busy poll if all
of the sockets with a NAPI ID shared the same NAPI ID. I feel we were
too strict with this requirement, so I changed the behavior for v2.
v2: The second version was pretty much a full rewrite of the first set. The
main changes consisted of pulling apart several patches to better
address the need to clean up a few items and to make the code easier to
review. In the set however I went a bit overboard and was trying to fix
an issue that would only occur with 500+ years of uptime, and in the
process limited the range for busy_poll/busy_read unnecessarily.
v3: Split off the code for limiting busy_poll and busy_read into a separate
patch for net.
Updated patch that changed busy loop time tracking so that it uses
"local_clock() >> 10" as we originally did.
Tweaked "Change return type.." patch by moving declaration of "work"
inside the loop where is was accessed and always reset to 0.
Added "Acked-by" for patches that received acks.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
| -rw-r--r-- | net/core/datagram.c | 8 | ||||
| -rw-r--r-- | net/core/dev.c | 41 | ||||
| -rw-r--r-- | net/core/sock.c | 23 |
3 files changed, 48 insertions, 24 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c index ea633342ab0d..4608aa245410 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -256,8 +256,12 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, } spin_unlock_irqrestore(&queue->lock, cpu_flags); - } while (sk_can_busy_loop(sk) && - sk_busy_loop(sk, flags & MSG_DONTWAIT)); + + if (!sk_can_busy_loop(sk)) + break; + + sk_busy_loop(sk, flags & MSG_DONTWAIT); + } while (!skb_queue_empty(&sk->sk_receive_queue)); error = -EAGAIN; diff --git a/net/core/dev.c b/net/core/dev.c index 7869ae3837ca..ef9fe60ee294 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5060,27 +5060,28 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) do_softirq(); } -bool sk_busy_loop(struct sock *sk, int nonblock) +void napi_busy_loop(unsigned int napi_id, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg) { - unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0; + unsigned long start_time = loop_end ? busy_loop_current_time() : 0; int (*napi_poll)(struct napi_struct *napi, int budget); void *have_poll_lock = NULL; struct napi_struct *napi; - int rc; restart: - rc = false; napi_poll = NULL; rcu_read_lock(); - napi = napi_by_id(sk->sk_napi_id); + napi = napi_by_id(napi_id); if (!napi) goto out; preempt_disable(); for (;;) { - rc = 0; + int work = 0; + local_bh_disable(); if (!napi_poll) { unsigned long val = READ_ONCE(napi->state); @@ -5098,16 +5099,15 @@ restart: have_poll_lock = netpoll_poll_lock(napi); napi_poll = napi->poll; } - rc = napi_poll(napi, BUSY_POLL_BUDGET); - trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); + work = napi_poll(napi, BUSY_POLL_BUDGET); + trace_napi_poll(napi, work, BUSY_POLL_BUDGET); count: - if (rc > 0) - __NET_ADD_STATS(sock_net(sk), - LINUX_MIB_BUSYPOLLRXPACKETS, rc); + if (work > 0) + __NET_ADD_STATS(dev_net(napi->dev), + LINUX_MIB_BUSYPOLLRXPACKETS, work); local_bh_enable(); - if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) || - busy_loop_timeout(end_time)) + if (!loop_end || loop_end(loop_end_arg, start_time)) break; if (unlikely(need_resched())) { @@ -5116,9 +5116,8 @@ count: preempt_enable(); rcu_read_unlock(); cond_resched(); - rc = !skb_queue_empty(&sk->sk_receive_queue); - if (rc || busy_loop_timeout(end_time)) - return rc; + if (loop_end(loop_end_arg, start_time)) + return; goto restart; } cpu_relax(); @@ -5126,12 +5125,10 @@ count: if (napi_poll) busy_poll_stop(napi, have_poll_lock); preempt_enable(); - rc = !skb_queue_empty(&sk->sk_receive_queue); out: rcu_read_unlock(); - return rc; } -EXPORT_SYMBOL(sk_busy_loop); +EXPORT_SYMBOL(napi_busy_loop); #endif /* CONFIG_NET_RX_BUSY_POLL */ @@ -5143,10 +5140,10 @@ static void napi_hash_add(struct napi_struct *napi) spin_lock(&napi_hash_lock); - /* 0..NR_CPUS+1 range is reserved for sender_cpu use */ + /* 0..NR_CPUS range is reserved for sender_cpu use */ do { - if (unlikely(++napi_gen_id < NR_CPUS + 1)) - napi_gen_id = NR_CPUS + 1; + if (unlikely(++napi_gen_id < MIN_NAPI_ID)) + napi_gen_id = MIN_NAPI_ID; } while (napi_by_id(napi_gen_id)); napi->napi_id = napi_gen_id; diff --git a/net/core/sock.c b/net/core/sock.c index 1b9030ee6f4b..1a58a9dc6888 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1328,6 +1328,18 @@ int sock_getsockopt(struct socket *sock, int level, int optname, goto lenout; } + +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_INCOMING_NAPI_ID: + v.val = READ_ONCE(sk->sk_napi_id); + + /* aggregate non-NAPI IDs down to 0 */ + if (v.val < MIN_NAPI_ID) + v.val = 0; + + break; +#endif + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -3237,3 +3249,14 @@ static int __init proto_init(void) subsys_initcall(proto_init); #endif /* PROC_FS */ + +#ifdef CONFIG_NET_RX_BUSY_POLL +bool sk_busy_loop_end(void *p, unsigned long start_time) +{ + struct sock *sk = p; + + return !skb_queue_empty(&sk->sk_receive_queue) || + sk_busy_loop_timeout(sk, start_time); +} +EXPORT_SYMBOL(sk_busy_loop_end); +#endif /* CONFIG_NET_RX_BUSY_POLL */ |
