summaryrefslogtreecommitdiff
path: root/fs/eventpoll.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--fs/eventpoll.c107
1 files changed, 60 insertions, 47 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8c596641a72b..12eebcdea9c8 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1171,6 +1171,10 @@ static inline bool chain_epi_lockless(struct epitem *epi)
{
struct eventpoll *ep = epi->ep;
+ /* Fast preliminary check */
+ if (epi->next != EP_UNACTIVE_PTR)
+ return false;
+
/* Check that the same epi has not been just chained from another CPU */
if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
return false;
@@ -1237,16 +1241,12 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* chained in ep->ovflist and requeued later on.
*/
if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
- if (epi->next == EP_UNACTIVE_PTR &&
- chain_epi_lockless(epi))
+ if (chain_epi_lockless(epi))
+ ep_pm_stay_awake_rcu(epi);
+ } else if (!ep_is_linked(epi)) {
+ /* In the usual case, add event to ready list. */
+ if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
ep_pm_stay_awake_rcu(epi);
- goto out_unlock;
- }
-
- /* If this file is already in the ready list we exit soon */
- if (!ep_is_linked(epi) &&
- list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) {
- ep_pm_stay_awake_rcu(epi);
}
/*
@@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
{
int res = 0, eavail, timed_out = 0;
u64 slack = 0;
- bool waiter = false;
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
@@ -1867,55 +1866,75 @@ fetch_events:
*/
ep_reset_busy_poll_napi_id(ep);
- /*
- * We don't have any available event to return to the caller. We need
- * to sleep here, and we will be woken by ep_poll_callback() when events
- * become available.
- */
- if (!waiter) {
- waiter = true;
- init_waitqueue_entry(&wait, current);
+ do {
+ /*
+ * Internally init_wait() uses autoremove_wake_function(),
+ * thus wait entry is removed from the wait queue on each
+ * wakeup. Why it is important? In case of several waiters
+ * each new wakeup will hit the next waiter, giving it the
+ * chance to harvest new event. Otherwise wakeup can be
+ * lost. This is also good performance-wise, because on
+ * normal wakeup path no need to call __remove_wait_queue()
+ * explicitly, thus ep->lock is not taken, which halts the
+ * event delivery.
+ */
+ init_wait(&wait);
write_lock_irq(&ep->lock);
- __add_wait_queue_exclusive(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
- }
-
- for (;;) {
/*
- * We don't want to sleep if the ep_poll_callback() sends us
- * a wakeup in between. That's why we set the task state
- * to TASK_INTERRUPTIBLE before doing the checks.
+ * Barrierless variant, waitqueue_active() is called under
+ * the same lock on wakeup ep_poll_callback() side, so it
+ * is safe to avoid an explicit barrier.
*/
- set_current_state(TASK_INTERRUPTIBLE);
+ __set_current_state(TASK_INTERRUPTIBLE);
+
/*
- * Always short-circuit for fatal signals to allow
- * threads to make a timely exit without the chance of
- * finding more events available and fetching
- * repeatedly.
+ * Do the final check under the lock. ep_scan_ready_list()
+ * plays with two lists (->rdllist and ->ovflist) and there
+ * is always a race when both lists are empty for short
+ * period of time although events are pending, so lock is
+ * important.
*/
- if (fatal_signal_pending(current)) {
- res = -EINTR;
- break;
+ eavail = ep_events_available(ep);
+ if (!eavail) {
+ if (signal_pending(current))
+ res = -EINTR;
+ else
+ __add_wait_queue_exclusive(&ep->wq, &wait);
}
+ write_unlock_irq(&ep->lock);
- eavail = ep_events_available(ep);
- if (eavail)
+ if (eavail || res)
break;
- if (signal_pending(current)) {
- res = -EINTR;
- break;
- }
if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) {
timed_out = 1;
break;
}
- }
+
+ /* We were woken up, thus go and try to harvest some events */
+ eavail = 1;
+
+ } while (0);
__set_current_state(TASK_RUNNING);
+ if (!list_empty_careful(&wait.entry)) {
+ write_lock_irq(&ep->lock);
+ __remove_wait_queue(&ep->wq, &wait);
+ write_unlock_irq(&ep->lock);
+ }
+
send_events:
+ if (fatal_signal_pending(current)) {
+ /*
+ * Always short-circuit for fatal signals to allow
+ * threads to make a timely exit without the chance of
+ * finding more events available and fetching
+ * repeatedly.
+ */
+ res = -EINTR;
+ }
/*
* Try to transfer events to user space. In case we get 0 events and
* there's still timeout left over, we go trying again in search of
@@ -1925,12 +1944,6 @@ send_events:
!(res = ep_send_events(ep, events, maxevents)) && !timed_out)
goto fetch_events;
- if (waiter) {
- write_lock_irq(&ep->lock);
- __remove_wait_queue(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
- }
-
return res;
}