path: root/io_uring/eventfd.c
Diffstat (limited to 'io_uring/eventfd.c')
-rw-r--r--  io_uring/eventfd.c  111
1 file changed, 59 insertions(+), 52 deletions(-)
diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index e37fddd5d9ce..78f8ab7db104 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -13,10 +13,12 @@
struct io_ev_fd {
struct eventfd_ctx *cq_ev_fd;
- unsigned int eventfd_async: 1;
- struct rcu_head rcu;
+ unsigned int eventfd_async;
+ /* protected by ->completion_lock */
+ unsigned last_cq_tail;
refcount_t refs;
atomic_t ops;
+ struct rcu_head rcu;
};
enum {
@@ -31,77 +33,83 @@ static void io_eventfd_free(struct rcu_head *rcu)
kfree(ev_fd);
}
+static void io_eventfd_put(struct io_ev_fd *ev_fd)
+{
+ if (refcount_dec_and_test(&ev_fd->refs))
+ call_rcu(&ev_fd->rcu, io_eventfd_free);
+}
+
static void io_eventfd_do_signal(struct rcu_head *rcu)
{
struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
+ io_eventfd_put(ev_fd);
+}
- if (refcount_dec_and_test(&ev_fd->refs))
- io_eventfd_free(rcu);
+/*
+ * Returns true if the caller should put the ev_fd reference, false if not.
+ */
+static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
+{
+ if (eventfd_signal_allowed()) {
+ eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
+ return true;
+ }
+ if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
+ call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
+ return false;
+ }
+ return true;
}
-void io_eventfd_signal(struct io_ring_ctx *ctx)
+/*
+ * Trigger if eventfd_async isn't set, or if it's set and the caller is
+ * an async worker.
+ */
+static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
{
- struct io_ev_fd *ev_fd = NULL;
+ return !ev_fd->eventfd_async || io_wq_current_is_worker();
+}
+
+void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
+{
+ bool skip = false;
+ struct io_ev_fd *ev_fd;
if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
return;
guard(rcu)();
-
- /*
- * rcu_dereference ctx->io_ev_fd once and use it for both for checking
- * and eventfd_signal
- */
ev_fd = rcu_dereference(ctx->io_ev_fd);
-
/*
- * Check again if ev_fd exists incase an io_eventfd_unregister call
+ * Check again if ev_fd exists in case an io_eventfd_unregister call
* completed between the NULL check of ctx->io_ev_fd at the start of
* the function and rcu_read_lock.
*/
- if (unlikely(!ev_fd))
+ if (!ev_fd)
return;
- if (!refcount_inc_not_zero(&ev_fd->refs))
+ if (!io_eventfd_trigger(ev_fd) || !refcount_inc_not_zero(&ev_fd->refs))
return;
- if (ev_fd->eventfd_async && !io_wq_current_is_worker())
- goto out;
- if (likely(eventfd_signal_allowed())) {
- eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
- } else {
- if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
- call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
- return;
- }
+ if (cqe_event) {
+ /*
+ * Eventfd should only get triggered when at least one event
+ * has been posted. Some applications rely on the eventfd
+ * notification count only changing IFF a new CQE has been
+ * added to the CQ ring. There's no dependency on 1:1
+ * relationship between how many times this function is called
+ * (and hence the eventfd count) and number of CQEs posted to
+ * the CQ ring.
+ */
+ spin_lock(&ctx->completion_lock);
+ skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
+ ev_fd->last_cq_tail = ctx->cached_cq_tail;
+ spin_unlock(&ctx->completion_lock);
}
-out:
- if (refcount_dec_and_test(&ev_fd->refs))
- call_rcu(&ev_fd->rcu, io_eventfd_free);
-}
-
-void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
-{
- bool skip;
-
- spin_lock(&ctx->completion_lock);
-
- /*
- * Eventfd should only get triggered when at least one event has been
- * posted. Some applications rely on the eventfd notification count
- * only changing IFF a new CQE has been added to the CQ ring. There's
- * no depedency on 1:1 relationship between how many times this
- * function is called (and hence the eventfd count) and number of CQEs
- * posted to the CQ ring.
- */
- skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
- ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
- spin_unlock(&ctx->completion_lock);
- if (skip)
- return;
- io_eventfd_signal(ctx);
+ if (skip || __io_eventfd_signal(ev_fd))
+ io_eventfd_put(ev_fd);
}
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
@@ -132,7 +140,7 @@ int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
}
spin_lock(&ctx->completion_lock);
- ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
+ ev_fd->last_cq_tail = ctx->cached_cq_tail;
spin_unlock(&ctx->completion_lock);
ev_fd->eventfd_async = eventfd_async;
@@ -152,8 +160,7 @@ int io_eventfd_unregister(struct io_ring_ctx *ctx)
if (ev_fd) {
ctx->has_evfd = false;
rcu_assign_pointer(ctx->io_ev_fd, NULL);
- if (refcount_dec_and_test(&ev_fd->refs))
- call_rcu(&ev_fd->rcu, io_eventfd_free);
+ io_eventfd_put(ev_fd);
return 0;
}
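
For readers skimming the change: the patch centralizes the drop-reference-and-RCU-free step in io_eventfd_put(), and every signalling path now takes a reference with refcount_inc_not_zero() before touching the eventfd. Below is a minimal userspace sketch of that lifetime pattern, not part of the patch: plain C11 atomics stand in for the kernel's refcount_t, an immediate free() stands in for the deferred call_rcu(), and all names are illustrative.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct ev_fd_like {
	atomic_int refs;               /* stands in for refcount_t refs */
};

/* Mirrors refcount_inc_not_zero(): only take a reference if the object
 * has not already started dying (i.e. refs has not reached zero). */
static bool get_ref(struct ev_fd_like *e)
{
	int old = atomic_load(&e->refs);

	while (old != 0) {
		if (atomic_compare_exchange_weak(&e->refs, &old, old + 1))
			return true;
	}
	return false;
}

/* Mirrors io_eventfd_put(): dropping the last reference frees the object.
 * The kernel defers the free through call_rcu(); here we free directly. */
static void put_ref(struct ev_fd_like *e)
{
	if (atomic_fetch_sub(&e->refs, 1) == 1)
		free(e);
}

int main(void)
{
	struct ev_fd_like *e = malloc(sizeof(*e));

	atomic_init(&e->refs, 1);      /* "registration" holds one reference */

	if (get_ref(e)) {              /* a signalling path takes a reference */
		puts("signal");        /* ... does its work ... */
		put_ref(e);            /* and drops it when done */
	}
	put_ref(e);                    /* "unregister" drops the last reference */
	return 0;
}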