// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/eventfd.h>
#include <linux/eventpoll.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io-wq.h"
#include "eventfd.h"

struct io_ev_fd {
	struct eventfd_ctx *cq_ev_fd;
	unsigned int eventfd_async;
	/* protected by ->completion_lock */
	unsigned last_cq_tail;
	refcount_t refs;
	atomic_t ops;
	struct rcu_head rcu;
};

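/*
 * Bits for io_ev_fd->ops. The SIGNAL bit is set while a deferred signal is
 * queued via call_rcu_hurry(), so concurrent callers don't queue it twice.
 */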
enum {
	IO_EVENTFD_OP_SIGNAL_BIT,
};

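/* RCU callback: releases the eventfd context once the last reference is gone */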
static void io_eventfd_free(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_ctx_put(ev_fd->cq_ev_fd);
	kfree(ev_fd);
}

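/* Drop a reference; the final put frees the ev_fd after an RCU grace period */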
static void io_eventfd_put(struct io_ev_fd *ev_fd)
{
	if (refcount_dec_and_test(&ev_fd->refs))
		call_rcu(&ev_fd->rcu, io_eventfd_free);
}

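/*
 * RCU callback used when the eventfd can't be signaled directly from the
 * completion path; delivers the deferred signal and drops the reference
 * held across call_rcu_hurry().
 */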
static void io_eventfd_do_signal(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
	io_eventfd_put(ev_fd);
}

/*
 * Returns true if the caller should put the ev_fd reference, false if not.
 */
static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
{
	if (eventfd_signal_allowed()) {
		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
		return true;
	}
	if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
		call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
		return false;
	}
	return true;
}

/*
 * Trigger if eventfd_async isn't set, or if it's set and the caller is
 * an async worker.
 */
static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
{
	return !ev_fd->eventfd_async || io_wq_current_is_worker();
}

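/*
 * Signal the registered eventfd, if present and allowed to trigger. When
 * @cqe_event is set, skip signaling unless new CQEs have been posted since
 * the last notification, so the eventfd count reflects actual CQ activity.
 */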
void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
{
	bool skip = false;
	struct io_ev_fd *ev_fd;

	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
		return;

	guard(rcu)();
	ev_fd = rcu_dereference(ctx->io_ev_fd);
	/*
	 * Check that ev_fd still exists, in case an io_eventfd_unregister
	 * call completed between the caller's decision to signal and the
	 * RCU read-side critical section entered above.
	 */
	if (!ev_fd)
		return;
	if (!io_eventfd_trigger(ev_fd) || !refcount_inc_not_zero(&ev_fd->refs))
		return;

	if (cqe_event) {
		/*
		 * Eventfd should only get triggered when at least one event
		 * has been posted. Some applications rely on the eventfd
		 * notification count only changing IFF a new CQE has been
		 * added to the CQ ring. There's no dependency on 1:1
		 * relationship between how many times this function is called
		 * (and hence the eventfd count) and number of CQEs posted to
		 * the CQ ring.
		 */
		spin_lock(&ctx->completion_lock);
		skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
		ev_fd->last_cq_tail = ctx->cached_cq_tail;
		spin_unlock(&ctx->completion_lock);
	}

	if (skip || __io_eventfd_signal(ev_fd))
		io_eventfd_put(ev_fd);
}

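/*
 * Register an eventfd to be signaled on CQE completions. Called with
 * ctx->uring_lock held; only one eventfd may be registered at a time.
 */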
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

	spin_lock(&ctx->completion_lock);
	ev_fd->last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	ev_fd->eventfd_async = eventfd_async;
	ctx->has_evfd = true;
	refcount_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}

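/*
 * Unregister the currently registered eventfd, if any. Also called with
 * ctx->uring_lock held; returns -ENXIO if no eventfd is registered.
 */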
int io_eventfd_unregister(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd) {
		ctx->has_evfd = false;
		rcu_assign_pointer(ctx->io_ev_fd, NULL);
		io_eventfd_put(ev_fd);
		return 0;
	}

	return -ENXIO;
}