// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "../kernel/futex/futex.h"
#include "io_uring.h"
#include "alloc_cache.h"
#include "futex.h"

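/*
 * Per-request state for the futex opcodes. For FUTEX_WAIT/FUTEX_WAKE,
 * uaddr/futex_val/futex_mask describe a single futex. For FUTEX_WAITV,
 * uaddr points to a user array of futex_nr struct futex_waitv entries,
 * futexv_owned is the claim bit that serializes wake vs cancel, and
 * futexv_unqueued records that the vector has already been unqueued.
 */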
struct io_futex {
	struct file	*file;
	void __user	*uaddr;
	unsigned long	futex_val;
	unsigned long	futex_mask;
	unsigned long	futexv_owned;
	u32		futex_flags;
	unsigned int	futex_nr;
	bool		futexv_unqueued;
};

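/*
 * Async data for a single-futex wait: the futex_q that gets queued on the
 * futex hash, plus a backpointer to the owning request for the wake callback.
 */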
struct io_futex_data {
	struct futex_q	q;
	struct io_kiocb	*req;
};

#define IO_FUTEX_ALLOC_CACHE_MAX	32

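/*
 * io_futex_data allocations are recycled through a per-ring cache, capped
 * at IO_FUTEX_ALLOC_CACHE_MAX entries, to avoid a kmalloc/kfree cycle on
 * every FUTEX_WAIT.
 */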
bool io_futex_cache_init(struct io_ring_ctx *ctx)
{
	return io_alloc_cache_init(&ctx->futex_cache, IO_FUTEX_ALLOC_CACHE_MAX,
				sizeof(struct io_futex_data), 0);
}

void io_futex_cache_free(struct io_ring_ctx *ctx)
{
	io_alloc_cache_free(&ctx->futex_cache, kfree);
}

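/* Remove the request from the cancelation lookup list and post its CQE. */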
static void __io_futex_complete(struct io_kiocb *req, io_tw_token_t tw)
{
	hlist_del_init(&req->hash_node);
	io_req_task_complete(req, tw);
}

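/*
 * Task-work completion for FUTEX_WAIT: return the io_futex_data to the
 * per-ring cache before posting the CQE.
 */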
static void io_futex_complete(struct io_kiocb *req, io_tw_token_t tw)
{
	struct io_ring_ctx *ctx = req->ctx;

	io_tw_lock(ctx, tw);
	io_cache_free(&ctx->futex_cache, req->async_data);
	io_req_async_data_clear(req, 0);
	__io_futex_complete(req, tw);
}

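/*
 * Task-work completion for FUTEX_WAITV: unqueue any futexes that are still
 * queued. If unqueueing finds that one of them was woken in the meantime,
 * its index becomes the result. Then free the vector and post the CQE.
 */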
static void io_futexv_complete(struct io_kiocb *req, io_tw_token_t tw)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct futex_vector *futexv = req->async_data;

	io_tw_lock(req->ctx, tw);

	if (!iof->futexv_unqueued) {
		int res;

		res = futex_unqueue_multiple(futexv, iof->futex_nr);
		if (res != -1)
			io_req_set_res(req, res, 0);
	}

	io_req_async_data_free(req);
	__io_futex_complete(req, tw);
}

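/*
 * A waitv request can be completed by either a wakeup or a cancelation.
 * Whoever wins the claim on futexv_owned gets to complete it; the loser
 * backs off.
 */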
static bool io_futexv_claim(struct io_futex *iof)
{
	if (test_bit(0, &iof->futexv_owned) ||
	    test_and_set_bit_lock(0, &iof->futexv_owned))
		return false;
	return true;
}

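/*
 * Attempt to cancel a queued futex request. Returns false if the wait has
 * already been woken (or a wake is in progress), in which case the normal
 * wake path completes it.
 */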
static bool __io_futex_cancel(struct io_kiocb *req)
{
	/* futex wake already done or in progress */
	if (req->opcode == IORING_OP_FUTEX_WAIT) {
		struct io_futex_data *ifd = req->async_data;

		if (!futex_unqueue(&ifd->q))
			return false;
		req->io_task_work.func = io_futex_complete;
	} else {
		struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);

		if (!io_futexv_claim(iof))
			return false;
		req->io_task_work.func = io_futexv_complete;
	}

	hlist_del_init(&req->hash_node);
	io_req_set_res(req, -ECANCELED, 0);
	io_req_task_work_add(req);
	return true;
}

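/* Cancel a single matching futex request off ctx->futex_list. */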
int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		    unsigned int issue_flags)
{
	return io_cancel_remove(ctx, cd, issue_flags, &ctx->futex_list, __io_futex_cancel);
}

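/* Cancel all pending futex requests for a task, or everything at ring exit. */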
bool io_futex_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
			 bool cancel_all)
{
	return io_cancel_remove_all(ctx, tctx, &ctx->futex_list, cancel_all, __io_futex_cancel);
}

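/*
 * Prep for FUTEX_WAIT and FUTEX_WAKE: a single futex address, value and
 * mask, with the FUTEX2 flags carried in sqe->fd.
 */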
int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	u32 flags;

	if (unlikely(sqe->len || sqe->futex_flags || sqe->buf_index ||
		     sqe->file_index))
		return -EINVAL;

	iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	iof->futex_val = READ_ONCE(sqe->addr2);
	iof->futex_mask = READ_ONCE(sqe->addr3);
	flags = READ_ONCE(sqe->fd);

	if (flags & ~FUTEX2_VALID_MASK)
		return -EINVAL;

	iof->futex_flags = futex2_to_flags(flags);
	if (!futex_flags_valid(iof->futex_flags))
		return -EINVAL;

	if (!futex_validate_input(iof->futex_flags, iof->futex_val) ||
	    !futex_validate_input(iof->futex_flags, iof->futex_mask))
		return -EINVAL;

	/* Mark as inflight, so file exit cancelation will find it */
	io_req_track_inflight(req);
	return 0;
}

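/*
 * Wake callback for an individual futex in a waitv request. The first side
 * to claim the request (this wake, or a cancelation) completes it via
 * task_work; wakeups on the remaining futexes are then ignored.
 */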
static void io_futex_wakev_fn(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct io_kiocb *req = q->wake_data;
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);

	if (!io_futexv_claim(iof))
		return;
	if (unlikely(!__futex_wake_mark(q)))
		return;

	io_req_set_res(req, 0, 0);
	req->io_task_work.func = io_futexv_complete;
	io_req_task_work_add(req);
}

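/*
 * Prep for FUTEX_WAITV: copy in and validate the user-supplied vector, and
 * hang it off the request as async data.
 */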
int io_futexv_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct futex_vector *futexv;
	int ret;

	/* No flags or mask supported for waitv */
	if (unlikely(sqe->fd || sqe->buf_index || sqe->file_index ||
		     sqe->addr2 || sqe->futex_flags || sqe->addr3))
		return -EINVAL;

	iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	iof->futex_nr = READ_ONCE(sqe->len);
	if (!iof->futex_nr || iof->futex_nr > FUTEX_WAITV_MAX)
		return -EINVAL;

	futexv = kcalloc(iof->futex_nr, sizeof(*futexv), GFP_KERNEL);
	if (!futexv)
		return -ENOMEM;

	ret = futex_parse_waitv(futexv, iof->uaddr, iof->futex_nr,
				io_futex_wakev_fn, req);
	if (ret) {
		kfree(futexv);
		return ret;
	}

	/* Mark as inflight, so file exit cancelation will find it */
	io_req_track_inflight(req);
	iof->futexv_owned = 0;
	iof->futexv_unqueued = 0;
	req->flags |= REQ_F_ASYNC_DATA;
	req->async_data = futexv;
	return 0;
}

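/* Wake callback for a single-futex wait: complete the request via task_work. */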
static void io_futex_wake_fn(struct wake_q_head *wake_q, struct futex_q *q)
{
	struct io_futex_data *ifd = container_of(q, struct io_futex_data, q);
	struct io_kiocb *req = ifd->req;

	if (unlikely(!__futex_wake_mark(q)))
		return;

	io_req_set_res(req, 0, 0);
	req->io_task_work.func = io_futex_complete;
	io_req_task_work_add(req);
}

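/*
 * Issue a FUTEX_WAITV: queue all futexes in the vector and let the wake or
 * cancel side complete the request asynchronously.
 */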
int io_futexv_wait(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct futex_vector *futexv = req->async_data;
	struct io_ring_ctx *ctx = req->ctx;
	int ret, woken = -1;

	io_ring_submit_lock(ctx, issue_flags);

	ret = futex_wait_multiple_setup(futexv, iof->futex_nr, &woken);

	/*
	 * Error case, ret is < 0. Mark the request as failed.
	 */
	if (unlikely(ret < 0)) {
		io_ring_submit_unlock(ctx, issue_flags);
		req_set_fail(req);
		io_req_set_res(req, ret, 0);
		io_req_async_data_free(req);
		return IOU_COMPLETE;
	}

	/*
	 * 0 return means that we successfully set up the waiters, and that
	 * nobody triggered a wakeup while we were doing so. If the wakeup
	 * happened post setup, the task_work will be run post this issue and
	 * under the submission lock. 1 means we got woken while setting up,
	 * let that side do the completion. Note that
	 * futex_wait_multiple_setup() will have unqueued all the futexes in
	 * this case. Mark us as having done that already, since this is
	 * different from normal wakeup.
	 */
	if (!ret) {
		/*
		 * If futex_wait_multiple_setup() returns 0 for a
		 * successful setup, then the task state will not be
		 * runnable. This is fine for the sync syscall, as
		 * it'll be blocking unless we already got one of the
		 * futexes woken, but it obviously won't work for an
		 * async invocation. Mark us runnable again.
		 */
		__set_current_state(TASK_RUNNING);
		hlist_add_head(&req->hash_node, &ctx->futex_list);
	} else {
		iof->futexv_unqueued = 1;
		if (woken != -1)
			io_req_set_res(req, woken, 0);
	}

	io_ring_submit_unlock(ctx, issue_flags);
	return IOU_ISSUE_SKIP_COMPLETE;
}

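/*
 * Issue a FUTEX_WAIT: allocate a futex_q from the cache and queue it on the
 * futex hash. Completion happens from io_futex_wake_fn() or cancelation.
 */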
int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_futex_data *ifd = NULL;
	int ret;

	if (!iof->futex_mask) {
		ret = -EINVAL;
		goto done;
	}

	io_ring_submit_lock(ctx, issue_flags);
	ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT);
	if (!ifd) {
		ret = -ENOMEM;
		goto done_unlock;
	}

	req->flags |= REQ_F_ASYNC_DATA;
	req->async_data = ifd;
	ifd->q = futex_q_init;
	ifd->q.bitset = iof->futex_mask;
	ifd->q.wake = io_futex_wake_fn;
	ifd->req = req;

	ret = futex_wait_setup(iof->uaddr, iof->futex_val, iof->futex_flags,
			       &ifd->q, NULL, NULL);
	if (!ret) {
		hlist_add_head(&req->hash_node, &ctx->futex_list);
		io_ring_submit_unlock(ctx, issue_flags);

		return IOU_ISSUE_SKIP_COMPLETE;
	}

done_unlock:
	io_ring_submit_unlock(ctx, issue_flags);
done:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	io_req_async_data_free(req);
	return IOU_COMPLETE;
}

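/*
 * Issue a FUTEX_WAKE: this one completes inline, with the number of woken
 * waiters as the result.
 */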
int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	int ret;

	/*
	 * Strict flags - ensure that waking 0 futexes yields a 0 result.
	 * See commit 43adf8449510 ("futex: FLAGS_STRICT") for details.
	 */
	ret = futex_wake(iof->uaddr, FLAGS_STRICT | iof->futex_flags,
			 iof->futex_val, iof->futex_mask);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}