// SPDX-License-Identifier: GPL-2.0
/*
 * Code related to the io_uring_register() syscall
 *
 * Copyright (C) 2023 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/refcount.h>
#include <linux/bits.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/nospec.h>
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "filetable.h"
#include "io_uring.h"
#include "opdef.h"
#include "tctx.h"
#include "rsrc.h"
#include "sqpoll.h"
#include "register.h"
#include "cancel.h"
#include "kbuf.h"
#include "napi.h"
#include "eventfd.h"
#include "msg_ring.h"
#include "memmap.h"
#include "zcrx.h"
#include "query.h"

#define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
				 IORING_REGISTER_LAST + IORING_OP_LAST)

static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
			   unsigned nr_args)
{
	struct io_uring_probe *p;
	size_t size;
	int i, ret;

	if (nr_args > IORING_OP_LAST)
		nr_args = IORING_OP_LAST;

	size = struct_size(p, ops, nr_args);
	p = memdup_user(arg, size);
	if (IS_ERR(p))
		return PTR_ERR(p);
	ret = -EINVAL;
	if (memchr_inv(p, 0, size))
		goto out;

	p->last_op = IORING_OP_LAST - 1;

	for (i = 0; i < nr_args; i++) {
		p->ops[i].op = i;
		if (io_uring_op_supported(i))
			p->ops[i].flags = IO_URING_OP_SUPPORTED;
	}
	p->ops_len = i;

	ret = 0;
	if (copy_to_user(arg, p, size))
		ret = -EFAULT;
out:
	kfree(p);
	return ret;
}

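/*
 * Illustrative userspace usage of IORING_REGISTER_PROBE (a sketch, not part
 * of the kernel build). sys_io_uring_register() is a hypothetical raw
 * syscall(2) wrapper, since libc provides no stub for io_uring_register().
 *
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/io_uring.h>
 *
 *	static int sys_io_uring_register(int fd, unsigned op, void *arg,
 *					 unsigned nr_args)
 *	{
 *		return syscall(__NR_io_uring_register, fd, op, arg, nr_args);
 *	}
 *
 *	// The probe struct must be zeroed, see the memchr_inv() check above.
 *	struct io_uring_probe *p = calloc(1, sizeof(*p) +
 *				256 * sizeof(struct io_uring_probe_op));
 *
 *	sys_io_uring_register(ring_fd, IORING_REGISTER_PROBE, p, 256);
 *	if (p->ops[IORING_OP_READV].flags & IO_URING_OP_SUPPORTED)
 *		;	// IORING_OP_READV is available on this kernel
 */
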
int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
{
	const struct cred *creds;

	creds = xa_erase(&ctx->personalities, id);
	if (creds) {
		put_cred(creds);
		return 0;
	}

	return -EINVAL;
}

static int io_register_personality(struct io_ring_ctx *ctx)
{
	const struct cred *creds;
	u32 id;
	int ret;

	creds = get_current_cred();

	ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
			XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
	if (ret < 0) {
		put_cred(creds);
		return ret;
	}
	return id;
}

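/*
 * Illustrative userspace usage (a sketch): register the current credentials
 * and issue a later SQE under them via sqe->personality. Uses the
 * sys_io_uring_register() wrapper sketched after io_probe() above.
 *
 *	int id = sys_io_uring_register(ring_fd, IORING_REGISTER_PERSONALITY,
 *				       NULL, 0);
 *	if (id >= 0)
 *		sqe->personality = id;	// this SQE runs with those creds
 *	...
 *	sys_io_uring_register(ring_fd, IORING_UNREGISTER_PERSONALITY,
 *			      NULL, id);
 */
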
static __cold int io_parse_restrictions(void __user *arg, unsigned int nr_args,
					struct io_restriction *restrictions)
{
	struct io_uring_restriction *res;
	size_t size;
	int i, ret;

	if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
		return -EINVAL;

	size = array_size(nr_args, sizeof(*res));
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	res = memdup_user(arg, size);
	if (IS_ERR(res))
		return PTR_ERR(res);

	ret = -EINVAL;

	for (i = 0; i < nr_args; i++) {
		switch (res[i].opcode) {
		case IORING_RESTRICTION_REGISTER_OP:
			if (res[i].register_op >= IORING_REGISTER_LAST)
				goto err;
			__set_bit(res[i].register_op, restrictions->register_op);
			break;
		case IORING_RESTRICTION_SQE_OP:
			if (res[i].sqe_op >= IORING_OP_LAST)
				goto err;
			__set_bit(res[i].sqe_op, restrictions->sqe_op);
			break;
		case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
			restrictions->sqe_flags_allowed = res[i].sqe_flags;
			break;
		case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
			restrictions->sqe_flags_required = res[i].sqe_flags;
			break;
		default:
			goto err;
		}
	}

	ret = 0;

err:
	kfree(res);
	return ret;
}

static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
					   void __user *arg, unsigned int nr_args)
{
	int ret;

	/* Restrictions allowed only if rings started disabled */
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	/* We allow only a single restrictions registration */
	if (ctx->restrictions.registered)
		return -EBUSY;

	ret = io_parse_restrictions(arg, nr_args, &ctx->restrictions);
	/* Reset all restrictions if an error happened */
	if (ret != 0)
		memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
	else
		ctx->restrictions.registered = true;
	return ret;
}

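/*
 * Illustrative userspace usage (a sketch): restrictions can only be
 * installed while the ring is still disabled, i.e. it was created with
 * IORING_SETUP_R_DISABLED, and take effect once the ring is enabled.
 *
 *	struct io_uring_restriction res[2] = {};
 *
 *	res[0].opcode = IORING_RESTRICTION_SQE_OP;
 *	res[0].sqe_op = IORING_OP_READV;	// allow only readv SQEs
 *	res[1].opcode = IORING_RESTRICTION_REGISTER_OP;
 *	res[1].register_op = IORING_REGISTER_ENABLE_RINGS;
 *
 *	sys_io_uring_register(ring_fd, IORING_REGISTER_RESTRICTIONS, res, 2);
 *	sys_io_uring_register(ring_fd, IORING_REGISTER_ENABLE_RINGS, NULL, 0);
 */
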
static int io_register_enable_rings(struct io_ring_ctx *ctx)
{
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) {
		WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
		/*
		 * Lazy activation attempts would fail if it was polled before
		 * submitter_task is set.
		 */
		if (wq_has_sleeper(&ctx->poll_wq))
			io_activate_pollwq(ctx);
	}

	if (ctx->restrictions.registered)
		ctx->restricted = 1;

	ctx->flags &= ~IORING_SETUP_R_DISABLED;
	if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
		wake_up(&ctx->sq_data->wait);
	return 0;
}

static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx,
					 cpumask_var_t new_mask)
{
	int ret;

	if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
		ret = io_wq_cpu_affinity(current->io_uring, new_mask);
	} else {
		mutex_unlock(&ctx->uring_lock);
		ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask);
		mutex_lock(&ctx->uring_lock);
	}

	return ret;
}

static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
				       void __user *arg, unsigned len)
{
	cpumask_var_t new_mask;
	int ret;

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(new_mask);
	if (len > cpumask_size())
		len = cpumask_size();

#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		ret = compat_get_bitmap(cpumask_bits(new_mask),
					(const compat_ulong_t __user *)arg,
					len * 8 /* CHAR_BIT */);
	else
#endif
		ret = copy_from_user(new_mask, arg, len);

	if (ret) {
		free_cpumask_var(new_mask);
		return -EFAULT;
	}

	ret = __io_register_iowq_aff(ctx, new_mask);
	free_cpumask_var(new_mask);
	return ret;
}

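/*
 * Illustrative userspace usage (a sketch): pin io-wq workers (or, with
 * SQPOLL, the poll thread's io-wq) to CPUs 0 and 1.
 *
 *	#include <sched.h>
 *
 *	cpu_set_t mask;
 *
 *	CPU_ZERO(&mask);
 *	CPU_SET(0, &mask);
 *	CPU_SET(1, &mask);
 *	sys_io_uring_register(ring_fd, IORING_REGISTER_IOWQ_AFF, &mask,
 *			      sizeof(mask));
 */
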
static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
{
	return __io_register_iowq_aff(ctx, NULL);
}

static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
					       void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_tctx_node *node;
	struct io_uring_task *tctx = NULL;
	struct io_sq_data *sqd = NULL;
	__u32 new_count[2];
	int i, ret;

	if (copy_from_user(new_count, arg, sizeof(new_count)))
		return -EFAULT;
	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i] > INT_MAX)
			return -EINVAL;

	if (ctx->flags & IORING_SETUP_SQPOLL) {
		sqd = ctx->sq_data;
		if (sqd) {
			struct task_struct *tsk;

			/*
			 * Observe the correct sqd->lock -> ctx->uring_lock
			 * ordering. Fine to drop uring_lock here, we hold
			 * a ref to the ctx.
			 */
			refcount_inc(&sqd->refs);
			mutex_unlock(&ctx->uring_lock);
			mutex_lock(&sqd->lock);
			mutex_lock(&ctx->uring_lock);
			tsk = sqpoll_task_locked(sqd);
			if (tsk)
				tctx = tsk->io_uring;
		}
	} else {
		tctx = current->io_uring;
	}

	BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));

	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i])
			ctx->iowq_limits[i] = new_count[i];
	ctx->iowq_limits_set = true;

	if (tctx && tctx->io_wq) {
		ret = io_wq_max_workers(tctx->io_wq, new_count);
		if (ret)
			goto err;
	} else {
		memset(new_count, 0, sizeof(new_count));
	}

	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}

	if (copy_to_user(arg, new_count, sizeof(new_count)))
		return -EFAULT;

	/* that's it for SQPOLL, only the SQPOLL task creates requests */
	if (sqd)
		return 0;

	/* now propagate the restriction to all registered users */
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		tctx = node->task->io_uring;
		if (WARN_ON_ONCE(!tctx->io_wq))
			continue;

		for (i = 0; i < ARRAY_SIZE(new_count); i++)
			new_count[i] = ctx->iowq_limits[i];
		/* ignore errors, it always returns zero anyway */
		(void)io_wq_max_workers(tctx->io_wq, new_count);
	}
	return 0;
err:
	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}
	return ret;
}

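/*
 * Illustrative userspace usage (a sketch): new_count[0] caps bounded
 * workers (e.g. regular file I/O), new_count[1] caps unbounded ones. A
 * zero entry leaves that limit untouched, and the previous values are
 * copied back, so { 0, 0 } simply queries the current limits.
 *
 *	__u32 counts[2] = { 4, 16 };
 *
 *	sys_io_uring_register(ring_fd, IORING_REGISTER_IOWQ_MAX_WORKERS,
 *			      counts, 2);
 *	// counts[] now holds the previous limits
 */
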
static int io_register_clock(struct io_ring_ctx *ctx,
			     struct io_uring_clock_register __user *arg)
{
	struct io_uring_clock_register reg;

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;
	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;

	switch (reg.clockid) {
	case CLOCK_MONOTONIC:
		ctx->clock_offset = 0;
		break;
	case CLOCK_BOOTTIME:
		ctx->clock_offset = TK_OFFS_BOOT;
		break;
	default:
		return -EINVAL;
	}

	ctx->clockid = reg.clockid;
	return 0;
}

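/*
 * Illustrative userspace usage (a sketch): make CQ wait timeouts use
 * CLOCK_BOOTTIME instead of the default CLOCK_MONOTONIC. Note nr_args
 * must be 0 for this opcode, see __io_uring_register() below.
 *
 *	struct io_uring_clock_register reg = {
 *		.clockid = CLOCK_BOOTTIME,
 *	};
 *
 *	sys_io_uring_register(ring_fd, IORING_REGISTER_CLOCK, &reg, 0);
 */
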
/*
 * State to maintain until we can swap. Both new and old state, used for
 * either mapping or freeing.
 */
struct io_ring_ctx_rings {
	struct io_rings *rings;
	struct io_uring_sqe *sq_sqes;

	struct io_mapped_region sq_region;
	struct io_mapped_region ring_region;
};

static void io_register_free_rings(struct io_ring_ctx *ctx,
				   struct io_uring_params *p,
				   struct io_ring_ctx_rings *r)
{
	io_free_region(ctx, &r->sq_region);
	io_free_region(ctx, &r->ring_region);
}

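/*
 * Stash the ctx's current value of @field in the (o)ld state and install
 * the (n)ew one, so the old mapping can be freed once the swap is done.
 */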
#define swap_old(ctx, o, n, field)		\
	do {					\
		(o).field = (ctx)->field;	\
		(ctx)->field = (n).field;	\
	} while (0)

#define RESIZE_FLAGS	(IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP)
#define COPY_FLAGS	(IORING_SETUP_NO_SQARRAY | IORING_SETUP_SQE128 | \
			 IORING_SETUP_CQE32 | IORING_SETUP_NO_MMAP | \
			 IORING_SETUP_CQE_MIXED)

static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_uring_region_desc rd;
	struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL;
	size_t size, sq_array_offset;
	unsigned i, tail, old_head;
	struct io_uring_params p;
	int ret;

	/* limited to DEFER_TASKRUN for now */
	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
		return -EINVAL;
	if (copy_from_user(&p, arg, sizeof(p)))
		return -EFAULT;
	if (p.flags & ~RESIZE_FLAGS)
		return -EINVAL;

	/* properties that are always inherited */
	p.flags |= (ctx->flags & COPY_FLAGS);

	ret = io_uring_fill_params(p.sq_entries, &p);
	if (unlikely(ret))
		return ret;

	/* nothing to do, but copy params back */
	if (p.sq_entries == ctx->sq_entries && p.cq_entries == ctx->cq_entries) {
		if (copy_to_user(arg, &p, sizeof(p)))
			return -EFAULT;
		return 0;
	}

	size = rings_size(p.flags, p.sq_entries, p.cq_entries,
			  &sq_array_offset);
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	memset(&rd, 0, sizeof(rd));
	rd.size = PAGE_ALIGN(size);
	if (p.flags & IORING_SETUP_NO_MMAP) {
		rd.user_addr = p.cq_off.user_addr;
		rd.flags |= IORING_MEM_REGION_TYPE_USER;
	}
	ret = io_create_region_mmap_safe(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
	if (ret) {
		io_register_free_rings(ctx, &p, &n);
		return ret;
	}
	n.rings = io_region_get_ptr(&n.ring_region);

	/*
	 * At this point n.rings is shared with userspace, just like o.rings
	 * is as well. While we don't expect userspace to modify it while
	 * a resize is in progress, and it's most likely that userspace will
	 * shoot itself in the foot if it does, we can't always assume good
	 * intent... Use read/write once helpers from here on to indicate the
	 * shared nature of it.
	 */
	WRITE_ONCE(n.rings->sq_ring_mask, p.sq_entries - 1);
	WRITE_ONCE(n.rings->cq_ring_mask, p.cq_entries - 1);
	WRITE_ONCE(n.rings->sq_ring_entries, p.sq_entries);
	WRITE_ONCE(n.rings->cq_ring_entries, p.cq_entries);

	if (copy_to_user(arg, &p, sizeof(p))) {
		io_register_free_rings(ctx, &p, &n);
		return -EFAULT;
	}

	if (p.flags & IORING_SETUP_SQE128)
		size = array_size(2 * sizeof(struct io_uring_sqe), p.sq_entries);
	else
		size = array_size(sizeof(struct io_uring_sqe), p.sq_entries);
	if (size == SIZE_MAX) {
		io_register_free_rings(ctx, &p, &n);
		return -EOVERFLOW;
	}

	memset(&rd, 0, sizeof(rd));
	rd.size = PAGE_ALIGN(size);
	if (p.flags & IORING_SETUP_NO_MMAP) {
		rd.user_addr = p.sq_off.user_addr;
		rd.flags |= IORING_MEM_REGION_TYPE_USER;
	}
	ret = io_create_region_mmap_safe(ctx, &n.sq_region, &rd, IORING_OFF_SQES);
	if (ret) {
		io_register_free_rings(ctx, &p, &n);
		return ret;
	}
	n.sq_sqes = io_region_get_ptr(&n.sq_region);

	/*
	 * If using SQPOLL, park the thread
	 */
	if (ctx->sq_data) {
		mutex_unlock(&ctx->uring_lock);
		io_sq_thread_park(ctx->sq_data);
		mutex_lock(&ctx->uring_lock);
	}

	/*
	 * We'll do the swap. Grab the ctx->mmap_lock, which will exclude
	 * any new mmap's on the ring fd. Clear out existing mappings to prevent
	 * mmap from seeing them, as we'll unmap them. Any attempt to mmap
	 * existing rings beyond this point will fail. Not that it could proceed
	 * at this point anyway, as the io_uring mmap side needs to grab the
	 * ctx->mmap_lock as well. Likewise, hold the completion lock over the
	 * duration of the actual swap.
	 */
	mutex_lock(&ctx->mmap_lock);
	spin_lock(&ctx->completion_lock);
	o.rings = ctx->rings;
	ctx->rings = NULL;
	o.sq_sqes = ctx->sq_sqes;
	ctx->sq_sqes = NULL;

	/*
	 * Now copy SQ and CQ entries, if any. If either of the destination
	 * rings can't hold what is already there, then fail the operation.
	 */
	tail = READ_ONCE(o.rings->sq.tail);
	old_head = READ_ONCE(o.rings->sq.head);
	if (tail - old_head > p.sq_entries)
		goto overflow;
	for (i = old_head; i < tail; i++) {
		unsigned src_head = i & (ctx->sq_entries - 1);
		unsigned dst_head = i & (p.sq_entries - 1);

		n.sq_sqes[dst_head] = o.sq_sqes[src_head];
	}
	WRITE_ONCE(n.rings->sq.head, old_head);
	WRITE_ONCE(n.rings->sq.tail, tail);

	tail = READ_ONCE(o.rings->cq.tail);
	old_head = READ_ONCE(o.rings->cq.head);
	if (tail - old_head > p.cq_entries) {
overflow:
		/* restore old rings, and return -EOVERFLOW via cleanup path */
		ctx->rings = o.rings;
		ctx->sq_sqes = o.sq_sqes;
		to_free = &n;
		ret = -EOVERFLOW;
		goto out;
	}
	for (i = old_head; i < tail; i++) {
		unsigned src_head = i & (ctx->cq_entries - 1);
		unsigned dst_head = i & (p.cq_entries - 1);

		n.rings->cqes[dst_head] = o.rings->cqes[src_head];
	}
	WRITE_ONCE(n.rings->cq.head, old_head);
	WRITE_ONCE(n.rings->cq.tail, tail);
	/* invalidate cached cqe refill */
	ctx->cqe_cached = ctx->cqe_sentinel = NULL;

	WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped));
	atomic_set(&n.rings->sq_flags, atomic_read(&o.rings->sq_flags));
	WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags));
	WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow));

	/* all done, store old pointers and assign new ones */
	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
		ctx->sq_array = (u32 *)((char *)n.rings + sq_array_offset);

	ctx->sq_entries = p.sq_entries;
	ctx->cq_entries = p.cq_entries;

	ctx->rings = n.rings;
	ctx->sq_sqes = n.sq_sqes;
	swap_old(ctx, o, n, ring_region);
	swap_old(ctx, o, n, sq_region);
	to_free = &o;
	ret = 0;
out:
	spin_unlock(&ctx->completion_lock);
	mutex_unlock(&ctx->mmap_lock);
	io_register_free_rings(ctx, &p, to_free);

	if (ctx->sq_data)
		io_sq_thread_unpark(ctx->sq_data);

	return ret;
}

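/*
 * Illustrative userspace usage (a sketch): grow a DEFER_TASKRUN ring to
 * 256 SQ / 512 CQ entries. Entries currently sitting in the rings are
 * carried over; the call fails with -EOVERFLOW if they would not fit.
 *
 *	struct io_uring_params p = {
 *		.sq_entries = 256,
 *		.cq_entries = 512,
 *		.flags = IORING_SETUP_CQSIZE,
 *	};
 *
 *	sys_io_uring_register(ring_fd, IORING_REGISTER_RESIZE_RINGS, &p, 1);
 */
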
static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
{
	struct io_uring_mem_region_reg __user *reg_uptr = uarg;
	struct io_uring_mem_region_reg reg;
	struct io_uring_region_desc __user *rd_uptr;
	struct io_uring_region_desc rd;
	int ret;

	if (io_region_is_set(&ctx->param_region))
		return -EBUSY;
	if (copy_from_user(&reg, reg_uptr, sizeof(reg)))
		return -EFAULT;
	rd_uptr = u64_to_user_ptr(reg.region_uptr);
	if (copy_from_user(&rd, rd_uptr, sizeof(rd)))
		return -EFAULT;
	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;
	if (reg.flags & ~IORING_MEM_REGION_REG_WAIT_ARG)
		return -EINVAL;

	/*
	 * This ensures there are no waiters. Waiters are unlocked and it's
	 * hard to synchronise with them, especially if we need to initialise
	 * the region.
	 */
	if ((reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) &&
	    !(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EINVAL;

	ret = io_create_region_mmap_safe(ctx, &ctx->param_region, &rd,
					 IORING_MAP_OFF_PARAM_REGION);
	if (ret)
		return ret;
	if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
		io_free_region(ctx, &ctx->param_region);
		return -EFAULT;
	}

	if (reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) {
		ctx->cq_wait_arg = io_region_get_ptr(&ctx->param_region);
		ctx->cq_wait_size = rd.size;
	}
	return 0;
}

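/*
 * Illustrative userspace usage (a sketch; field names assumed from the
 * matching uapi io_uring.h): hand the kernel a user-provided region to be
 * used as the extended CQ wait argument. When WAIT_ARG is requested, this
 * must happen while the ring is still IORING_SETUP_R_DISABLED, as checked
 * above.
 *
 *	void *buf = aligned_alloc(4096, 4096);
 *	struct io_uring_region_desc rd = {
 *		.user_addr = (__u64)(uintptr_t)buf,
 *		.size = 4096,
 *		.flags = IORING_MEM_REGION_TYPE_USER,
 *	};
 *	struct io_uring_mem_region_reg reg = {
 *		.region_uptr = (__u64)(uintptr_t)&rd,
 *		.flags = IORING_MEM_REGION_REG_WAIT_ARG,
 *	};
 *
 *	sys_io_uring_register(ring_fd, IORING_REGISTER_MEM_REGION, &reg, 1);
 */
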
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
			       void __user *arg, unsigned nr_args)
	__releases(ctx->uring_lock)
	__acquires(ctx->uring_lock)
{
	int ret;

	/*
	 * We don't quiesce the refs for register anymore and so it can't be
	 * dying as we're holding a file ref here.
	 */
	if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
		return -ENXIO;

	if (ctx->submitter_task && ctx->submitter_task != current)
		return -EEXIST;

	if (ctx->restricted) {
		opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
		if (!test_bit(opcode, ctx->restrictions.register_op))
			return -EACCES;
	}

	switch (opcode) {
	case IORING_REGISTER_BUFFERS:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_BUFFERS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_buffers_unregister(ctx);
		break;
	case IORING_REGISTER_FILES:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_FILES:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_files_unregister(ctx);
		break;
	case IORING_REGISTER_FILES_UPDATE:
		ret = io_register_files_update(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_EVENTFD:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 0);
		break;
	case IORING_REGISTER_EVENTFD_ASYNC:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 1);
		break;
	case IORING_UNREGISTER_EVENTFD:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_eventfd_unregister(ctx);
		break;
	case IORING_REGISTER_PROBE:
		ret = -EINVAL;
		if (!arg || nr_args > 256)
			break;
		ret = io_probe(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_personality(ctx);
		break;
	case IORING_UNREGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg)
			break;
		ret = io_unregister_personality(ctx, nr_args);
		break;
	case IORING_REGISTER_ENABLE_RINGS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_enable_rings(ctx);
		break;
	case IORING_REGISTER_RESTRICTIONS:
		ret = io_register_restrictions(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_FILES2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_FILES_UPDATE2:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_BUFFERS2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_BUFFERS_UPDATE:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (!arg || !nr_args)
			break;
		ret = io_register_iowq_aff(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_unregister_iowq_aff(ctx);
		break;
	case IORING_REGISTER_IOWQ_MAX_WORKERS:
		ret = -EINVAL;
		if (!arg || nr_args != 2)
			break;
		ret = io_register_iowq_max_workers(ctx, arg);
		break;
	case IORING_REGISTER_RING_FDS:
		ret = io_ringfd_register(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_RING_FDS:
		ret = io_ringfd_unregister(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_ring(ctx, arg);
		break;
	case IORING_UNREGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_unregister_pbuf_ring(ctx, arg);
		break;
	case IORING_REGISTER_SYNC_CANCEL:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_sync_cancel(ctx, arg);
		break;
	case IORING_REGISTER_FILE_ALLOC_RANGE:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_file_alloc_range(ctx, arg);
		break;
	case IORING_REGISTER_PBUF_STATUS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_status(ctx, arg);
		break;
	case IORING_REGISTER_NAPI:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_napi(ctx, arg);
		break;
	case IORING_UNREGISTER_NAPI:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_unregister_napi(ctx, arg);
		break;
	case IORING_REGISTER_CLOCK:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_clock(ctx, arg);
		break;
	case IORING_REGISTER_CLONE_BUFFERS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_clone_buffers(ctx, arg);
		break;
	case IORING_REGISTER_ZCRX_IFQ:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_zcrx_ifq(ctx, arg);
		break;
	case IORING_REGISTER_RESIZE_RINGS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_resize_rings(ctx, arg);
		break;
	case IORING_REGISTER_MEM_REGION:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_mem_region(ctx, arg);
		break;
	case IORING_REGISTER_QUERY:
		ret = io_query(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_ZCRX_REFILL:
		ret = io_zcrx_return_bufs(ctx, arg, nr_args);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/*
 * Given an 'fd' value, return the ctx associated with it. If 'registered' is
 * true, then the registered index is used. Otherwise, the normal fd table.
 * Caller must call fput() on the returned file, unless it's an ERR_PTR.
 */
struct file *io_uring_register_get_file(unsigned int fd, bool registered)
{
	struct file *file;

	if (registered) {
		/*
		 * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
		 * need only dereference our task private array to find it.
		 */
		struct io_uring_task *tctx = current->io_uring;

		if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
			return ERR_PTR(-EINVAL);
		fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
		file = tctx->registered_rings[fd];
		if (file)
			get_file(file);
	} else {
		file = fget(fd);
	}

	if (unlikely(!file))
		return ERR_PTR(-EBADF);
	if (io_is_uring_fops(file))
		return file;
	fput(file);
	return ERR_PTR(-EOPNOTSUPP);
}

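/*
 * Illustrative userspace usage (a sketch): register the ring fd itself so
 * later io_uring_register() calls can skip the fdtable lookup. With
 * .offset = -1U the kernel picks a free slot and writes the index back.
 *
 *	struct io_uring_rsrc_update up = {
 *		.offset = -1U,
 *		.data = ring_fd,
 *	};
 *
 *	sys_io_uring_register(ring_fd, IORING_REGISTER_RING_FDS, &up, 1);
 *	// from now on, pass the index and set the high opcode bit:
 *	sys_io_uring_register(up.offset,
 *			      IORING_REGISTER_PROBE |
 *			      IORING_REGISTER_USE_REGISTERED_RING, p, 256);
 */
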
static int io_uring_register_send_msg_ring(void __user *arg, unsigned int nr_args)
{
	struct io_uring_sqe sqe;

	if (!arg || nr_args != 1)
		return -EINVAL;
	if (copy_from_user(&sqe, arg, sizeof(sqe)))
		return -EFAULT;
	/* no flags supported */
	if (sqe.flags)
		return -EINVAL;
	if (sqe.opcode != IORING_OP_MSG_RING)
		return -EINVAL;

	return io_uring_sync_msg_ring(&sqe);
}

/*
 * "blind" registration opcodes are ones where there's no ring given, and
 * hence the source fd must be -1.
 */
static int io_uring_register_blind(unsigned int opcode, void __user *arg,
				   unsigned int nr_args)
{
	switch (opcode) {
	case IORING_REGISTER_SEND_MSG_RING:
		return io_uring_register_send_msg_ring(arg, nr_args);
	case IORING_REGISTER_QUERY:
		return io_query(NULL, arg, nr_args);
	}
	return -EINVAL;
}

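/*
 * Illustrative userspace usage (a sketch): a task that has no ring of its
 * own can still post a message CQE to someone else's ring by passing
 * fd == -1 and a fully described IORING_OP_MSG_RING SQE.
 *
 *	struct io_uring_sqe sqe = {};
 *
 *	sqe.opcode = IORING_OP_MSG_RING;
 *	sqe.fd = target_ring_fd;	// ring receiving the CQE
 *	sqe.len = 0x10;			// becomes cqe->res on the target
 *	sqe.off = 0xcafe;		// becomes cqe->user_data
 *
 *	sys_io_uring_register(-1, IORING_REGISTER_SEND_MSG_RING, &sqe, 1);
 */
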
SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
		void __user *, arg, unsigned int, nr_args)
{
	struct io_ring_ctx *ctx;
	long ret = -EBADF;
	struct file *file;
	bool use_registered_ring;

	use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
	opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;

	if (opcode >= IORING_REGISTER_LAST)
		return -EINVAL;

	if (fd == -1)
		return io_uring_register_blind(opcode, arg, nr_args);

	file = io_uring_register_get_file(fd, use_registered_ring);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	mutex_lock(&ctx->uring_lock);
	ret = __io_uring_register(ctx, opcode, arg, nr_args);

	trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr,
				ctx->buf_table.nr, ret);
	mutex_unlock(&ctx->uring_lock);

	fput(file);
	return ret;
}