| 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | /* |
| 3 | * binfmt_misc.c |
| 4 | * |
| 5 | * Copyright (C) 1997 Richard Günther |
| 6 | * |
| 7 | * binfmt_misc detects binaries via a magic or filename extension and invokes |
| 8 | * a specified wrapper. See Documentation/admin-guide/binfmt-misc.rst for more details. |
| 9 | */ |
| 10 | |
| 11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 12 | |
| 13 | #include <linux/kernel.h> |
| 14 | #include <linux/module.h> |
| 15 | #include <linux/init.h> |
| 16 | #include <linux/sched/mm.h> |
| 17 | #include <linux/magic.h> |
| 18 | #include <linux/binfmts.h> |
| 19 | #include <linux/slab.h> |
| 20 | #include <linux/ctype.h> |
| 21 | #include <linux/string_helpers.h> |
| 22 | #include <linux/file.h> |
| 23 | #include <linux/pagemap.h> |
| 24 | #include <linux/namei.h> |
| 25 | #include <linux/mount.h> |
| 26 | #include <linux/fs_context.h> |
| 27 | #include <linux/syscalls.h> |
| 28 | #include <linux/fs.h> |
| 29 | #include <linux/uaccess.h> |
| 30 | |
| 31 | #include "internal.h" |
| 32 | |
/* USE_DEBUG is 1 when this file is built with DEBUG defined, 0 otherwise. */
#if defined(DEBUG)
# define USE_DEBUG 1
#else
# define USE_DEBUG 0
#endif

enum {
	/* Set to zero to save 400 bytes of kernel memory. */
	VERBOSE_STATUS = 1
};

/* Bit numbers inside Node.flags, used with test_bit()/set_bit()/clear_bit(). */
enum { Enabled, Magic };

/* Bit masks inside Node.flags, set by the 'P', 'O', 'C' and 'F' flags. */
#define MISC_FMT_PRESERVE_ARGV0	(1UL << 31)
#define MISC_FMT_OPEN_BINARY	(1UL << 30)
#define MISC_FMT_CREDENTIALS	(1UL << 29)
#define MISC_FMT_OPEN_FILE	(1UL << 28)
| 48 | |
| 49 | typedef struct { |
| 50 | struct list_head list; |
| 51 | unsigned long flags; /* type, status, etc. */ |
| 52 | int offset; /* offset of magic */ |
| 53 | int size; /* size of magic/mask */ |
| 54 | char *magic; /* magic or filename extension */ |
| 55 | char *mask; /* mask, NULL for exact match */ |
| 56 | const char *interpreter; /* filename of interpreter */ |
| 57 | char *name; |
| 58 | struct dentry *dentry; |
| 59 | struct file *interp_file; |
| 60 | refcount_t users; /* sync removal with load_misc_binary() */ |
| 61 | } Node; |
| 62 | |
| 63 | static struct file_system_type bm_fs_type; |
| 64 | |
/*
 * Upper bound on the length of a register string. It is made up of:
 *  - 7 delimiters
 *  - name:   ~50 bytes
 *  - type:   1 byte
 *  - offset: 3 bytes (has to be smaller than BINPRM_BUF_SIZE)
 *  - magic:  128 bytes (512 in escaped form)
 *  - mask:   128 bytes (512 in escaped form)
 *  - interp: ~50 bytes
 *  - flags:  5 bytes
 * The sum is rounded up a bit and then reduced again to leave room for the
 * internal data (like struct Node) that shares the allocation.
 */
#define MAX_REGISTER_LENGTH 1920
| 79 | |
| 80 | /** |
| 81 | * search_binfmt_handler - search for a binary handler for @bprm |
| 82 | * @misc: handle to binfmt_misc instance |
| 83 | * @bprm: binary for which we are looking for a handler |
| 84 | * |
| 85 | * Search for a binary type handler for @bprm in the list of registered binary |
| 86 | * type handlers. |
| 87 | * |
| 88 | * Return: binary type list entry on success, NULL on failure |
| 89 | */ |
| 90 | static Node *search_binfmt_handler(struct binfmt_misc *misc, |
| 91 | struct linux_binprm *bprm) |
| 92 | { |
| 93 | char *p = strrchr(bprm->interp, '.'); |
| 94 | Node *e; |
| 95 | |
| 96 | /* Walk all the registered handlers. */ |
| 97 | list_for_each_entry(e, &misc->entries, list) { |
| 98 | char *s; |
| 99 | int j; |
| 100 | |
| 101 | /* Make sure this one is currently enabled. */ |
| 102 | if (!test_bit(Enabled, &e->flags)) |
| 103 | continue; |
| 104 | |
| 105 | /* Do matching based on extension if applicable. */ |
| 106 | if (!test_bit(Magic, &e->flags)) { |
| 107 | if (p && !strcmp(e->magic, p + 1)) |
| 108 | return e; |
| 109 | continue; |
| 110 | } |
| 111 | |
| 112 | /* Do matching based on magic & mask. */ |
| 113 | s = bprm->buf + e->offset; |
| 114 | if (e->mask) { |
| 115 | for (j = 0; j < e->size; j++) |
| 116 | if ((*s++ ^ e->magic[j]) & e->mask[j]) |
| 117 | break; |
| 118 | } else { |
| 119 | for (j = 0; j < e->size; j++) |
| 120 | if ((*s++ ^ e->magic[j])) |
| 121 | break; |
| 122 | } |
| 123 | if (j == e->size) |
| 124 | return e; |
| 125 | } |
| 126 | |
| 127 | return NULL; |
| 128 | } |
| 129 | |
| 130 | /** |
| 131 | * get_binfmt_handler - try to find a binary type handler |
| 132 | * @misc: handle to binfmt_misc instance |
| 133 | * @bprm: binary for which we are looking for a handler |
| 134 | * |
| 135 | * Try to find a binfmt handler for the binary type. If one is found take a |
| 136 | * reference to protect against removal via bm_{entry,status}_write(). |
| 137 | * |
| 138 | * Return: binary type list entry on success, NULL on failure |
| 139 | */ |
| 140 | static Node *get_binfmt_handler(struct binfmt_misc *misc, |
| 141 | struct linux_binprm *bprm) |
| 142 | { |
| 143 | Node *e; |
| 144 | |
| 145 | read_lock(&misc->entries_lock); |
| 146 | e = search_binfmt_handler(misc, bprm); |
| 147 | if (e) |
| 148 | refcount_inc(r: &e->users); |
| 149 | read_unlock(&misc->entries_lock); |
| 150 | return e; |
| 151 | } |
| 152 | |
| 153 | /** |
| 154 | * put_binfmt_handler - put binary handler node |
| 155 | * @e: node to put |
| 156 | * |
| 157 | * Free node syncing with load_misc_binary() and defer final free to |
| 158 | * load_misc_binary() in case it is using the binary type handler we were |
| 159 | * requested to remove. |
| 160 | */ |
| 161 | static void put_binfmt_handler(Node *e) |
| 162 | { |
| 163 | if (refcount_dec_and_test(r: &e->users)) { |
| 164 | if (e->flags & MISC_FMT_OPEN_FILE) |
| 165 | filp_close(e->interp_file, NULL); |
| 166 | kfree(objp: e); |
| 167 | } |
| 168 | } |
| 169 | |
| 170 | /** |
| 171 | * load_binfmt_misc - load the binfmt_misc of the caller's user namespace |
| 172 | * |
| 173 | * To be called in load_misc_binary() to load the relevant struct binfmt_misc. |
| 174 | * If a user namespace doesn't have its own binfmt_misc mount it can make use |
| 175 | * of its ancestor's binfmt_misc handlers. This mimicks the behavior of |
| 176 | * pre-namespaced binfmt_misc where all registered binfmt_misc handlers where |
| 177 | * available to all user and user namespaces on the system. |
| 178 | * |
| 179 | * Return: the binfmt_misc instance of the caller's user namespace |
| 180 | */ |
| 181 | static struct binfmt_misc *load_binfmt_misc(void) |
| 182 | { |
| 183 | const struct user_namespace *user_ns; |
| 184 | struct binfmt_misc *misc; |
| 185 | |
| 186 | user_ns = current_user_ns(); |
| 187 | while (user_ns) { |
| 188 | /* Pairs with smp_store_release() in bm_fill_super(). */ |
| 189 | misc = smp_load_acquire(&user_ns->binfmt_misc); |
| 190 | if (misc) |
| 191 | return misc; |
| 192 | |
| 193 | user_ns = user_ns->parent; |
| 194 | } |
| 195 | |
| 196 | return &init_binfmt_misc; |
| 197 | } |
| 198 | |
| 199 | /* |
| 200 | * the loader itself |
| 201 | */ |
| 202 | static int load_misc_binary(struct linux_binprm *bprm) |
| 203 | { |
| 204 | Node *fmt; |
| 205 | struct file *interp_file = NULL; |
| 206 | int retval = -ENOEXEC; |
| 207 | struct binfmt_misc *misc; |
| 208 | |
| 209 | misc = load_binfmt_misc(); |
| 210 | if (!misc->enabled) |
| 211 | return retval; |
| 212 | |
| 213 | fmt = get_binfmt_handler(misc, bprm); |
| 214 | if (!fmt) |
| 215 | return retval; |
| 216 | |
| 217 | /* Need to be able to load the file after exec */ |
| 218 | retval = -ENOENT; |
| 219 | if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) |
| 220 | goto ret; |
| 221 | |
| 222 | if (fmt->flags & MISC_FMT_PRESERVE_ARGV0) { |
| 223 | bprm->interp_flags |= BINPRM_FLAGS_PRESERVE_ARGV0; |
| 224 | } else { |
| 225 | retval = remove_arg_zero(bprm); |
| 226 | if (retval) |
| 227 | goto ret; |
| 228 | } |
| 229 | |
| 230 | if (fmt->flags & MISC_FMT_OPEN_BINARY) |
| 231 | bprm->have_execfd = 1; |
| 232 | |
| 233 | /* make argv[1] be the path to the binary */ |
| 234 | retval = copy_string_kernel(arg: bprm->interp, bprm); |
| 235 | if (retval < 0) |
| 236 | goto ret; |
| 237 | bprm->argc++; |
| 238 | |
| 239 | /* add the interp as argv[0] */ |
| 240 | retval = copy_string_kernel(arg: fmt->interpreter, bprm); |
| 241 | if (retval < 0) |
| 242 | goto ret; |
| 243 | bprm->argc++; |
| 244 | |
| 245 | /* Update interp in case binfmt_script needs it. */ |
| 246 | retval = bprm_change_interp(interp: fmt->interpreter, bprm); |
| 247 | if (retval < 0) |
| 248 | goto ret; |
| 249 | |
| 250 | if (fmt->flags & MISC_FMT_OPEN_FILE) { |
| 251 | interp_file = file_clone_open(file: fmt->interp_file); |
| 252 | if (!IS_ERR(ptr: interp_file)) |
| 253 | deny_write_access(file: interp_file); |
| 254 | } else { |
| 255 | interp_file = open_exec(fmt->interpreter); |
| 256 | } |
| 257 | retval = PTR_ERR(ptr: interp_file); |
| 258 | if (IS_ERR(ptr: interp_file)) |
| 259 | goto ret; |
| 260 | |
| 261 | bprm->interpreter = interp_file; |
| 262 | if (fmt->flags & MISC_FMT_CREDENTIALS) |
| 263 | bprm->execfd_creds = 1; |
| 264 | |
| 265 | retval = 0; |
| 266 | ret: |
| 267 | |
| 268 | /* |
| 269 | * If we actually put the node here all concurrent calls to |
| 270 | * load_misc_binary() will have finished. We also know |
| 271 | * that for the refcount to be zero someone must have concurently |
| 272 | * removed the binary type handler from the list and it's our job to |
| 273 | * free it. |
| 274 | */ |
| 275 | put_binfmt_handler(e: fmt); |
| 276 | |
| 277 | return retval; |
| 278 | } |
| 279 | |
| 280 | /* Command parsers */ |
| 281 | |
| 282 | /* |
| 283 | * parses and copies one argument enclosed in del from *sp to *dp, |
| 284 | * recognising the \x special. |
| 285 | * returns pointer to the copied argument or NULL in case of an |
| 286 | * error (and sets err) or null argument length. |
| 287 | */ |
| 288 | static char *scanarg(char *s, char del) |
| 289 | { |
| 290 | char c; |
| 291 | |
| 292 | while ((c = *s++) != del) { |
| 293 | if (c == '\\' && *s == 'x') { |
| 294 | s++; |
| 295 | if (!isxdigit(*s++)) |
| 296 | return NULL; |
| 297 | if (!isxdigit(*s++)) |
| 298 | return NULL; |
| 299 | } |
| 300 | } |
| 301 | s[-1] ='\0'; |
| 302 | return s; |
| 303 | } |
| 304 | |
| 305 | static char *check_special_flags(char *sfs, Node *e) |
| 306 | { |
| 307 | char *p = sfs; |
| 308 | int cont = 1; |
| 309 | |
| 310 | /* special flags */ |
| 311 | while (cont) { |
| 312 | switch (*p) { |
| 313 | case 'P': |
| 314 | pr_debug("register: flag: P (preserve argv0)\n" ); |
| 315 | p++; |
| 316 | e->flags |= MISC_FMT_PRESERVE_ARGV0; |
| 317 | break; |
| 318 | case 'O': |
| 319 | pr_debug("register: flag: O (open binary)\n" ); |
| 320 | p++; |
| 321 | e->flags |= MISC_FMT_OPEN_BINARY; |
| 322 | break; |
| 323 | case 'C': |
| 324 | pr_debug("register: flag: C (preserve creds)\n" ); |
| 325 | p++; |
| 326 | /* this flags also implies the |
| 327 | open-binary flag */ |
| 328 | e->flags |= (MISC_FMT_CREDENTIALS | |
| 329 | MISC_FMT_OPEN_BINARY); |
| 330 | break; |
| 331 | case 'F': |
| 332 | pr_debug("register: flag: F: open interpreter file now\n" ); |
| 333 | p++; |
| 334 | e->flags |= MISC_FMT_OPEN_FILE; |
| 335 | break; |
| 336 | default: |
| 337 | cont = 0; |
| 338 | } |
| 339 | } |
| 340 | |
| 341 | return p; |
| 342 | } |
| 343 | |
| 344 | /* |
| 345 | * This registers a new binary format, it recognises the syntax |
| 346 | * ':name:type:offset:magic:mask:interpreter:flags' |
| 347 | * where the ':' is the IFS, that can be chosen with the first char |
| 348 | */ |
| 349 | static Node *create_entry(const char __user *buffer, size_t count) |
| 350 | { |
| 351 | Node *e; |
| 352 | int memsize, err; |
| 353 | char *buf, *p; |
| 354 | char del; |
| 355 | |
| 356 | pr_debug("register: received %zu bytes\n" , count); |
| 357 | |
| 358 | /* some sanity checks */ |
| 359 | err = -EINVAL; |
| 360 | if ((count < 11) || (count > MAX_REGISTER_LENGTH)) |
| 361 | goto out; |
| 362 | |
| 363 | err = -ENOMEM; |
| 364 | memsize = sizeof(Node) + count + 8; |
| 365 | e = kmalloc(memsize, GFP_KERNEL_ACCOUNT); |
| 366 | if (!e) |
| 367 | goto out; |
| 368 | |
| 369 | p = buf = (char *)e + sizeof(Node); |
| 370 | |
| 371 | memset(s: e, c: 0, n: sizeof(Node)); |
| 372 | if (copy_from_user(to: buf, from: buffer, n: count)) |
| 373 | goto efault; |
| 374 | |
| 375 | del = *p++; /* delimeter */ |
| 376 | |
| 377 | pr_debug("register: delim: %#x {%c}\n" , del, del); |
| 378 | |
| 379 | /* Pad the buffer with the delim to simplify parsing below. */ |
| 380 | memset(s: buf + count, c: del, n: 8); |
| 381 | |
| 382 | /* Parse the 'name' field. */ |
| 383 | e->name = p; |
| 384 | p = strchr(p, del); |
| 385 | if (!p) |
| 386 | goto einval; |
| 387 | *p++ = '\0'; |
| 388 | if (!e->name[0] || |
| 389 | !strcmp(e->name, "." ) || |
| 390 | !strcmp(e->name, ".." ) || |
| 391 | strchr(e->name, '/')) |
| 392 | goto einval; |
| 393 | |
| 394 | pr_debug("register: name: {%s}\n" , e->name); |
| 395 | |
| 396 | /* Parse the 'type' field. */ |
| 397 | switch (*p++) { |
| 398 | case 'E': |
| 399 | pr_debug("register: type: E (extension)\n" ); |
| 400 | e->flags = 1 << Enabled; |
| 401 | break; |
| 402 | case 'M': |
| 403 | pr_debug("register: type: M (magic)\n" ); |
| 404 | e->flags = (1 << Enabled) | (1 << Magic); |
| 405 | break; |
| 406 | default: |
| 407 | goto einval; |
| 408 | } |
| 409 | if (*p++ != del) |
| 410 | goto einval; |
| 411 | |
| 412 | if (test_bit(Magic, &e->flags)) { |
| 413 | /* Handle the 'M' (magic) format. */ |
| 414 | char *s; |
| 415 | |
| 416 | /* Parse the 'offset' field. */ |
| 417 | s = strchr(p, del); |
| 418 | if (!s) |
| 419 | goto einval; |
| 420 | *s = '\0'; |
| 421 | if (p != s) { |
| 422 | int r = kstrtoint(s: p, base: 10, res: &e->offset); |
| 423 | if (r != 0 || e->offset < 0) |
| 424 | goto einval; |
| 425 | } |
| 426 | p = s; |
| 427 | if (*p++) |
| 428 | goto einval; |
| 429 | pr_debug("register: offset: %#x\n" , e->offset); |
| 430 | |
| 431 | /* Parse the 'magic' field. */ |
| 432 | e->magic = p; |
| 433 | p = scanarg(s: p, del); |
| 434 | if (!p) |
| 435 | goto einval; |
| 436 | if (!e->magic[0]) |
| 437 | goto einval; |
| 438 | if (USE_DEBUG) |
| 439 | print_hex_dump_bytes( |
| 440 | KBUILD_MODNAME ": register: magic[raw]: " , |
| 441 | DUMP_PREFIX_NONE, e->magic, p - e->magic); |
| 442 | |
| 443 | /* Parse the 'mask' field. */ |
| 444 | e->mask = p; |
| 445 | p = scanarg(s: p, del); |
| 446 | if (!p) |
| 447 | goto einval; |
| 448 | if (!e->mask[0]) { |
| 449 | e->mask = NULL; |
| 450 | pr_debug("register: mask[raw]: none\n" ); |
| 451 | } else if (USE_DEBUG) |
| 452 | print_hex_dump_bytes( |
| 453 | KBUILD_MODNAME ": register: mask[raw]: " , |
| 454 | DUMP_PREFIX_NONE, e->mask, p - e->mask); |
| 455 | |
| 456 | /* |
| 457 | * Decode the magic & mask fields. |
| 458 | * Note: while we might have accepted embedded NUL bytes from |
| 459 | * above, the unescape helpers here will stop at the first one |
| 460 | * it encounters. |
| 461 | */ |
| 462 | e->size = string_unescape_inplace(buf: e->magic, UNESCAPE_HEX); |
| 463 | if (e->mask && |
| 464 | string_unescape_inplace(buf: e->mask, UNESCAPE_HEX) != e->size) |
| 465 | goto einval; |
| 466 | if (e->size > BINPRM_BUF_SIZE || |
| 467 | BINPRM_BUF_SIZE - e->size < e->offset) |
| 468 | goto einval; |
| 469 | pr_debug("register: magic/mask length: %i\n" , e->size); |
| 470 | if (USE_DEBUG) { |
| 471 | print_hex_dump_bytes( |
| 472 | KBUILD_MODNAME ": register: magic[decoded]: " , |
| 473 | DUMP_PREFIX_NONE, e->magic, e->size); |
| 474 | |
| 475 | if (e->mask) { |
| 476 | int i; |
| 477 | char *masked = kmalloc(e->size, GFP_KERNEL_ACCOUNT); |
| 478 | |
| 479 | print_hex_dump_bytes( |
| 480 | KBUILD_MODNAME ": register: mask[decoded]: " , |
| 481 | DUMP_PREFIX_NONE, e->mask, e->size); |
| 482 | |
| 483 | if (masked) { |
| 484 | for (i = 0; i < e->size; ++i) |
| 485 | masked[i] = e->magic[i] & e->mask[i]; |
| 486 | print_hex_dump_bytes( |
| 487 | KBUILD_MODNAME ": register: magic[masked]: " , |
| 488 | DUMP_PREFIX_NONE, masked, e->size); |
| 489 | |
| 490 | kfree(objp: masked); |
| 491 | } |
| 492 | } |
| 493 | } |
| 494 | } else { |
| 495 | /* Handle the 'E' (extension) format. */ |
| 496 | |
| 497 | /* Skip the 'offset' field. */ |
| 498 | p = strchr(p, del); |
| 499 | if (!p) |
| 500 | goto einval; |
| 501 | *p++ = '\0'; |
| 502 | |
| 503 | /* Parse the 'magic' field. */ |
| 504 | e->magic = p; |
| 505 | p = strchr(p, del); |
| 506 | if (!p) |
| 507 | goto einval; |
| 508 | *p++ = '\0'; |
| 509 | if (!e->magic[0] || strchr(e->magic, '/')) |
| 510 | goto einval; |
| 511 | pr_debug("register: extension: {%s}\n" , e->magic); |
| 512 | |
| 513 | /* Skip the 'mask' field. */ |
| 514 | p = strchr(p, del); |
| 515 | if (!p) |
| 516 | goto einval; |
| 517 | *p++ = '\0'; |
| 518 | } |
| 519 | |
| 520 | /* Parse the 'interpreter' field. */ |
| 521 | e->interpreter = p; |
| 522 | p = strchr(p, del); |
| 523 | if (!p) |
| 524 | goto einval; |
| 525 | *p++ = '\0'; |
| 526 | if (!e->interpreter[0]) |
| 527 | goto einval; |
| 528 | pr_debug("register: interpreter: {%s}\n" , e->interpreter); |
| 529 | |
| 530 | /* Parse the 'flags' field. */ |
| 531 | p = check_special_flags(sfs: p, e); |
| 532 | if (*p == '\n') |
| 533 | p++; |
| 534 | if (p != buf + count) |
| 535 | goto einval; |
| 536 | |
| 537 | return e; |
| 538 | |
| 539 | out: |
| 540 | return ERR_PTR(error: err); |
| 541 | |
| 542 | efault: |
| 543 | kfree(objp: e); |
| 544 | return ERR_PTR(error: -EFAULT); |
| 545 | einval: |
| 546 | kfree(objp: e); |
| 547 | return ERR_PTR(error: -EINVAL); |
| 548 | } |
| 549 | |
| 550 | /* |
| 551 | * Set status of entry/binfmt_misc: |
| 552 | * '1' enables, '0' disables and '-1' clears entry/binfmt_misc |
| 553 | */ |
| 554 | static int parse_command(const char __user *buffer, size_t count) |
| 555 | { |
| 556 | char s[4]; |
| 557 | |
| 558 | if (count > 3) |
| 559 | return -EINVAL; |
| 560 | if (copy_from_user(to: s, from: buffer, n: count)) |
| 561 | return -EFAULT; |
| 562 | if (!count) |
| 563 | return 0; |
| 564 | if (s[count - 1] == '\n') |
| 565 | count--; |
| 566 | if (count == 1 && s[0] == '0') |
| 567 | return 1; |
| 568 | if (count == 1 && s[0] == '1') |
| 569 | return 2; |
| 570 | if (count == 2 && s[0] == '-' && s[1] == '1') |
| 571 | return 3; |
| 572 | return -EINVAL; |
| 573 | } |
| 574 | |
| 575 | /* generic stuff */ |
| 576 | |
| 577 | static void entry_status(Node *e, char *page) |
| 578 | { |
| 579 | char *dp = page; |
| 580 | const char *status = "disabled" ; |
| 581 | |
| 582 | if (test_bit(Enabled, &e->flags)) |
| 583 | status = "enabled" ; |
| 584 | |
| 585 | if (!VERBOSE_STATUS) { |
| 586 | sprintf(buf: page, fmt: "%s\n" , status); |
| 587 | return; |
| 588 | } |
| 589 | |
| 590 | dp += sprintf(buf: dp, fmt: "%s\ninterpreter %s\n" , status, e->interpreter); |
| 591 | |
| 592 | /* print the special flags */ |
| 593 | dp += sprintf(buf: dp, fmt: "flags: " ); |
| 594 | if (e->flags & MISC_FMT_PRESERVE_ARGV0) |
| 595 | *dp++ = 'P'; |
| 596 | if (e->flags & MISC_FMT_OPEN_BINARY) |
| 597 | *dp++ = 'O'; |
| 598 | if (e->flags & MISC_FMT_CREDENTIALS) |
| 599 | *dp++ = 'C'; |
| 600 | if (e->flags & MISC_FMT_OPEN_FILE) |
| 601 | *dp++ = 'F'; |
| 602 | *dp++ = '\n'; |
| 603 | |
| 604 | if (!test_bit(Magic, &e->flags)) { |
| 605 | sprintf(buf: dp, fmt: "extension .%s\n" , e->magic); |
| 606 | } else { |
| 607 | dp += sprintf(buf: dp, fmt: "offset %i\nmagic " , e->offset); |
| 608 | dp = bin2hex(dst: dp, src: e->magic, count: e->size); |
| 609 | if (e->mask) { |
| 610 | dp += sprintf(buf: dp, fmt: "\nmask " ); |
| 611 | dp = bin2hex(dst: dp, src: e->mask, count: e->size); |
| 612 | } |
| 613 | *dp++ = '\n'; |
| 614 | *dp = '\0'; |
| 615 | } |
| 616 | } |
| 617 | |
| 618 | static struct inode *bm_get_inode(struct super_block *sb, int mode) |
| 619 | { |
| 620 | struct inode *inode = new_inode(sb); |
| 621 | |
| 622 | if (inode) { |
| 623 | inode->i_ino = get_next_ino(); |
| 624 | inode->i_mode = mode; |
| 625 | simple_inode_init_ts(inode); |
| 626 | } |
| 627 | return inode; |
| 628 | } |
| 629 | |
| 630 | /** |
| 631 | * i_binfmt_misc - retrieve struct binfmt_misc from a binfmt_misc inode |
| 632 | * @inode: inode of the relevant binfmt_misc instance |
| 633 | * |
| 634 | * This helper retrieves struct binfmt_misc from a binfmt_misc inode. This can |
| 635 | * be done without any memory barriers because we are guaranteed that |
| 636 | * user_ns->binfmt_misc is fully initialized. It was fully initialized when the |
| 637 | * binfmt_misc mount was first created. |
| 638 | * |
| 639 | * Return: struct binfmt_misc of the relevant binfmt_misc instance |
| 640 | */ |
| 641 | static struct binfmt_misc *i_binfmt_misc(struct inode *inode) |
| 642 | { |
| 643 | return inode->i_sb->s_user_ns->binfmt_misc; |
| 644 | } |
| 645 | |
| 646 | /** |
| 647 | * bm_evict_inode - cleanup data associated with @inode |
| 648 | * @inode: inode to which the data is attached |
| 649 | * |
| 650 | * Cleanup the binary type handler data associated with @inode if a binary type |
| 651 | * entry is removed or the filesystem is unmounted and the super block is |
| 652 | * shutdown. |
| 653 | * |
| 654 | * If the ->evict call was not caused by a super block shutdown but by a write |
| 655 | * to remove the entry or all entries via bm_{entry,status}_write() the entry |
| 656 | * will have already been removed from the list. We keep the list_empty() check |
| 657 | * to make that explicit. |
| 658 | */ |
| 659 | static void bm_evict_inode(struct inode *inode) |
| 660 | { |
| 661 | Node *e = inode->i_private; |
| 662 | |
| 663 | clear_inode(inode); |
| 664 | |
| 665 | if (e) { |
| 666 | struct binfmt_misc *misc; |
| 667 | |
| 668 | misc = i_binfmt_misc(inode); |
| 669 | write_lock(&misc->entries_lock); |
| 670 | if (!list_empty(head: &e->list)) |
| 671 | list_del_init(entry: &e->list); |
| 672 | write_unlock(&misc->entries_lock); |
| 673 | put_binfmt_handler(e); |
| 674 | } |
| 675 | } |
| 676 | |
| 677 | /** |
| 678 | * remove_binfmt_handler - remove a binary type handler |
| 679 | * @misc: handle to binfmt_misc instance |
| 680 | * @e: binary type handler to remove |
| 681 | * |
| 682 | * Remove a binary type handler from the list of binary type handlers and |
| 683 | * remove its associated dentry. This is called from |
| 684 | * binfmt_{entry,status}_write(). In the future, we might want to think about |
| 685 | * adding a proper ->unlink() method to binfmt_misc instead of forcing caller's |
| 686 | * to use writes to files in order to delete binary type handlers. But it has |
| 687 | * worked for so long that it's not a pressing issue. |
| 688 | */ |
| 689 | static void remove_binfmt_handler(struct binfmt_misc *misc, Node *e) |
| 690 | { |
| 691 | write_lock(&misc->entries_lock); |
| 692 | list_del_init(entry: &e->list); |
| 693 | write_unlock(&misc->entries_lock); |
| 694 | locked_recursive_removal(e->dentry, NULL); |
| 695 | } |
| 696 | |
| 697 | /* /<entry> */ |
| 698 | |
| 699 | static ssize_t |
| 700 | bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) |
| 701 | { |
| 702 | Node *e = file_inode(f: file)->i_private; |
| 703 | ssize_t res; |
| 704 | char *page; |
| 705 | |
| 706 | page = (char *) __get_free_page(GFP_KERNEL); |
| 707 | if (!page) |
| 708 | return -ENOMEM; |
| 709 | |
| 710 | entry_status(e, page); |
| 711 | |
| 712 | res = simple_read_from_buffer(to: buf, count: nbytes, ppos, from: page, available: strlen(page)); |
| 713 | |
| 714 | free_page((unsigned long) page); |
| 715 | return res; |
| 716 | } |
| 717 | |
| 718 | static ssize_t bm_entry_write(struct file *file, const char __user *buffer, |
| 719 | size_t count, loff_t *ppos) |
| 720 | { |
| 721 | struct inode *inode = file_inode(f: file); |
| 722 | Node *e = inode->i_private; |
| 723 | int res = parse_command(buffer, count); |
| 724 | |
| 725 | switch (res) { |
| 726 | case 1: |
| 727 | /* Disable this handler. */ |
| 728 | clear_bit(nr: Enabled, addr: &e->flags); |
| 729 | break; |
| 730 | case 2: |
| 731 | /* Enable this handler. */ |
| 732 | set_bit(nr: Enabled, addr: &e->flags); |
| 733 | break; |
| 734 | case 3: |
| 735 | /* Delete this handler. */ |
| 736 | inode = d_inode(dentry: inode->i_sb->s_root); |
| 737 | inode_lock_nested(inode, subclass: I_MUTEX_PARENT); |
| 738 | |
| 739 | /* |
| 740 | * In order to add new element or remove elements from the list |
| 741 | * via bm_{entry,register,status}_write() inode_lock() on the |
| 742 | * root inode must be held. |
| 743 | * The lock is exclusive ensuring that the list can't be |
| 744 | * modified. Only load_misc_binary() can access but does so |
| 745 | * read-only. So we only need to take the write lock when we |
| 746 | * actually remove the entry from the list. |
| 747 | */ |
| 748 | if (!list_empty(head: &e->list)) |
| 749 | remove_binfmt_handler(misc: i_binfmt_misc(inode), e); |
| 750 | |
| 751 | inode_unlock(inode); |
| 752 | break; |
| 753 | default: |
| 754 | return res; |
| 755 | } |
| 756 | |
| 757 | return count; |
| 758 | } |
| 759 | |
| 760 | static const struct file_operations bm_entry_operations = { |
| 761 | .read = bm_entry_read, |
| 762 | .write = bm_entry_write, |
| 763 | .llseek = default_llseek, |
| 764 | }; |
| 765 | |
| 766 | /* /register */ |
| 767 | |
| 768 | static ssize_t bm_register_write(struct file *file, const char __user *buffer, |
| 769 | size_t count, loff_t *ppos) |
| 770 | { |
| 771 | Node *e; |
| 772 | struct inode *inode; |
| 773 | struct super_block *sb = file_inode(f: file)->i_sb; |
| 774 | struct dentry *root = sb->s_root, *dentry; |
| 775 | struct binfmt_misc *misc; |
| 776 | int err = 0; |
| 777 | struct file *f = NULL; |
| 778 | |
| 779 | e = create_entry(buffer, count); |
| 780 | |
| 781 | if (IS_ERR(ptr: e)) |
| 782 | return PTR_ERR(ptr: e); |
| 783 | |
| 784 | if (e->flags & MISC_FMT_OPEN_FILE) { |
| 785 | const struct cred *old_cred; |
| 786 | |
| 787 | /* |
| 788 | * Now that we support unprivileged binfmt_misc mounts make |
| 789 | * sure we use the credentials that the register @file was |
| 790 | * opened with to also open the interpreter. Before that this |
| 791 | * didn't matter much as only a privileged process could open |
| 792 | * the register file. |
| 793 | */ |
| 794 | old_cred = override_creds(override_cred: file->f_cred); |
| 795 | f = open_exec(e->interpreter); |
| 796 | revert_creds(revert_cred: old_cred); |
| 797 | if (IS_ERR(ptr: f)) { |
| 798 | pr_notice("register: failed to install interpreter file %s\n" , |
| 799 | e->interpreter); |
| 800 | kfree(objp: e); |
| 801 | return PTR_ERR(ptr: f); |
| 802 | } |
| 803 | e->interp_file = f; |
| 804 | } |
| 805 | |
| 806 | inode_lock(inode: d_inode(dentry: root)); |
| 807 | dentry = lookup_noperm(&QSTR(e->name), root); |
| 808 | err = PTR_ERR(ptr: dentry); |
| 809 | if (IS_ERR(ptr: dentry)) |
| 810 | goto out; |
| 811 | |
| 812 | err = -EEXIST; |
| 813 | if (d_really_is_positive(dentry)) |
| 814 | goto out2; |
| 815 | |
| 816 | inode = bm_get_inode(sb, S_IFREG | 0644); |
| 817 | |
| 818 | err = -ENOMEM; |
| 819 | if (!inode) |
| 820 | goto out2; |
| 821 | |
| 822 | refcount_set(r: &e->users, n: 1); |
| 823 | e->dentry = dget(dentry); |
| 824 | inode->i_private = e; |
| 825 | inode->i_fop = &bm_entry_operations; |
| 826 | |
| 827 | d_instantiate(dentry, inode); |
| 828 | misc = i_binfmt_misc(inode); |
| 829 | write_lock(&misc->entries_lock); |
| 830 | list_add(new: &e->list, head: &misc->entries); |
| 831 | write_unlock(&misc->entries_lock); |
| 832 | |
| 833 | err = 0; |
| 834 | out2: |
| 835 | dput(dentry); |
| 836 | out: |
| 837 | inode_unlock(inode: d_inode(dentry: root)); |
| 838 | |
| 839 | if (err) { |
| 840 | if (f) |
| 841 | filp_close(f, NULL); |
| 842 | kfree(objp: e); |
| 843 | return err; |
| 844 | } |
| 845 | return count; |
| 846 | } |
| 847 | |
| 848 | static const struct file_operations bm_register_operations = { |
| 849 | .write = bm_register_write, |
| 850 | .llseek = noop_llseek, |
| 851 | }; |
| 852 | |
| 853 | /* /status */ |
| 854 | |
| 855 | static ssize_t |
| 856 | bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) |
| 857 | { |
| 858 | struct binfmt_misc *misc; |
| 859 | char *s; |
| 860 | |
| 861 | misc = i_binfmt_misc(inode: file_inode(f: file)); |
| 862 | s = misc->enabled ? "enabled\n" : "disabled\n" ; |
| 863 | return simple_read_from_buffer(to: buf, count: nbytes, ppos, from: s, available: strlen(s)); |
| 864 | } |
| 865 | |
| 866 | static ssize_t bm_status_write(struct file *file, const char __user *buffer, |
| 867 | size_t count, loff_t *ppos) |
| 868 | { |
| 869 | struct binfmt_misc *misc; |
| 870 | int res = parse_command(buffer, count); |
| 871 | Node *e, *next; |
| 872 | struct inode *inode; |
| 873 | |
| 874 | misc = i_binfmt_misc(inode: file_inode(f: file)); |
| 875 | switch (res) { |
| 876 | case 1: |
| 877 | /* Disable all handlers. */ |
| 878 | misc->enabled = false; |
| 879 | break; |
| 880 | case 2: |
| 881 | /* Enable all handlers. */ |
| 882 | misc->enabled = true; |
| 883 | break; |
| 884 | case 3: |
| 885 | /* Delete all handlers. */ |
| 886 | inode = d_inode(dentry: file_inode(f: file)->i_sb->s_root); |
| 887 | inode_lock_nested(inode, subclass: I_MUTEX_PARENT); |
| 888 | |
| 889 | /* |
| 890 | * In order to add new element or remove elements from the list |
| 891 | * via bm_{entry,register,status}_write() inode_lock() on the |
| 892 | * root inode must be held. |
| 893 | * The lock is exclusive ensuring that the list can't be |
| 894 | * modified. Only load_misc_binary() can access but does so |
| 895 | * read-only. So we only need to take the write lock when we |
| 896 | * actually remove the entry from the list. |
| 897 | */ |
| 898 | list_for_each_entry_safe(e, next, &misc->entries, list) |
| 899 | remove_binfmt_handler(misc, e); |
| 900 | |
| 901 | inode_unlock(inode); |
| 902 | break; |
| 903 | default: |
| 904 | return res; |
| 905 | } |
| 906 | |
| 907 | return count; |
| 908 | } |
| 909 | |
| 910 | static const struct file_operations bm_status_operations = { |
| 911 | .read = bm_status_read, |
| 912 | .write = bm_status_write, |
| 913 | .llseek = default_llseek, |
| 914 | }; |
| 915 | |
| 916 | /* Superblock handling */ |
| 917 | |
| 918 | static void bm_put_super(struct super_block *sb) |
| 919 | { |
| 920 | struct user_namespace *user_ns = sb->s_fs_info; |
| 921 | |
| 922 | sb->s_fs_info = NULL; |
| 923 | put_user_ns(ns: user_ns); |
| 924 | } |
| 925 | |
| 926 | static const struct super_operations s_ops = { |
| 927 | .statfs = simple_statfs, |
| 928 | .evict_inode = bm_evict_inode, |
| 929 | .put_super = bm_put_super, |
| 930 | }; |
| 931 | |
| 932 | static int bm_fill_super(struct super_block *sb, struct fs_context *fc) |
| 933 | { |
| 934 | int err; |
| 935 | struct user_namespace *user_ns = sb->s_user_ns; |
| 936 | struct binfmt_misc *misc; |
| 937 | static const struct tree_descr bm_files[] = { |
| 938 | [2] = {"status" , &bm_status_operations, S_IWUSR|S_IRUGO}, |
| 939 | [3] = {.name: "register" , .ops: &bm_register_operations, S_IWUSR}, |
| 940 | /* last one */ {"" } |
| 941 | }; |
| 942 | |
| 943 | if (WARN_ON(user_ns != current_user_ns())) |
| 944 | return -EINVAL; |
| 945 | |
| 946 | /* |
| 947 | * Lazily allocate a new binfmt_misc instance for this namespace, i.e. |
| 948 | * do it here during the first mount of binfmt_misc. We don't need to |
| 949 | * waste memory for every user namespace allocation. It's likely much |
| 950 | * more common to not mount a separate binfmt_misc instance than it is |
| 951 | * to mount one. |
| 952 | * |
| 953 | * While multiple superblocks can exist they are keyed by userns in |
| 954 | * s_fs_info for binfmt_misc. Hence, the vfs guarantees that |
| 955 | * bm_fill_super() is called exactly once whenever a binfmt_misc |
| 956 | * superblock for a userns is created. This in turn lets us conclude |
| 957 | * that when a binfmt_misc superblock is created for the first time for |
| 958 | * a userns there's no one racing us. Therefore we don't need any |
| 959 | * barriers when we dereference binfmt_misc. |
| 960 | */ |
| 961 | misc = user_ns->binfmt_misc; |
| 962 | if (!misc) { |
| 963 | /* |
| 964 | * If it turns out that most user namespaces actually want to |
| 965 | * register their own binary type handler and therefore all |
| 966 | * create their own separate binfmt_misc mounts we should |
| 967 | * consider turning this into a kmem cache. |
| 968 | */ |
| 969 | misc = kzalloc(sizeof(struct binfmt_misc), GFP_KERNEL); |
| 970 | if (!misc) |
| 971 | return -ENOMEM; |
| 972 | |
| 973 | INIT_LIST_HEAD(list: &misc->entries); |
| 974 | rwlock_init(&misc->entries_lock); |
| 975 | |
| 976 | /* Pairs with smp_load_acquire() in load_binfmt_misc(). */ |
| 977 | smp_store_release(&user_ns->binfmt_misc, misc); |
| 978 | } |
| 979 | |
| 980 | /* |
| 981 | * When the binfmt_misc superblock for this userns is shutdown |
| 982 | * ->enabled might have been set to false and we don't reinitialize |
| 983 | * ->enabled again in put_super() as someone might already be mounting |
| 984 | * binfmt_misc again. It also would be pointless since by the time |
| 985 | * ->put_super() is called we know that the binary type list for this |
| 986 | * bintfmt_misc mount is empty making load_misc_binary() return |
| 987 | * -ENOEXEC independent of whether ->enabled is true. Instead, if |
| 988 | * someone mounts binfmt_misc for the first time or again we simply |
| 989 | * reset ->enabled to true. |
| 990 | */ |
| 991 | misc->enabled = true; |
| 992 | |
| 993 | err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files); |
| 994 | if (!err) |
| 995 | sb->s_op = &s_ops; |
| 996 | return err; |
| 997 | } |
| 998 | |
| 999 | static void bm_free(struct fs_context *fc) |
| 1000 | { |
| 1001 | if (fc->s_fs_info) |
| 1002 | put_user_ns(ns: fc->s_fs_info); |
| 1003 | } |
| 1004 | |
| 1005 | static int bm_get_tree(struct fs_context *fc) |
| 1006 | { |
| 1007 | return get_tree_keyed(fc, fill_super: bm_fill_super, key: get_user_ns(ns: fc->user_ns)); |
| 1008 | } |
| 1009 | |
| 1010 | static const struct fs_context_operations bm_context_ops = { |
| 1011 | .free = bm_free, |
| 1012 | .get_tree = bm_get_tree, |
| 1013 | }; |
| 1014 | |
| 1015 | static int bm_init_fs_context(struct fs_context *fc) |
| 1016 | { |
| 1017 | fc->ops = &bm_context_ops; |
| 1018 | return 0; |
| 1019 | } |
| 1020 | |
| 1021 | static struct linux_binfmt misc_format = { |
| 1022 | .module = THIS_MODULE, |
| 1023 | .load_binary = load_misc_binary, |
| 1024 | }; |
| 1025 | |
| 1026 | static struct file_system_type bm_fs_type = { |
| 1027 | .owner = THIS_MODULE, |
| 1028 | .name = "binfmt_misc" , |
| 1029 | .init_fs_context = bm_init_fs_context, |
| 1030 | .fs_flags = FS_USERNS_MOUNT, |
| 1031 | .kill_sb = kill_litter_super, |
| 1032 | }; |
| 1033 | MODULE_ALIAS_FS("binfmt_misc" ); |
| 1034 | |
| 1035 | static int __init init_misc_binfmt(void) |
| 1036 | { |
| 1037 | int err = register_filesystem(&bm_fs_type); |
| 1038 | if (!err) |
| 1039 | insert_binfmt(fmt: &misc_format); |
| 1040 | return err; |
| 1041 | } |
| 1042 | |
| 1043 | static void __exit exit_misc_binfmt(void) |
| 1044 | { |
| 1045 | unregister_binfmt(&misc_format); |
| 1046 | unregister_filesystem(&bm_fs_type); |
| 1047 | } |
| 1048 | |
| 1049 | core_initcall(init_misc_binfmt); |
| 1050 | module_exit(exit_misc_binfmt); |
| 1051 | MODULE_DESCRIPTION("Kernel support for miscellaneous binaries" ); |
| 1052 | MODULE_LICENSE("GPL" ); |
| 1053 | |