| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include <linux/syscalls.h> |
| 3 | #include <linux/slab.h> |
| 4 | #include <linux/fs.h> |
| 5 | #include <linux/file.h> |
| 6 | #include <linux/mount.h> |
| 7 | #include <linux/namei.h> |
| 8 | #include <linux/exportfs.h> |
| 9 | #include <linux/fs_struct.h> |
| 10 | #include <linux/fsnotify.h> |
| 11 | #include <linux/personality.h> |
| 12 | #include <linux/uaccess.h> |
| 13 | #include <linux/compat.h> |
| 14 | #include <linux/nsfs.h> |
| 15 | #include "internal.h" |
| 16 | #include "mount.h" |
| 17 | |
| 18 | static long do_sys_name_to_handle(const struct path *path, |
| 19 | struct file_handle __user *ufh, |
| 20 | void __user *mnt_id, bool unique_mntid, |
| 21 | int fh_flags) |
| 22 | { |
| 23 | long retval; |
| 24 | struct file_handle f_handle; |
| 25 | int handle_dwords, handle_bytes; |
| 26 | struct file_handle *handle = NULL; |
| 27 | |
| 28 | /* |
| 29 | * We need to make sure whether the file system support decoding of |
| 30 | * the file handle if decodeable file handle was requested. |
| 31 | */ |
| 32 | if (!exportfs_can_encode_fh(nop: path->dentry->d_sb->s_export_op, fh_flags)) |
| 33 | return -EOPNOTSUPP; |
| 34 | |
| 35 | /* |
| 36 | * A request to encode a connectable handle for a disconnected dentry |
| 37 | * is unexpected since AT_EMPTY_PATH is not allowed. |
| 38 | */ |
| 39 | if (fh_flags & EXPORT_FH_CONNECTABLE && |
| 40 | WARN_ON(path->dentry->d_flags & DCACHE_DISCONNECTED)) |
| 41 | return -EINVAL; |
| 42 | |
| 43 | if (copy_from_user(to: &f_handle, from: ufh, n: sizeof(struct file_handle))) |
| 44 | return -EFAULT; |
| 45 | |
| 46 | if (f_handle.handle_bytes > MAX_HANDLE_SZ) |
| 47 | return -EINVAL; |
| 48 | |
| 49 | handle = kzalloc(struct_size(handle, f_handle, f_handle.handle_bytes), |
| 50 | GFP_KERNEL); |
| 51 | if (!handle) |
| 52 | return -ENOMEM; |
| 53 | |
| 54 | /* convert handle size to multiple of sizeof(u32) */ |
| 55 | handle_dwords = f_handle.handle_bytes >> 2; |
| 56 | |
| 57 | /* Encode a possibly decodeable/connectable file handle */ |
| 58 | retval = exportfs_encode_fh(dentry: path->dentry, |
| 59 | fid: (struct fid *)handle->f_handle, |
| 60 | max_len: &handle_dwords, flags: fh_flags); |
| 61 | handle->handle_type = retval; |
| 62 | /* convert handle size to bytes */ |
| 63 | handle_bytes = handle_dwords * sizeof(u32); |
| 64 | handle->handle_bytes = handle_bytes; |
| 65 | if ((handle->handle_bytes > f_handle.handle_bytes) || |
| 66 | (retval == FILEID_INVALID) || (retval < 0)) { |
| 67 | /* As per old exportfs_encode_fh documentation |
| 68 | * we could return ENOSPC to indicate overflow |
| 69 | * But file system returned 255 always. So handle |
| 70 | * both the values |
| 71 | */ |
| 72 | if (retval == FILEID_INVALID || retval == -ENOSPC) |
| 73 | retval = -EOVERFLOW; |
| 74 | /* |
| 75 | * set the handle size to zero so we copy only |
| 76 | * non variable part of the file_handle |
| 77 | */ |
| 78 | handle_bytes = 0; |
| 79 | } else { |
| 80 | /* |
| 81 | * When asked to encode a connectable file handle, encode this |
| 82 | * property in the file handle itself, so that we later know |
| 83 | * how to decode it. |
| 84 | * For sanity, also encode in the file handle if the encoded |
| 85 | * object is a directory and verify this during decode, because |
| 86 | * decoding directory file handles is quite different than |
| 87 | * decoding connectable non-directory file handles. |
| 88 | */ |
| 89 | if (fh_flags & EXPORT_FH_CONNECTABLE) { |
| 90 | handle->handle_type |= FILEID_IS_CONNECTABLE; |
| 91 | if (d_is_dir(dentry: path->dentry)) |
| 92 | handle->handle_type |= FILEID_IS_DIR; |
| 93 | } |
| 94 | retval = 0; |
| 95 | } |
| 96 | /* copy the mount id */ |
| 97 | if (unique_mntid) { |
| 98 | if (put_user(real_mount(path->mnt)->mnt_id_unique, |
| 99 | (u64 __user *) mnt_id)) |
| 100 | retval = -EFAULT; |
| 101 | } else { |
| 102 | if (put_user(real_mount(path->mnt)->mnt_id, |
| 103 | (int __user *) mnt_id)) |
| 104 | retval = -EFAULT; |
| 105 | } |
| 106 | /* copy the handle */ |
| 107 | if (retval != -EFAULT && |
| 108 | copy_to_user(to: ufh, from: handle, |
| 109 | struct_size(handle, f_handle, handle_bytes))) |
| 110 | retval = -EFAULT; |
| 111 | kfree(objp: handle); |
| 112 | return retval; |
| 113 | } |
| 114 | |
| 115 | /** |
| 116 | * sys_name_to_handle_at: convert name to handle |
| 117 | * @dfd: directory relative to which name is interpreted if not absolute |
| 118 | * @name: name that should be converted to handle. |
| 119 | * @handle: resulting file handle |
| 120 | * @mnt_id: mount id of the file system containing the file |
| 121 | * (u64 if AT_HANDLE_MNT_ID_UNIQUE, otherwise int) |
| 122 | * @flag: flag value to indicate whether to follow symlink or not |
| 123 | * and whether a decodable file handle is required. |
| 124 | * |
| 125 | * @handle->handle_size indicate the space available to store the |
| 126 | * variable part of the file handle in bytes. If there is not |
| 127 | * enough space, the field is updated to return the minimum |
| 128 | * value required. |
| 129 | */ |
| 130 | SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, |
| 131 | struct file_handle __user *, handle, void __user *, mnt_id, |
| 132 | int, flag) |
| 133 | { |
| 134 | struct path path; |
| 135 | int lookup_flags; |
| 136 | int fh_flags = 0; |
| 137 | int err; |
| 138 | |
| 139 | if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID | |
| 140 | AT_HANDLE_MNT_ID_UNIQUE | AT_HANDLE_CONNECTABLE)) |
| 141 | return -EINVAL; |
| 142 | |
| 143 | /* |
| 144 | * AT_HANDLE_FID means there is no intention to decode file handle |
| 145 | * AT_HANDLE_CONNECTABLE means there is an intention to decode a |
| 146 | * connected fd (with known path), so these flags are conflicting. |
| 147 | * AT_EMPTY_PATH could be used along with a dfd that refers to a |
| 148 | * disconnected non-directory, which cannot be used to encode a |
| 149 | * connectable file handle, because its parent is unknown. |
| 150 | */ |
| 151 | if (flag & AT_HANDLE_CONNECTABLE && |
| 152 | flag & (AT_HANDLE_FID | AT_EMPTY_PATH)) |
| 153 | return -EINVAL; |
| 154 | else if (flag & AT_HANDLE_FID) |
| 155 | fh_flags |= EXPORT_FH_FID; |
| 156 | else if (flag & AT_HANDLE_CONNECTABLE) |
| 157 | fh_flags |= EXPORT_FH_CONNECTABLE; |
| 158 | |
| 159 | lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0; |
| 160 | if (flag & AT_EMPTY_PATH) |
| 161 | lookup_flags |= LOOKUP_EMPTY; |
| 162 | err = user_path_at(dfd, name, lookup_flags, &path); |
| 163 | if (!err) { |
| 164 | err = do_sys_name_to_handle(path: &path, ufh: handle, mnt_id, |
| 165 | unique_mntid: flag & AT_HANDLE_MNT_ID_UNIQUE, |
| 166 | fh_flags); |
| 167 | path_put(&path); |
| 168 | } |
| 169 | return err; |
| 170 | } |
| 171 | |
| 172 | static int get_path_anchor(int fd, struct path *root) |
| 173 | { |
| 174 | if (fd >= 0) { |
| 175 | CLASS(fd, f)(fd); |
| 176 | if (fd_empty(f)) |
| 177 | return -EBADF; |
| 178 | *root = fd_file(f)->f_path; |
| 179 | path_get(root); |
| 180 | return 0; |
| 181 | } |
| 182 | |
| 183 | if (fd == AT_FDCWD) { |
| 184 | get_fs_pwd(current->fs, pwd: root); |
| 185 | return 0; |
| 186 | } |
| 187 | |
| 188 | if (fd == FD_PIDFS_ROOT) { |
| 189 | pidfs_get_root(path: root); |
| 190 | return 0; |
| 191 | } |
| 192 | |
| 193 | if (fd == FD_NSFS_ROOT) { |
| 194 | nsfs_get_root(path: root); |
| 195 | return 0; |
| 196 | } |
| 197 | |
| 198 | return -EBADF; |
| 199 | } |
| 200 | |
| 201 | static int vfs_dentry_acceptable(void *context, struct dentry *dentry) |
| 202 | { |
| 203 | struct handle_to_path_ctx *ctx = context; |
| 204 | struct user_namespace *user_ns = current_user_ns(); |
| 205 | struct dentry *d, *root = ctx->root.dentry; |
| 206 | struct mnt_idmap *idmap = mnt_idmap(mnt: ctx->root.mnt); |
| 207 | int retval = 0; |
| 208 | |
| 209 | if (!root) |
| 210 | return 1; |
| 211 | |
| 212 | /* Old permission model with global CAP_DAC_READ_SEARCH. */ |
| 213 | if (!ctx->flags) |
| 214 | return 1; |
| 215 | |
| 216 | /* |
| 217 | * Verify that the decoded dentry itself has a valid id mapping. |
| 218 | * In case the decoded dentry is the mountfd root itself, this |
| 219 | * verifies that the mountfd inode itself has a valid id mapping. |
| 220 | */ |
| 221 | if (!privileged_wrt_inode_uidgid(ns: user_ns, idmap, inode: d_inode(dentry))) |
| 222 | return 0; |
| 223 | |
| 224 | /* |
| 225 | * It's racy as we're not taking rename_lock but we're able to ignore |
| 226 | * permissions and we just need an approximation whether we were able |
| 227 | * to follow a path to the file. |
| 228 | * |
| 229 | * It's also potentially expensive on some filesystems especially if |
| 230 | * there is a deep path. |
| 231 | */ |
| 232 | d = dget(dentry); |
| 233 | while (d != root && !IS_ROOT(d)) { |
| 234 | struct dentry *parent = dget_parent(dentry: d); |
| 235 | |
| 236 | /* |
| 237 | * We know that we have the ability to override DAC permissions |
| 238 | * as we've verified this earlier via CAP_DAC_READ_SEARCH. But |
| 239 | * we also need to make sure that there aren't any unmapped |
| 240 | * inodes in the path that would prevent us from reaching the |
| 241 | * file. |
| 242 | */ |
| 243 | if (!privileged_wrt_inode_uidgid(ns: user_ns, idmap, |
| 244 | inode: d_inode(dentry: parent))) { |
| 245 | dput(d); |
| 246 | dput(parent); |
| 247 | return retval; |
| 248 | } |
| 249 | |
| 250 | dput(d); |
| 251 | d = parent; |
| 252 | } |
| 253 | |
| 254 | if (!(ctx->flags & HANDLE_CHECK_SUBTREE) || d == root) |
| 255 | retval = 1; |
| 256 | /* |
| 257 | * exportfs_decode_fh_raw() does not call acceptable() callback with |
| 258 | * a disconnected directory dentry, so we should have reached either |
| 259 | * mount fd directory or sb root. |
| 260 | */ |
| 261 | if (ctx->fh_flags & EXPORT_FH_DIR_ONLY) |
| 262 | WARN_ON_ONCE(d != root && d != root->d_sb->s_root); |
| 263 | dput(d); |
| 264 | return retval; |
| 265 | } |
| 266 | |
| 267 | static int do_handle_to_path(struct file_handle *handle, struct path *path, |
| 268 | struct handle_to_path_ctx *ctx) |
| 269 | { |
| 270 | int handle_dwords; |
| 271 | struct vfsmount *mnt = ctx->root.mnt; |
| 272 | struct dentry *dentry; |
| 273 | |
| 274 | /* change the handle size to multiple of sizeof(u32) */ |
| 275 | handle_dwords = handle->handle_bytes >> 2; |
| 276 | dentry = exportfs_decode_fh_raw(mnt, fid: (struct fid *)handle->f_handle, |
| 277 | fh_len: handle_dwords, fileid_type: handle->handle_type, |
| 278 | flags: ctx->fh_flags, acceptable: vfs_dentry_acceptable, |
| 279 | context: ctx); |
| 280 | if (IS_ERR_OR_NULL(ptr: dentry)) { |
| 281 | if (dentry == ERR_PTR(error: -ENOMEM)) |
| 282 | return -ENOMEM; |
| 283 | return -ESTALE; |
| 284 | } |
| 285 | path->dentry = dentry; |
| 286 | path->mnt = mntget(mnt); |
| 287 | return 0; |
| 288 | } |
| 289 | |
| 290 | static inline int may_decode_fh(struct handle_to_path_ctx *ctx, |
| 291 | unsigned int o_flags) |
| 292 | { |
| 293 | struct path *root = &ctx->root; |
| 294 | |
| 295 | if (capable(CAP_DAC_READ_SEARCH)) |
| 296 | return 0; |
| 297 | |
| 298 | /* |
| 299 | * Allow relaxed permissions of file handles if the caller has |
| 300 | * the ability to mount the filesystem or create a bind-mount of |
| 301 | * the provided @mountdirfd. |
| 302 | * |
| 303 | * In both cases the caller may be able to get an unobstructed |
| 304 | * way to the encoded file handle. If the caller is only able to |
| 305 | * create a bind-mount we need to verify that there are no |
| 306 | * locked mounts on top of it that could prevent us from getting |
| 307 | * to the encoded file. |
| 308 | * |
| 309 | * In principle, locked mounts can prevent the caller from |
| 310 | * mounting the filesystem but that only applies to procfs and |
| 311 | * sysfs neither of which support decoding file handles. |
| 312 | * |
| 313 | * Restrict to O_DIRECTORY to provide a deterministic API that |
| 314 | * avoids a confusing api in the face of disconnected non-dir |
| 315 | * dentries. |
| 316 | * |
| 317 | * There's only one dentry for each directory inode (VFS rule)... |
| 318 | */ |
| 319 | if (!(o_flags & O_DIRECTORY)) |
| 320 | return -EPERM; |
| 321 | |
| 322 | if (ns_capable(ns: root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN)) |
| 323 | ctx->flags = HANDLE_CHECK_PERMS; |
| 324 | else if (is_mounted(mnt: root->mnt) && |
| 325 | ns_capable(ns: real_mount(mnt: root->mnt)->mnt_ns->user_ns, |
| 326 | CAP_SYS_ADMIN) && |
| 327 | !has_locked_children(mnt: real_mount(mnt: root->mnt), dentry: root->dentry)) |
| 328 | ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE; |
| 329 | else |
| 330 | return -EPERM; |
| 331 | |
| 332 | /* Are we able to override DAC permissions? */ |
| 333 | if (!ns_capable(ns: current_user_ns(), CAP_DAC_READ_SEARCH)) |
| 334 | return -EPERM; |
| 335 | |
| 336 | ctx->fh_flags = EXPORT_FH_DIR_ONLY; |
| 337 | return 0; |
| 338 | } |
| 339 | |
| 340 | static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, |
| 341 | struct path *path, unsigned int o_flags) |
| 342 | { |
| 343 | int retval = 0; |
| 344 | struct file_handle f_handle; |
| 345 | struct file_handle *handle __free(kfree) = NULL; |
| 346 | struct handle_to_path_ctx ctx = {}; |
| 347 | const struct export_operations *eops; |
| 348 | |
| 349 | if (copy_from_user(to: &f_handle, from: ufh, n: sizeof(struct file_handle))) |
| 350 | return -EFAULT; |
| 351 | |
| 352 | if ((f_handle.handle_bytes > MAX_HANDLE_SZ) || |
| 353 | (f_handle.handle_bytes == 0)) |
| 354 | return -EINVAL; |
| 355 | |
| 356 | if (f_handle.handle_type < 0 || |
| 357 | FILEID_USER_FLAGS(f_handle.handle_type) & ~FILEID_VALID_USER_FLAGS) |
| 358 | return -EINVAL; |
| 359 | |
| 360 | retval = get_path_anchor(fd: mountdirfd, root: &ctx.root); |
| 361 | if (retval) |
| 362 | return retval; |
| 363 | |
| 364 | eops = ctx.root.mnt->mnt_sb->s_export_op; |
| 365 | if (eops && eops->permission) |
| 366 | retval = eops->permission(&ctx, o_flags); |
| 367 | else |
| 368 | retval = may_decode_fh(ctx: &ctx, o_flags); |
| 369 | if (retval) |
| 370 | goto out_path; |
| 371 | |
| 372 | handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes), |
| 373 | GFP_KERNEL); |
| 374 | if (!handle) { |
| 375 | retval = -ENOMEM; |
| 376 | goto out_path; |
| 377 | } |
| 378 | /* copy the full handle */ |
| 379 | *handle = f_handle; |
| 380 | if (copy_from_user(to: &handle->f_handle, |
| 381 | from: &ufh->f_handle, |
| 382 | n: f_handle.handle_bytes)) { |
| 383 | retval = -EFAULT; |
| 384 | goto out_path; |
| 385 | } |
| 386 | |
| 387 | /* |
| 388 | * If handle was encoded with AT_HANDLE_CONNECTABLE, verify that we |
| 389 | * are decoding an fd with connected path, which is accessible from |
| 390 | * the mount fd path. |
| 391 | */ |
| 392 | if (f_handle.handle_type & FILEID_IS_CONNECTABLE) { |
| 393 | ctx.fh_flags |= EXPORT_FH_CONNECTABLE; |
| 394 | ctx.flags |= HANDLE_CHECK_SUBTREE; |
| 395 | } |
| 396 | if (f_handle.handle_type & FILEID_IS_DIR) |
| 397 | ctx.fh_flags |= EXPORT_FH_DIR_ONLY; |
| 398 | /* Filesystem code should not be exposed to user flags */ |
| 399 | handle->handle_type &= ~FILEID_USER_FLAGS_MASK; |
| 400 | retval = do_handle_to_path(handle, path, ctx: &ctx); |
| 401 | |
| 402 | out_path: |
| 403 | path_put(&ctx.root); |
| 404 | return retval; |
| 405 | } |
| 406 | |
| 407 | static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, |
| 408 | int open_flag) |
| 409 | { |
| 410 | long retval = 0; |
| 411 | struct path path __free(path_put) = {}; |
| 412 | struct file *file; |
| 413 | const struct export_operations *eops; |
| 414 | |
| 415 | retval = handle_to_path(mountdirfd, ufh, path: &path, o_flags: open_flag); |
| 416 | if (retval) |
| 417 | return retval; |
| 418 | |
| 419 | CLASS(get_unused_fd, fd)(flags: open_flag); |
| 420 | if (fd < 0) |
| 421 | return fd; |
| 422 | |
| 423 | eops = path.mnt->mnt_sb->s_export_op; |
| 424 | if (eops->open) |
| 425 | file = eops->open(&path, open_flag); |
| 426 | else |
| 427 | file = file_open_root(&path, "" , open_flag, 0); |
| 428 | if (IS_ERR(ptr: file)) |
| 429 | return PTR_ERR(ptr: file); |
| 430 | |
| 431 | fd_install(fd, file); |
| 432 | return take_fd(fd); |
| 433 | } |
| 434 | |
| 435 | /** |
| 436 | * sys_open_by_handle_at: Open the file handle |
| 437 | * @mountdirfd: directory file descriptor |
| 438 | * @handle: file handle to be opened |
| 439 | * @flags: open flags. |
| 440 | * |
| 441 | * @mountdirfd indicate the directory file descriptor |
| 442 | * of the mount point. file handle is decoded relative |
| 443 | * to the vfsmount pointed by the @mountdirfd. @flags |
| 444 | * value is same as the open(2) flags. |
| 445 | */ |
| 446 | SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, |
| 447 | struct file_handle __user *, handle, |
| 448 | int, flags) |
| 449 | { |
| 450 | long ret; |
| 451 | |
| 452 | if (force_o_largefile()) |
| 453 | flags |= O_LARGEFILE; |
| 454 | |
| 455 | ret = do_handle_open(mountdirfd, ufh: handle, open_flag: flags); |
| 456 | return ret; |
| 457 | } |
| 458 | |
#ifdef CONFIG_COMPAT
/*
 * Compat entry point: identical to the native sys_open_by_handle_at()
 * defined in this file, except that O_LARGEFILE is never forced on.
 */
COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
		       struct file_handle __user *, handle, int, flags)
{
	long ret = do_handle_open(mountdirfd, handle, flags);

	return ret;
}
#endif
| 470 | |